# 00 · Environment Check ✅

> 轻量环境自检与一键补齐。该 Notebook **不会**修改你的核心依赖，仅在当前内核中按需补齐缺失的小型依赖（pyyaml/openpyxl/pyarrow/joblib/seaborn）。

In [1]:

# %% [bootstrap deps]
import sys, subprocess, importlib

# Lookup table: importable module name -> pinned requirement string that is
# handed to pip when that module cannot be imported.
REQ = dict(
    yaml="pyyaml==6.0.2",
    openpyxl="openpyxl==3.1.5",
    pyarrow="pyarrow==16.1.0",
    joblib="joblib==1.4.2",
    seaborn="seaborn==0.13.2",
)

def ensure(import_name: str, pip_spec: str) -> None:
    """Make ``import_name`` importable, installing ``pip_spec`` if it is not.

    The module is imported first; only when that import fails is
    ``pip install`` run with the current interpreter (``sys.executable``),
    after which the import is attempted once more.

    Args:
        import_name: Module name passed to ``importlib.import_module``.
        pip_spec: Requirement string passed verbatim to ``pip install``.

    Raises:
        RuntimeError: If ``pip install`` exits with a non-zero return code.
        Exception: Whatever the second import raises if the module is still
            not importable after a successful install.
    """
    try:
        importlib.import_module(import_name)
    except Exception as err:
        # Deliberately broad: any import failure leads to the install attempt
        # below. The cause is reported rather than silently discarded so a
        # broken (as opposed to missing) package is visible in the output.
        print(f"[setup] {import_name} not importable ({type(err).__name__}: {err})")
    else:
        print(f"[ok] {import_name} already available")
        return

    print(f"[setup] installing {pip_spec} ...")
    r = subprocess.run(
        [sys.executable, "-m", "pip", "install", pip_spec, "-q", "--disable-pip-version-check", "--no-input"],
        capture_output=True, text=True
    )
    if r.returncode != 0:
        # Show the last lines of pip's output; fall back to stdout when stderr
        # is empty so the failure is never reported without any detail.
        tail = (r.stderr.strip() or r.stdout.strip()).splitlines()[-10:]
        print("\n".join(tail))
        raise RuntimeError(f"pip install failed: {pip_spec}")

    # Drop cached finder state so the freshly installed package can be found.
    importlib.invalidate_caches()
    importlib.import_module(import_name)
    print(f"[ok] {import_name} ready")

# Walk the requirement table in order and make sure each module is importable.
for module_name in REQ:
    ensure(module_name, REQ[module_name])
print("[done] dependency bootstrap complete.")


[ok] yaml already available
[ok] openpyxl already available
[ok] pyarrow already available
[ok] joblib already available
[setup] installing seaborn==0.13.2 ...
[ok] seaborn ready
[done] dependency bootstrap complete.


In [3]:
# ! pip install scikit-learn

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting scikit-learn
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/58/0e/8c2a03d518fb6bd0b6b0d4b114c63d5f1db01ff0f9925d8eb10960d01c01/scikit_learn-1.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.7/9.7 MB 27.3 MB/s eta 0:00:00
Collecting scipy>=1.8.0
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/8e/6d/41991e503e51fc1134502694c5fa7a1671501a17ffa12716a4a9151af3df/scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.7/37.7 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting threadpoolctl>=3.1.0
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl

In [4]:

# %% [version report]
import platform, sklearn, pandas as pd, numpy as np
import yaml
import matplotlib, seaborn

# Print the interpreter, OS and core library versions of the current kernel.
print("Python   :", platform.python_version())
print("OS       :", platform.platform())
print("NumPy    :", np.__version__)
print("Pandas   :", pd.__version__)
print("sklearn  :", sklearn.__version__)
print("Matplotlib:", matplotlib.__version__)
print("Seaborn  :", seaborn.__version__)

# shap is optional. Distinguish "not installed" from "installed but failing to
# import" so that a broken install is not misreported as a missing package.
try:
    import shap
except ModuleNotFoundError as err:
    if err.name == "shap":
        print("shap     : (not installed)")
    else:
        # shap itself was found, but a module it imports is missing.
        print(f"shap     : (import failed: missing module {err.name!r})")
except Exception as err:
    print(f"shap     : (import failed: {type(err).__name__}: {err})")
else:
    print("shap     :", getattr(shap, "__version__", "(unknown version)"))


Python   : 3.10.12
OS       : Linux-3.10.0-957.el7.x86_64-x86_64-with-glibc2.35
NumPy    : 2.2.6
Pandas   : 2.3.3
sklearn  : 1.7.2
Matplotlib: 3.10.7
Seaborn  : 0.13.2
shap     : (not installed)


In [5]:

# %% [project check]
from pathlib import Path
# Project root: the parent folder when the kernel runs from a directory named
# "notebooks", otherwise the current working directory itself.
PROJECT_ROOT = Path.cwd().resolve().parents[0] if Path.cwd().name == "notebooks" else Path.cwd()
CONF_PATH = PROJECT_ROOT / "conf" / "config.yaml"

print("[proj] root :", PROJECT_ROOT)
print("[proj] config:", CONF_PATH, "(exists=", CONF_PATH.exists(), ")")

# cfg always ends up as a dict so the lookups below cannot fail on an empty
# or malformed config file.
cfg = {}
if CONF_PATH.exists():
    _loaded = yaml.safe_load(CONF_PATH.read_text(encoding="utf-8"))
    if isinstance(_loaded, dict):
        cfg = _loaded
        print("[ok] config loaded.")
    else:
        # safe_load yields None for an empty document and can yield a list or
        # scalar for other content; neither supports the .get() calls below.
        print("[warn] config.yaml is empty or not a mapping; using defaults.")
else:
    print("[warn] config.yaml not found.")

# check key paths
from pprint import pprint
print("\n[config preview]")
pprint(cfg)

# The "data" section may be absent, null, or not a mapping; treat all of those
# as "no overrides". Null/empty directory entries fall back to the defaults.
_data_cfg = cfg.get("data")
if not isinstance(_data_cfg, dict):
    _data_cfg = {}
raw_dir = PROJECT_ROOT / (_data_cfg.get("raw_dir") or "data_raw")
processed_dir = PROJECT_ROOT / (_data_cfg.get("processed_dir") or "data_processed")
print("\n[data dirs]")
print("- raw_dir     :", raw_dir, "exists=", raw_dir.exists())
print("- processed_dir:", processed_dir, "exists=", processed_dir.exists())

# expected outputs
out_dir = PROJECT_ROOT / "outputs"
print("\n[outputs]")
print("- outputs     :", out_dir, "exists=", out_dir.exists())
print("- models      :", (out_dir / "models"), "exists=", (out_dir / "models").exists())
print("- figures     :", (out_dir / "figures"), "exists=", (out_dir / "figures").exists())
print("- tables      :", (out_dir / "tables"), "exists=", (out_dir / "tables").exists())

print("\n[hint] 如首次运行：请先执行 Ingest/Preprocess/MI/Train 等脚本，再运行建模/可视化类 Notebook。")


[proj] root : /public/home/aojiang/海南医科大学/icu-lymphoma-ml-repro
[proj] config: /public/home/aojiang/海南医科大学/icu-lymphoma-ml-repro/conf/config.yaml (exists= True )
[ok] config loaded.

[config preview]
{'cohort': {'exclude': [{'stay_lt_24h': True}],
            'include': [{'adult_only': True},
                        {'first_icu_stay': True},
                        {'lymphoma': True}]},
 'data': {'id_col': None,
          'input_files': ['data.xlsx'],
          'input_format': 'excel',
          'outcome_col': 'mor_hospital',
          'positive_label': 1,
          'processed_dir': 'data_processed',
          'raw_dir': 'data_raw'},
 'evaluation': {'calibration': True,
                'calibration_method': ['none', 'isotonic'],
                'curves': {'calibration_curves': True, 'pr': True, 'roc': True},
                'metrics': ['roc_auc',
                            'pr_auc',
                            'f1',
                            'accuracy',
                            '