In [None]:
import os
import glob
import pickle
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import (roc_auc_score, average_precision_score, roc_curve, confusion_matrix)
import ehr_utils,ehr_models,plot_utils
from typing import Tuple,Any, List


In [None]:
def evaluate_model(y_true: np.ndarray, y_prob: np.ndarray, model_name: str, xl_path: str = "metrics.xlsx", sheet_name: str = "Sheet1", threshold: float = 0.5) -> dict:
    """Score binary predictions and append the metrics as one row of an Excel sheet.

    AUROC and AUPRC are computed from the raw scores. Sensitivity, specificity
    and Youden's index (sensitivity + specificity - 1) are computed from the
    scores binarised with ``y_prob >= threshold``.

    Args:
        y_true: Ground-truth labels; cast to ``int`` before use.
        y_prob: Predicted scores for the positive class.
        model_name: Value written to the "Model" column of the row.
        xl_path: Workbook to create or update.
        sheet_name: Sheet that receives the row. Other sheets already present
            in the workbook are left untouched.
        threshold: Cut-off applied to ``y_prob`` to obtain hard predictions.

    Returns:
        The metrics of this call as a dict with keys "Model", "AUROC", "AUPRC",
        "Sensitivity", "Specificity" and "Youden".
    """
    y_true = np.asarray(y_true).astype(int)
    y_prob = np.asarray(y_prob)

    auroc = roc_auc_score(y_true, y_prob)
    auprc = average_precision_score(y_true, y_prob)

    y_pred = (y_prob >= threshold).astype(int)
    tp = int(((y_pred == 1) & (y_true == 1)).sum())
    tn = int(((y_pred == 0) & (y_true == 0)).sum())
    fp = int(((y_pred == 1) & (y_true == 0)).sum())
    fn = int(((y_pred == 0) & (y_true == 1)).sum())
    # The 1e-8 term keeps the ratios finite when a denominator would be zero.
    sens = tp / (tp + fn + 1e-8)
    spec = tn / (tn + fp + 1e-8)
    youden = sens + spec - 1
    res = {"Model": model_name, "AUROC": auroc, "AUPRC": auprc, "Sensitivity": sens, "Specificity": spec, "Youden": youden}
    df_new = pd.DataFrame([res])

    # No workbook yet: create it with this single row.
    if not os.path.isfile(xl_path):
        with pd.ExcelWriter(xl_path, engine='openpyxl') as writer:
            df_new.to_excel(writer, sheet_name=sheet_name, index=False)
        return res

    # Read the rows already stored in the target sheet BEFORE opening a writer,
    # so the existing content is captured while the file is still intact.
    with pd.ExcelFile(xl_path, engine='openpyxl') as workbook:
        if sheet_name in workbook.sheet_names:
            df_old = pd.read_excel(workbook, sheet_name=sheet_name)
            df_out = pd.concat([df_old, df_new], ignore_index=True)
        else:
            df_out = df_new

    # Append mode keeps every other sheet of the workbook; only the target
    # sheet is replaced by the old rows plus the new one. (Write mode would
    # start from an empty workbook and discard all earlier results.)
    with pd.ExcelWriter(xl_path, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
        df_out.to_excel(writer, sheet_name=sheet_name, index=False)
    return res


# ---------- 2. 批量评估 ----------
def batch_evaluate(base_models: List[Tuple[str, Any]],
                   meta_wrapper: ehr_models.PytorchModelWrapper,
                   X_test,
                   y_test,
                   meta_test: torch.Tensor,
                   output_path: str = "metrics.xlsx",
                   sheet: str = "Sheet1"):
    """Evaluate every base model and the stacking model on one dataset.

    Each base model is scored on ``X_test`` and recorded through
    ``evaluate_model`` under the part of its name that precedes the first
    underscore. The stacking model is then scored on ``meta_test`` and
    recorded as "StackingMLP".

    Args:
        base_models: ``(name, model)`` pairs.
        meta_wrapper: Wrapper around the stacking model; its
            ``predict_proba`` output is indexed with ``[:, 1]``.
        X_test: Features passed to every base model.
        y_test: Ground-truth labels shared by all evaluations.
        meta_test: Input for the stacking model.
            NOTE(review): annotated as a tensor, but the call in this
            notebook passes ``meta_prob.cpu().numpy()`` — confirm which type
            the wrapper expects.
        output_path: Workbook forwarded to ``evaluate_model``.
        sheet: Sheet name forwarded to ``evaluate_model``.
    """
    for full_name, estimator in base_models:
        print(full_name)
        # Use the positive-class probability column when the model offers one;
        # otherwise fall back to whatever predict() returns.
        scores = (
            estimator.predict_proba(X_test)[:, 1]
            if hasattr(estimator, "predict_proba")
            else estimator.predict(X_test)
        )
        # Keep only the text before the first underscore as the reported name.
        short_name = full_name.split('_')[0]
        print(short_name)
        evaluate_model(y_test, scores, short_name, output_path, sheet)

    stacked_scores = meta_wrapper.predict_proba(meta_test)[:, 1]
    evaluate_model(y_test, stacked_scores, "StackingMLP", output_path, sheet)

In [None]:
# ---------- 1. Data dictionary ----------
# Maps a cohort label to its preprocessed CSV. The evaluation loop iterates
# over this dict and uses each key as the Excel sheet name; the "Internal"
# key is special-cased there.
data_dict = {
    "Internal": "data_processed/benbu_baseline_cleaned_onehot.csv",
    "Shangjin": "data_processed/shangjin_baseline_cleaned_onehot.csv",
    "Tianfu": "data_processed/tianfu_baseline_cleaned_onehot.csv",
    "Wuhou": "data_processed/wuhou_baseline_cleaned_onehot.csv",
}
# ---------- 2. Load base models ----------
# presumably a list of (name, model) pairs — batch_evaluate unpacks it that
# way; confirm against ehr_utils.load_all_pkls.
base_models = ehr_utils.load_all_pkls('./models/')

# ---------- 3. Device & second-level (stacking) model ----------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The stacking model is constructed with the number of base models
# (presumably its input width — confirm in ehr_models.StackingMLP).
meta_model = ehr_models.StackingMLP(len(base_models)).to(device)
# NOTE(review): torch.load unpickles the checkpoint; only load files from a
# trusted source.
meta_model.load_state_dict(torch.load('checkpoints/stacking_best.pt', map_location=device))
mlp_wrapper = ehr_models.PytorchModelWrapper(meta_model, device)

[INFO] 共加载 10 个模型：['AdaBoost_cv_0.5042', 'DecisionTree_cv_0.4930', 'GaussianNB_cv_0.4886', 'GradientBoosting_cv_0.5097', 'LightGBM_cv_0.4985', 'LinearDiscriminantAnalysis_cv_0.4882', 'LogisticRegression_cv_0.4908', 'MLPClassifier_cv_0.5016', 'RandomForest_cv_0.4996', 'XGBoost_cv_0.4972']

>>> 正在评估 Internal ...
总特征数：96，需要log1p变换的特征数：66
训练集样本数：8000，验证集样本数：2000
[INFO] 直接读取缓存: cache\meta_internal.pt
AdaBoost_cv_0.5042
AdaBoost
DecisionTree_cv_0.4930
DecisionTree
GaussianNB_cv_0.4886
GaussianNB
GradientBoosting_cv_0.5097
GradientBoosting
LightGBM_cv_0.4985
LightGBM
LinearDiscriminantAnalysis_cv_0.4882
LinearDiscriminantAnalysis
LogisticRegression_cv_0.4908
LogisticRegression
MLPClassifier_cv_0.5016
MLPClassifier
RandomForest_cv_0.4996
RandomForest
XGBoost_cv_0.4972
XGBoost

>>> 正在评估 Shangjin ...
总特征数：96，需要log1p变换的特征数：66
缺失值统计：
Series([], dtype: int64)
变换后缺失值统计：
Series([], dtype: int64)
[INFO] 直接读取缓存: cache\meta_shangjin.pt
AdaBoost_cv_0.5042
AdaBoost
DecisionTree_cv_0.4930
DecisionTree
Gaus

In [None]:
# ---------- 4. Evaluate every cohort in one pass ----------
# Results for all cohorts go into one workbook, one sheet per data_dict key.
metrics_output_path = "result/all_metrics.xlsx"
cache_dir = "cache"
os.makedirs("result", exist_ok=True)

for sheet_name, csv_path in data_dict.items():
    print(f"\n>>> 正在评估 {sheet_name} ...")
    if sheet_name != "Internal":
        # Every other cohort is loaded through the external-data helper.
        X, y = ehr_utils.load_external(csv_path)
    else:
        # Internal cohort: only the test part of the train/test preprocessing
        # is evaluated.
        X_train, X_test, y_train, y_test = ehr_utils.preprocess_ehr_train_test_data(csv_path)
        X, y = X_test, y_test

    # Meta features are cached per cohort under cache_dir.
    meta_prob = ehr_utils.generate_meta_features(
        base_models,
        X,
        cache_name=f"meta_{sheet_name.lower()}",
        cache_dir=cache_dir,
        use_cache=True,
    )
    batch_evaluate(
        base_models=base_models,
        meta_wrapper=mlp_wrapper,
        X_test=X,
        y_test=y,
        meta_test=meta_prob.cpu().numpy(),
        output_path=metrics_output_path,
        sheet=sheet_name,
    )

print("\n[INFO] 全部评估完成！文件 ->", metrics_output_path)

In [1]:
# Ad-hoc inspection: load the CSV registered under "Internal" in data_dict
# and preview its first rows.
# NOTE(review): pandas is already imported in the notebook's first cell; this
# repeat only matters when the cell is run on its own.
import pandas as pd 
data =pd.read_csv('data_processed/benbu_baseline_cleaned_onehot.csv')
data.head()

Unnamed: 0,ABSI,AKP,ALB_to_GLO_ratio,ALT,AST,AST_to_ALT_ratio,Age,Albumin,BLR,BMI,...,Stone_history_Unknown,marriage_simplified_Unknown,marriage_simplified_married,marriage_simplified_unmarried,nation_Han People,nation_Minority Ethnic Groups,nation_Unknown,occupation_Manual Laborer,occupation_No Manual Laborer,occupation_Unknown
0,0.89,146,1.92,15,17,1.13,57,48,0.01,24.03,...,0,0,0,1,1,0,0,1,0,0
1,0.58,140,1.58,21,21,1.0,47,41,0.02,27.01,...,0,1,0,0,1,0,0,0,1,0
2,0.67,104,2.25,30,38,1.27,60,54,0.06,27.94,...,1,0,1,0,0,0,1,0,0,1
3,0.56,118,1.7,22,22,1.0,73,51,0.03,33.66,...,1,1,0,0,0,1,0,1,0,0
4,0.68,97,2.0,36,40,1.11,26,56,0.02,35.09,...,0,0,0,1,1,0,0,0,0,1


In [5]:
# Print how many columns the loaded frame has, then each column name on its
# own line.
a=list(data.columns)
print(len(a))
for i in a:
    print(i)

97
ABSI
AKP
ALB_to_GLO_ratio
ALT
AST
AST_to_ALT_ratio
Age
Albumin
BLR
BMI
BRI
Basophil_count
Basophil_percentage
CK
Cholesterol
Creatinine
Cystatin.C
DBIL
Diastolic_BP
ELR
Eosinophil_count
Eosinophil_percentage
Fasting_blood_glucose
GGT
Gender
Globulin
HB
HCT
HDL
Height
Hips
IBIL
LDH
LDL
Lymphocytes_count
Lymphocytes_percentage
MCH
MCHC
MCV
MLR
Monocytes_count
Monocytes_percentage
NLR
Neutrophil_count
Neutrophil_percentage
PLR
RBC
RDW.CV
RDW.SD
Serum_Urea
Serum_Uric_acid
Systolic_BP
TBIL
Total_protein
Triglycerides
Uric_PH
Uric_RBC
Uric_WBC
Uric_bacteria
Uric_conductivity
Uric_epithelium
Uric_specific_gravity
WBC
Waist_to_hip_ratio
Waistline
Weight
a_HBDH
eGFR
platelets_count
stone
Alcohol_Drinker
Alcohol_Never
Alcohol_Unknown
BPH_BPH
BPH_None
BPH_Unknown
Diabete_Diabete
Diabete_None
Diabete_Unknown
Hypertension_Hypertension
Hypertension_None Hypertension
Hypertension_Unknown
Smoke_Never
Smoke_Smoker
Smoke_Unknown
Stone_history_Former Stone Carrier
Stone_history_None
Stone_history_Unkn