In [1]:
# 由于环境重置，需要重新加载数据并执行评估代码
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier

# 设置 n_splits 为 3，并重新运行模型评估代码
features_df = pd.read_csv("../result/features1.csv")
clinical_df = pd.read_csv("../testdata/dataset1/clinical1.csv")
labels_df = clinical_df[["PatientID", "deadstatus.event"]].dropna()

def evaluate_model(X, y, name="model", n_splits=3):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    model = make_pipeline(StandardScaler(), RandomForestClassifier(n_estimators=100, random_state=42))
    f1 = cross_val_score(model, X, y, cv=skf, scoring="f1")
    auc = cross_val_score(model, X, y, cv=skf, scoring="roc_auc")
    print(f"{name}")
    print(f"  F1:  {f1.mean():.3f}  每折: {np.round(f1, 3)}")
    print(f"  AUC: {auc.mean():.3f}  每折: {np.round(auc, 3)}")
    print()

# A. 影像特征
df_img = features_df.merge(labels_df, left_on="patient_id", right_on="PatientID")
X_img = df_img.drop(columns=["patient_id", "PatientID", "deadstatus.event"])
y_img = df_img["deadstatus.event"]

evaluate_model(X_img, y_img, name="image_model", n_splits=3)

# B. 临床特征
clinical_vars = clinical_df.set_index("PatientID").drop(columns=["Survival.time", "deadstatus.event"])
df_clinical = labels_df.merge(clinical_vars, left_on="PatientID", right_index=True)

X_c = df_clinical.drop(columns=["PatientID", "deadstatus.event"])
y_c = df_clinical["deadstatus.event"]

num_cols = X_c.select_dtypes(include=["number"]).columns.tolist()
cat_cols = X_c.select_dtypes(include=["object", "category"]).columns.tolist()

num_pipeline = make_pipeline(SimpleImputer(strategy="median"), StandardScaler())
cat_pipeline = make_pipeline(SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore"))

preprocessor = ColumnTransformer([("num", num_pipeline, num_cols), ("cat", cat_pipeline, cat_cols)])
model_clinical = make_pipeline(preprocessor, RandomForestClassifier(n_estimators=100, random_state=42))

skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
f1 = cross_val_score(model_clinical, X_c, y_c, cv=skf, scoring="f1")
auc = cross_val_score(model_clinical, X_c, y_c, cv=skf, scoring="roc_auc")

print(f"临床特征模型")
print(f"  F1:  {f1.mean():.3f}  每折: {np.round(f1, 3)}")
print(f"  AUC: {auc.mean():.3f}  每折: {np.round(auc, 3)}")
print()

# C. 联合模型（影像+临床）
df_merged = features_df.merge(clinical_df, left_on="patient_id", right_on="PatientID")
df_merged = df_merged[df_merged["deadstatus.event"].notna()]

X_all = df_merged.drop(columns=["patient_id", "PatientID", "Survival.time", "deadstatus.event"])
y_all = df_merged["deadstatus.event"]

num_cols = X_all.select_dtypes(include=["number"]).columns.tolist()
cat_cols = X_all.select_dtypes(include=["object", "category"]).columns.tolist()

preprocessor_all = ColumnTransformer([
    ("num", SimpleImputer(strategy="median"), num_cols),
    ("cat", make_pipeline(SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore")), cat_cols)
    ])

model_all = make_pipeline(preprocessor_all, RandomForestClassifier(n_estimators=100, random_state=42))

f1 = cross_val_score(model_all, X_all, y_all, cv=skf, scoring="f1")
auc = cross_val_score(model_all, X_all, y_all, cv=skf, scoring="roc_auc")

print(f"联合模型（影像 + 临床）")
print(f"  F1:  {f1.mean():.3f}  每折: {np.round(f1, 3)}")
print(f"  AUC: {auc.mean():.3f}  每折: {np.round(auc, 3)}")

Traceback (most recent call last):
  File "/home/zixi/miniconda3/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 140, in __call__
    score = scorer._score(
            ^^^^^^^^^^^^^^
  File "/home/zixi/miniconda3/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 380, in _score
    y_pred = method_caller(
             ^^^^^^^^^^^^^^
  File "/home/zixi/miniconda3/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 90, in _cached_call
    result, _ = _get_response_values(
                ^^^^^^^^^^^^^^^^^^^^^
  File "/home/zixi/miniconda3/lib/python3.12/site-packages/sklearn/utils/_response.py", line 217, in _get_response_values
    y_pred = _process_predict_proba(
             ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/zixi/miniconda3/lib/python3.12/site-packages/sklearn/utils/_response.py", line 54, in _process_predict_proba
    raise ValueError(
ValueError: Got predict_proba of shape (1, 1), but need classifier with two classes.

Traceback (most recen

image_model
  F1:  1.000  每折: [1. 1. 1.]
  AUC: nan  每折: [nan nan nan]

临床特征模型
  F1:  0.920  每折: [0.907 0.915 0.939]
  AUC: 0.622  每折: [0.651 0.606 0.61 ]

联合模型（影像 + 临床）
  F1:  1.000  每折: [1. 1. 1.]
  AUC: nan  每折: [nan nan nan]


Traceback (most recent call last):
  File "/home/zixi/miniconda3/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 140, in __call__
    score = scorer._score(
            ^^^^^^^^^^^^^^
  File "/home/zixi/miniconda3/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 380, in _score
    y_pred = method_caller(
             ^^^^^^^^^^^^^^
  File "/home/zixi/miniconda3/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 90, in _cached_call
    result, _ = _get_response_values(
                ^^^^^^^^^^^^^^^^^^^^^
  File "/home/zixi/miniconda3/lib/python3.12/site-packages/sklearn/utils/_response.py", line 217, in _get_response_values
    y_pred = _process_predict_proba(
             ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/zixi/miniconda3/lib/python3.12/site-packages/sklearn/utils/_response.py", line 54, in _process_predict_proba
    raise ValueError(
ValueError: Got predict_proba of shape (1, 1), but need classifier with two classes.

Traceback (most recen