In [None]:
import numpy as np
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
from sksurv.metrics import brier_score

# Load example data (an arbitrary dataset is used)
from lifelines.datasets import load_rossi

data = load_rossi()

# Preprocessing: separate the outcome columns from the covariates.
# "week" and "arrest" are the duration and event columns used when fitting
# the Cox model further down; every other column is treated as a covariate.
y = data.loc[:, ["week", "arrest"]]
X = data.drop(["week", "arrest"], axis=1)


In [None]:
# 10-fold cross-validation setup
kf = KFold(n_splits=10, shuffle=True, random_state=42)


def _to_survival_array(outcome):
    """Convert an outcome frame into a scikit-survival style structured array.

    Parameters
    ----------
    outcome : pandas.DataFrame
        Frame holding an "arrest" column (event indicator) and a "week"
        column (observed time).

    Returns
    -------
    numpy.ndarray
        1-D structured array with the fields ("event", bool) and
        ("time", float), one record per row of ``outcome``.
    """
    structured = np.empty(len(outcome), dtype=[("event", bool), ("time", float)])
    structured["event"] = outcome["arrest"].to_numpy().astype(bool)
    structured["time"] = outcome["week"].to_numpy()
    return structured


# Per-fold results: one C-index and one Brier-score curve per fold
coxph_cindices = []
brier_scores = []

# Materialise the folds once; they are needed both to derive the shared time
# grid and for the train/evaluate loop itself.
folds = list(kf.split(X))

# brier_score only accepts time points inside the follow-up range of the test
# data, i.e. the half-open interval [min, max). Build a single grid that is
# valid for every fold so the per-fold curves have the same length and can be
# averaged point by point.
grid_start = max(y["week"].iloc[test_index].min() for _, test_index in folds)
grid_stop = min(y["week"].iloc[test_index].max() for _, test_index in folds)
times = np.arange(np.ceil(grid_start), grid_stop)  # grid_stop itself is excluded
if times.size == 0:
    raise ValueError(
        "The test folds share no common follow-up range; "
        "cannot build a time grid for the Brier score."
    )

# Cross-validation
for train_index, test_index in folds:
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Outcomes in the structured format scikit-survival expects
    y_train_structured = _to_survival_array(y_train)
    y_test_structured = _to_survival_array(y_test)

    # Cox proportional hazards model
    coxph = CoxPHFitter()
    coxph.fit(
        pd.concat([X_train, y_train], axis=1), duration_col="week", event_col="arrest"
    )

    # C-index: concordance_index expects scores that grow with survival time,
    # so the partial hazard (higher = riskier) is negated.
    coxph_pred = coxph.predict_partial_hazard(X_test)
    cindex = concordance_index(y_test["week"], -coxph_pred, y_test["arrest"])
    coxph_cindices.append(cindex)

    # Brier score: predict_survival_function returns one column per subject,
    # so transpose to (n_subjects, n_times). brier_score needs the training
    # outcome as its first argument (used for the censoring weights) and
    # returns a (times, scores) pair, of which only the scores are stored.
    surv_probs = coxph.predict_survival_function(X_test, times=times).T.to_numpy()
    _, fold_brier = brier_score(
        y_train_structured, y_test_structured, surv_probs, times
    )
    brier_scores.append(fold_brier)

# C-index box plot
plt.figure(figsize=(10, 5))
plt.boxplot(coxph_cindices, vert=True, patch_artist=True)
plt.title("CoxPH Model C-index (10-fold CV)")
plt.ylabel("Concordance Index")
plt.show()

# Brier score line chart, averaged over the folds at each time point
plt.figure(figsize=(10, 5))
mean_brier_scores = np.mean(brier_scores, axis=0)
plt.plot(times, mean_brier_scores, label="CoxPH Brier Score")
plt.title("Brier Score over Time (CoxPH)")
plt.xlabel("Time")
plt.ylabel("Brier Score")
plt.legend()
plt.show()

# Print the results
print(f"CoxPH C-indices: {coxph_cindices}")
print(f"Mean C-index: {np.mean(coxph_cindices):.4f}")

In [None]:
    # Reporting cell: summarises the results already collected by the
    # cross-validation loop. It deliberately does not refit the model, so
    # re-running it never appends an extra fold to the result lists.
if not coxph_cindices or not brier_scores:
    raise RuntimeError(
        "No cross-validation results available; run the cross-validation cell first."
    )

# C-index box plot
plt.figure(figsize=(10, 5))
plt.boxplot(coxph_cindices, vert=True, patch_artist=True)
plt.title("CoxPH Model C-index (10-fold CV)")
plt.ylabel("Concordance Index")
plt.show()

# Brier score line chart, averaged over the folds at each time point
plt.figure(figsize=(10, 5))
mean_brier_scores = np.mean(brier_scores, axis=0)
plt.plot(times, mean_brier_scores, label="CoxPH Brier Score")
plt.title("Brier Score over Time (CoxPH)")
plt.xlabel("Time")
plt.ylabel("Brier Score")
plt.legend()
plt.show()

# Print the results
print(f"CoxPH C-indices: {coxph_cindices}")
print(f"Mean C-index: {np.mean(coxph_cindices):.4f}")