In [None]:
import pandas as pd
import numpy as np
import pickle
import os
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_validate, StratifiedKFold, RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report, RocCurveDisplay, roc_curve,auc, precision_recall_curve, average_precision_score, f1_score, auc
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from scipy.stats import gmean
import seaborn as sns
from imblearn.metrics import geometric_mean_score
import warnings
import json
warnings.filterwarnings("ignore")
sns.set_theme()

from tqdm import tqdm

In [2]:
# Read the 30 pre-generated train/test splits from disk and publish each one
# as module-level names X_train_<i>, X_test_<i>, y_train_<i>, y_test_<i>;
# the later cells look the splits up by those names.
for i in range(1, 31):

    X_train = pd.read_csv(f'/camin1/chlee/jupyter/ML project/[24-12-13]/Data/X_train/X_train_{i}.csv')
    X_test = pd.read_csv(f'/camin1/chlee/jupyter/ML project/[24-12-13]/Data/X_test/X_test_{i}.csv')
    y_train = pd.read_csv(f'/camin1/chlee/jupyter/ML project/[24-12-13]/Data/y_train/y_train_{i}.csv')
    y_test = pd.read_csv(f'/camin1/chlee/jupyter/ML project/[24-12-13]/Data/y_test/y_test_{i}.csv')

    # FAMILY_HISTORY is removed from the feature tables of both partitions.
    X_train = X_train.drop(columns=['FAMILY_HISTORY'])
    X_test = X_test.drop(columns=['FAMILY_HISTORY'])

    # Every table is registered without its first column
    # (presumably the row index written alongside the CSV - TODO confirm).
    globals().update({
        f'X_train_{i}': X_train.drop(columns=X_train.columns[0]),
        f'X_test_{i}': X_test.drop(columns=X_test.columns[0]),
        f'y_train_{i}': y_train.drop(columns=y_train.columns[0]),
        f'y_test_{i}': y_test.drop(columns=y_test.columns[0]),
    })

In [3]:
import scipy

def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and the half-width of its t-based confidence interval.

    Parameters
    ----------
    data : array-like of numbers
        Observations to summarise.
    confidence : float, default 0.95
        Two-sided confidence level.

    Returns
    -------
    (mean, half_width)
        The interval is ``mean - half_width`` to ``mean + half_width``.
        ``half_width`` is the standard error of the mean multiplied by the
        Student-t quantile at ``(1 + confidence) / 2`` with ``len(data) - 1``
        degrees of freedom.
    """
    values = np.array(data) * 1.0
    dof = len(values) - 1
    center = np.mean(values)
    std_err = scipy.stats.sem(values)
    t_quantile = scipy.stats.t.ppf((1 + confidence) / 2., dof)
    return center, std_err * t_quantile

In [6]:
# Load the fitted models of every split and publish them as module-level
# names fit_LR_<i>, fit_rf_<i>, fit_catboost_<i>, fit_xgb_<i>.
# Each file is opened in a `with` block so its handle is closed right after
# unpickling instead of being left to the garbage collector.
# NOTE: pickle.load can execute arbitrary code; only load model files that
# were produced by this project.
for i in range(1,30+1):
    with open(f"/camin1/chlee/jupyter/ML project/[24-12-13]/model pkl save/LR/fit_LR_{i}.pkl", 'rb') as model_file:
        globals()[f'fit_LR_{i}'] = pickle.load(model_file)
    with open(f"/camin1/chlee/jupyter/ML project/[24-12-13]/model pkl save/RF/fit_rf_{i}.pkl", 'rb') as model_file:
        globals()[f'fit_rf_{i}'] = pickle.load(model_file)
    with open(f"/camin1/chlee/jupyter/ML project/[24-12-13]/model pkl save/Catboost/fit_catboost_{i}.pkl", 'rb') as model_file:
        globals()[f'fit_catboost_{i}'] = pickle.load(model_file)
    with open(f"/camin1/chlee/jupyter/ML project/[24-12-13]/model pkl save/XGB/fit_xgb_{i}.pkl", 'rb') as model_file:
        globals()[f'fit_xgb_{i}'] = pickle.load(model_file)

## Model performance comparison

### Logistic Regression (LR)

In [7]:
# Evaluate the 30 fitted logistic-regression models, each on its own test split.

# One scalar per split.
Accuracy_LR = []
AUROC_LR = []
AP_LR = []
Sensitivity_LR = []
Specificity_LR = []
Youden_LR = []
f1_LR = []
gmean_LR = []

# ROC-curve points and area per split.
LR_fpr_ = []
LR_tpr_ = []
LR_roc_auc_ = []

# Precision-recall-curve points and average precision per split.
LR_precision_ = []
LR_recall_ = []
LR_ap_ = []

for i in range(1,30+1):

    # Copy the feature frames before scaling: X_train_{i} / X_test_{i} are
    # shared with the other evaluation cells, so overwriting AGE in place
    # would change the data they read and make this cell non-idempotent.
    X_train = globals()[f'X_train_{i}'].copy()
    X_test = globals()[f'X_test_{i}'].copy()

    y_train = globals()[f'y_train_{i}']
    y_test = globals()[f'y_test_{i}']

    model = globals()[f'fit_LR_{i}']

    # Standardise AGE with the training split's statistics only, so no
    # information from the test split enters the scaling.
    age_mean = np.mean(X_train['AGE'])
    age_std = np.std(X_train['AGE'])

    X_train['AGE'] = (X_train['AGE'] - age_mean) / age_std
    X_test['AGE'] = (X_test['AGE'] - age_mean) / age_std

    y_pred_probs = model.predict_proba(X_test)[:,1]
    y_pred = model.predict(X_test)

    # NOTE(review): unpacking four cells assumes a 2x2 confusion matrix,
    # i.e. a binary target with both classes present - confirm.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    f1score = f1_score(y_test, y_pred, average='binary')
    youden_index = sensitivity + specificity - 1
    gmean = geometric_mean_score(y_test, y_pred)

    # Threshold-free scores are computed once and stored in both the metric
    # list and the curve bookkeeping list.
    roc_auc = roc_auc_score(y_test, y_pred_probs)
    average_precision = average_precision_score(y_test, y_pred_probs)

    Accuracy_LR.append(accuracy_score(y_test, y_pred))
    AUROC_LR.append(roc_auc)
    AP_LR.append(average_precision)
    Sensitivity_LR.append(sensitivity)
    Specificity_LR.append(specificity)
    Youden_LR.append(youden_index)
    f1_LR.append(f1score)
    gmean_LR.append(gmean)

    fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)

    LR_fpr_.append(fpr)
    LR_tpr_.append(tpr)
    LR_roc_auc_.append(roc_auc)

    precision, recall, thresholds = precision_recall_curve(y_test, y_pred_probs)

    LR_precision_.append(precision)
    LR_recall_.append(recall)
    LR_ap_.append(average_precision)

In [None]:
# Mean and confidence half-width (default level of mean_confidence_interval)
# of every LR metric across the splits, rounded to three decimals.
(
    (acc_m, acc_h),
    (spe_m, spe_h),
    (sen_m, sen_h),
    (f1_m, f1_h),
    (auroc_m, auroc_h),
    (ap_m, ap_h),
    (gmean_m, gmean_h),
) = [
    np.round(mean_confidence_interval(scores), 3)
    for scores in (Accuracy_LR, Specificity_LR, Sensitivity_LR, f1_LR,
                   AUROC_LR, AP_LR, gmean_LR)
]

# Confidence intervals, one "mean ± half-width" line per metric in the order:
# accuracy, specificity, sensitivity, F1, AUROC, AP, G-mean.
for centre, half_width in (
    (acc_m, acc_h),
    (spe_m, spe_h),
    (sen_m, sen_h),
    (f1_m, f1_h),
    (auroc_m, auroc_h),
    (ap_m, ap_h),
    (gmean_m, gmean_h),
):
    print(centre, '±', half_width)

### Random Forest

In [9]:
# Evaluate the 30 fitted random-forest models, each on its own test split.

# One scalar per split.
Accuracy_RF = []
AUROC_RF = []
AP_RF = []
Sensitivity_RF = []
Specificity_RF = []
Youden_RF = []
f1_RF = []
gmean_RF = []

# ROC-curve points and area per split.
rf_fpr_ = []
rf_tpr_ = []
rf_roc_auc_ = []

# Precision-recall-curve points and average precision per split.
rf_precision_ = []
rf_recall_ = []
rf_ap_ = []

for i in range(1,30+1):

    # Copy the feature frames before scaling: X_train_{i} / X_test_{i} are
    # shared with the other evaluation cells, so overwriting AGE in place
    # would change the data they read and make this cell non-idempotent.
    X_train = globals()[f'X_train_{i}'].copy()
    X_test = globals()[f'X_test_{i}'].copy()

    y_train = globals()[f'y_train_{i}']
    y_test = globals()[f'y_test_{i}']

    model = globals()[f'fit_rf_{i}']

    # Standardise AGE with the training split's statistics only, so no
    # information from the test split enters the scaling.
    age_mean = np.mean(X_train['AGE'])
    age_std = np.std(X_train['AGE'])

    X_train['AGE'] = (X_train['AGE'] - age_mean) / age_std
    X_test['AGE'] = (X_test['AGE'] - age_mean) / age_std

    y_pred_probs = model.predict_proba(X_test)[:,1]
    y_pred = model.predict(X_test)

    # NOTE(review): unpacking four cells assumes a 2x2 confusion matrix,
    # i.e. a binary target with both classes present - confirm.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    f1score = f1_score(y_test, y_pred, average='binary')
    youden_index = sensitivity + specificity - 1
    gmean = geometric_mean_score(y_test, y_pred)

    # Threshold-free scores are computed once and stored in both the metric
    # list and the curve bookkeeping list.
    roc_auc = roc_auc_score(y_test, y_pred_probs)
    average_precision = average_precision_score(y_test, y_pred_probs)

    Accuracy_RF.append(accuracy_score(y_test, y_pred))
    AUROC_RF.append(roc_auc)
    AP_RF.append(average_precision)
    Sensitivity_RF.append(sensitivity)
    Specificity_RF.append(specificity)
    Youden_RF.append(youden_index)
    f1_RF.append(f1score)
    gmean_RF.append(gmean)

    # Curve points for the ROC and precision-recall plots.
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)

    rf_fpr_.append(fpr)
    rf_tpr_.append(tpr)
    rf_roc_auc_.append(roc_auc)

    precision, recall, thresholds = precision_recall_curve(y_test, y_pred_probs)

    rf_precision_.append(precision)
    rf_recall_.append(recall)
    rf_ap_.append(average_precision)

In [None]:
# Mean and confidence half-width (default level of mean_confidence_interval)
# of every random-forest metric across the splits, rounded to three decimals.
(
    (acc_m, acc_h),
    (spe_m, spe_h),
    (sen_m, sen_h),
    (f1_m, f1_h),
    (auroc_m, auroc_h),
    (ap_m, ap_h),
    (gmean_m, gmean_h),
) = [
    np.round(mean_confidence_interval(scores), 3)
    for scores in (Accuracy_RF, Specificity_RF, Sensitivity_RF, f1_RF,
                   AUROC_RF, AP_RF, gmean_RF)
]

# Confidence intervals, one "mean ± half-width" line per metric in the order:
# accuracy, specificity, sensitivity, F1, AUROC, AP, G-mean.
for centre, half_width in (
    (acc_m, acc_h),
    (spe_m, spe_h),
    (sen_m, sen_h),
    (f1_m, f1_h),
    (auroc_m, auroc_h),
    (ap_m, ap_h),
    (gmean_m, gmean_h),
):
    print(centre, '±', half_width)

### XGBoost

In [None]:
# Evaluate the 30 fitted XGBoost models, each on its own test split.

# One scalar per split.
Accuracy_XGB = []
AUROC_XGB = []
AP_XGB = []
Sensitivity_XGB = []
Specificity_XGB = []
Youden_XGB = []
f1_XGB = []
gmean_XGB = []

# ROC-curve points and area per split.
xgb_fpr_ = []
xgb_tpr_ = []
xgb_roc_auc_ = []

# Precision-recall-curve points and average precision per split.
xgb_precision_ = []
xgb_recall_ = []
xgb_ap_ = []

for i in range(1,30+1):

    # Copy the feature frames before scaling: X_train_{i} / X_test_{i} are
    # shared with the other evaluation cells, so overwriting AGE in place
    # would change the data they read and make this cell non-idempotent.
    X_train = globals()[f'X_train_{i}'].copy()
    X_test = globals()[f'X_test_{i}'].copy()

    y_train = globals()[f'y_train_{i}']
    y_test = globals()[f'y_test_{i}']

    model = globals()[f'fit_xgb_{i}']

    # Standardise AGE with the training split's statistics only, so no
    # information from the test split enters the scaling.
    age_mean = np.mean(X_train['AGE'])
    age_std = np.std(X_train['AGE'])

    X_train['AGE'] = (X_train['AGE'] - age_mean) / age_std
    X_test['AGE'] = (X_test['AGE'] - age_mean) / age_std

    y_pred_probs = model.predict_proba(X_test)[:,1]
    y_pred = model.predict(X_test)

    # NOTE(review): unpacking four cells assumes a 2x2 confusion matrix,
    # i.e. a binary target with both classes present - confirm.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    f1score = f1_score(y_test, y_pred, average='binary')
    youden_index = sensitivity + specificity - 1
    gmean = geometric_mean_score(y_test, y_pred)

    # Threshold-free scores are computed once and stored in both the metric
    # list and the curve bookkeeping list.
    roc_auc = roc_auc_score(y_test, y_pred_probs)
    average_precision = average_precision_score(y_test, y_pred_probs)

    Accuracy_XGB.append(accuracy_score(y_test, y_pred))
    AUROC_XGB.append(roc_auc)
    AP_XGB.append(average_precision)
    Sensitivity_XGB.append(sensitivity)
    Specificity_XGB.append(specificity)
    Youden_XGB.append(youden_index)
    f1_XGB.append(f1score)
    gmean_XGB.append(gmean)

    fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)

    xgb_fpr_.append(fpr)
    xgb_tpr_.append(tpr)
    xgb_roc_auc_.append(roc_auc)

    precision, recall, thresholds = precision_recall_curve(y_test, y_pred_probs)

    xgb_precision_.append(precision)
    xgb_recall_.append(recall)
    xgb_ap_.append(average_precision)

In [None]:
# Mean and confidence half-width (default level of mean_confidence_interval)
# of every XGBoost metric across the splits, rounded to three decimals.
(
    (acc_m, acc_h),
    (spe_m, spe_h),
    (sen_m, sen_h),
    (f1_m, f1_h),
    (auroc_m, auroc_h),
    (ap_m, ap_h),
    (gmean_m, gmean_h),
) = [
    np.round(mean_confidence_interval(scores), 3)
    for scores in (Accuracy_XGB, Specificity_XGB, Sensitivity_XGB, f1_XGB,
                   AUROC_XGB, AP_XGB, gmean_XGB)
]

# Confidence intervals, one "mean ± half-width" line per metric in the order:
# accuracy, specificity, sensitivity, F1, AUROC, AP, G-mean.
for centre, half_width in (
    (acc_m, acc_h),
    (spe_m, spe_h),
    (sen_m, sen_h),
    (f1_m, f1_h),
    (auroc_m, auroc_h),
    (ap_m, ap_h),
    (gmean_m, gmean_h),
):
    print(centre, '±', half_width)

### CatBoost

In [11]:
# Evaluate the 30 fitted CatBoost models, each on its own test split.

# One scalar per split.
Accuracy_CATBOOST = []
AUROC_CATBOOST = []
AP_CATBOOST = []
Sensitivity_CATBOOST = []
Specificity_CATBOOST = []
Youden_CATBOOST = []
f1_CATBOOST = []
gmean_CATBOOST = []

# ROC-curve points and area per split.
catboost_fpr_ = []
catboost_tpr_ = []
catboost_roc_auc_ = []

# Precision-recall-curve points and average precision per split.
catboost_precision_ = []
catboost_recall_ = []
catboost_ap_ = []

for i in range(1,30+1):

    # Copy the feature frames before scaling: X_train_{i} / X_test_{i} are
    # shared with the other evaluation cells, so overwriting AGE in place
    # would change the data they read and make this cell non-idempotent.
    X_train = globals()[f'X_train_{i}'].copy()
    X_test = globals()[f'X_test_{i}'].copy()

    y_train = globals()[f'y_train_{i}']
    y_test = globals()[f'y_test_{i}']

    model = globals()[f'fit_catboost_{i}']

    # Standardise AGE with the training split's statistics only, so no
    # information from the test split enters the scaling.
    age_mean = np.mean(X_train['AGE'])
    age_std = np.std(X_train['AGE'])

    X_train['AGE'] = (X_train['AGE'] - age_mean) / age_std
    X_test['AGE'] = (X_test['AGE'] - age_mean) / age_std

    y_pred_probs = model.predict_proba(X_test)[:,1]
    y_pred = model.predict(X_test)

    # NOTE(review): unpacking four cells assumes a 2x2 confusion matrix,
    # i.e. a binary target with both classes present - confirm.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    f1score = f1_score(y_test, y_pred, average='binary')
    youden_index = sensitivity + specificity - 1
    gmean = geometric_mean_score(y_test, y_pred)

    # Threshold-free scores are computed once and stored in both the metric
    # list and the curve bookkeeping list.
    roc_auc = roc_auc_score(y_test, y_pred_probs)
    average_precision = average_precision_score(y_test, y_pred_probs)

    Accuracy_CATBOOST.append(accuracy_score(y_test, y_pred))
    AUROC_CATBOOST.append(roc_auc)
    AP_CATBOOST.append(average_precision)
    Sensitivity_CATBOOST.append(sensitivity)
    Specificity_CATBOOST.append(specificity)
    Youden_CATBOOST.append(youden_index)
    f1_CATBOOST.append(f1score)
    gmean_CATBOOST.append(gmean)

    fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)

    catboost_fpr_.append(fpr)
    catboost_tpr_.append(tpr)
    catboost_roc_auc_.append(roc_auc)

    precision, recall, thresholds = precision_recall_curve(y_test, y_pred_probs)

    catboost_precision_.append(precision)
    catboost_recall_.append(recall)
    catboost_ap_.append(average_precision)


In [None]:
# Mean and confidence half-width (default level of mean_confidence_interval)
# of every CatBoost metric across the splits, rounded to three decimals.
(
    (acc_m, acc_h),
    (spe_m, spe_h),
    (sen_m, sen_h),
    (f1_m, f1_h),
    (auroc_m, auroc_h),
    (ap_m, ap_h),
    (gmean_m, gmean_h),
) = [
    np.round(mean_confidence_interval(scores), 3)
    for scores in (Accuracy_CATBOOST, Specificity_CATBOOST, Sensitivity_CATBOOST,
                   f1_CATBOOST, AUROC_CATBOOST, AP_CATBOOST, gmean_CATBOOST)
]

# Confidence intervals, one "mean ± half-width" line per metric in the order:
# accuracy, specificity, sensitivity, F1, AUROC, AP, G-mean.
for centre, half_width in (
    (acc_m, acc_h),
    (spe_m, spe_h),
    (sen_m, sen_h),
    (f1_m, f1_h),
    (auroc_m, auroc_h),
    (ap_m, ap_h),
    (gmean_m, gmean_h),
):
    print(centre, '±', half_width)

### Paired t-tests (LR vs. each other model)

In [None]:
from scipy import stats

### LR vs. RF ###
# Paired t-test per metric: the two score lists are paired split by split
# (element k of each list comes from split k+1).

print("### LR vs. RF ###")

lr_vs_rf = (
    ("1. Accuracy", Accuracy_LR, Accuracy_RF),
    ("2. AUROC", AUROC_LR, AUROC_RF),
    ("3. AP", AP_LR, AP_RF),
    ("4. Sensitivity", Sensitivity_LR, Sensitivity_RF),
    ("5. Specificity", Specificity_LR, Specificity_RF),
    ("6. f1", f1_LR, f1_RF),
    ("7. g-mean", gmean_LR, gmean_RF),
)

for heading, lr_scores, other_scores in lr_vs_rf:
    print(heading)
    stat, p_val = stats.ttest_rel(lr_scores, other_scores)
    print("statistic:", np.round(stat, 4), '  p-value:', np.round(p_val, 4))

In [None]:
### LR vs. CATBOOST ###
# Paired t-test per metric: the two score lists are paired split by split
# (element k of each list comes from split k+1).

print("### LR vs. CATBOOST ###")

lr_vs_catboost = (
    ("1. Accuracy", Accuracy_LR, Accuracy_CATBOOST),
    ("2. AUROC", AUROC_LR, AUROC_CATBOOST),
    ("3. AP", AP_LR, AP_CATBOOST),
    ("4. Sensitivity", Sensitivity_LR, Sensitivity_CATBOOST),
    ("5. Specificity", Specificity_LR, Specificity_CATBOOST),
    ("6. f1", f1_LR, f1_CATBOOST),
    ("7. g-mean", gmean_LR, gmean_CATBOOST),
)

for heading, lr_scores, other_scores in lr_vs_catboost:
    print(heading)
    stat, p_val = stats.ttest_rel(lr_scores, other_scores)
    print("statistic:", np.round(stat, 4), '  p-value:', np.round(p_val, 4))

In [None]:
# Paired t-test per metric, LR against XGBoost: the two score lists are paired
# split by split (element k of each list comes from split k+1).
print("### LR vs. XGB ###")

lr_vs_xgb = (
    ("1. Accuracy", Accuracy_LR, Accuracy_XGB),
    ("2. AUROC", AUROC_LR, AUROC_XGB),
    ("3. AP", AP_LR, AP_XGB),
    ("4. Sensitivity", Sensitivity_LR, Sensitivity_XGB),
    ("5. Specificity", Specificity_LR, Specificity_XGB),
    ("6. f1", f1_LR, f1_XGB),
    ("7. g-mean", gmean_LR, gmean_XGB),
)

for heading, lr_scores, other_scores in lr_vs_xgb:
    print(heading)
    stat, p_val = stats.ttest_rel(lr_scores, other_scores)
    print("statistic:", np.round(stat, 4), '  p-value:', np.round(p_val, 4))

## Mean ROC curve

In [18]:
def mean_auroc(model_name, n_splits=30):
    """Plot one model family's ROC curve on every split and average the curves.

    For each split ``i`` in ``1..n_splits`` the function looks up the
    module-level objects ``X_train_{i}``, ``X_test_{i}``, ``y_test_{i}`` and
    ``fit_{model_name}_{i}``, standardises ``AGE`` in a *copy* of the test
    features using the training split's mean and standard deviation, draws the
    split's ROC curve on a shared axes, and interpolates its TPR onto a common
    100-point FPR grid so the curves can be averaged point-wise.

    The shared split frames are never modified; the original in-place scaling
    altered ``X_train_{i}`` / ``X_test_{i}`` for every later reader.

    Parameters
    ----------
    model_name : str
        Middle part of the fitted-model variable names, e.g. ``'LR'``,
        ``'rf'``, ``'catboost'`` or ``'xgb'``.
    n_splits : int, default 30
        Number of splits to evaluate, numbered from 1.

    Returns
    -------
    mean_fpr : numpy.ndarray
        The common FPR grid, 100 evenly spaced points from 0 to 1.
    mean_tpr : numpy.ndarray
        Point-wise mean of the interpolated TPR curves, with the last point
        forced to 1.0.
    mean_auc : float
        Area under the (mean_fpr, mean_tpr) curve.
    std_auc : float
        Standard deviation of the per-split AUC values.
    """
    fig, ax = plt.subplots(figsize=(6, 6))

    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    for i in range(1, n_splits + 1):

        # Scaling statistics come from the training split only.
        train_age = globals()[f'X_train_{i}']['AGE']
        age_mean = np.mean(train_age)
        age_std = np.std(train_age)

        # Scale a private copy so the shared frame stays untouched.
        X_test = globals()[f'X_test_{i}'].copy()
        X_test['AGE'] = (X_test['AGE'] - age_mean) / age_std

        y_test = globals()[f'y_test_{i}']
        classifier = globals()[f'fit_{model_name}_{i}']

        viz = RocCurveDisplay.from_estimator(
            classifier,
            X_test,
            y_test,
            ax=ax,)

        # Resample this split's curve onto the common grid and pin its start
        # to TPR = 0 at FPR = 0.
        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)

    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0  # the averaged curve must end at (1, 1)
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)

    plt.show()
    plt.close(fig)  # release the figure; this function is called once per model

    return mean_fpr, mean_tpr, mean_auc, std_auc

In [None]:
# Mean ROC curve of each model family across the splits.
# Every call also renders that model's per-split ROC figure.
(LR_mean_fpr, LR_mean_tpr, LR_mean_auc, LR_std_auc) = mean_auroc('LR')
(RF_mean_fpr, RF_mean_tpr, RF_mean_auc, RF_std_auc) = mean_auroc('rf')
(CATBOOST_mean_fpr, CATBOOST_mean_tpr,
 CATBOOST_mean_auc, CATBOOST_std_auc) = mean_auroc('catboost')
(XGB_mean_fpr, XGB_mean_tpr, XGB_mean_auc, XGB_std_auc) = mean_auroc('xgb')

In [20]:
# Mean-ROC summary per model, keyed by the label shown in the plot legend.
# Insertion order is the order in which the curves are drawn.
models = {}
models['LR'] = {
    'mean_fpr': LR_mean_fpr,
    'mean_tpr': LR_mean_tpr,
    'mean_auc': LR_mean_auc,
    'std_auc': LR_std_auc,
}
models['RF'] = {
    'mean_fpr': RF_mean_fpr,
    'mean_tpr': RF_mean_tpr,
    'mean_auc': RF_mean_auc,
    'std_auc': RF_std_auc,
}
# NOTE(review): the XGBoost results are labelled 'GBM' - confirm this is the
# intended legend text.
models['GBM'] = {
    'mean_fpr': XGB_mean_fpr,
    'mean_tpr': XGB_mean_tpr,
    'mean_auc': XGB_mean_auc,
    'std_auc': XGB_std_auc,
}
models['Catboost'] = {
    'mean_fpr': CATBOOST_mean_fpr,
    'mean_tpr': CATBOOST_mean_tpr,
    'mean_auc': CATBOOST_mean_auc,
    'std_auc': CATBOOST_std_auc,
}

In [21]:
# Draw every model's mean ROC curve on one figure and export it as TIFF and PDF.
fig = plt.figure(0)
ax = fig.gca()

for name, values in models.items():
    fpr = values['mean_fpr']
    tpr = values['mean_tpr']
    roc_auc = values['mean_auc']

    ax.plot(fpr, tpr, label='%s (AUC = %0.3f)'  % (name, roc_auc))

# Chance-level diagonal: drawn once after the curves instead of once per
# model, so the exported figure does not hold four identical overlapping lines.
ax.plot([0, 1], [0, 1], color='black', linestyle='--')

ax.set_title('ROC curves')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend(loc='lower right')

fig.savefig('/camin1/chlee/jupyter/ML project/[24-12-13]/figure/figure 1.tiff', format='tiff', dpi=300)
fig.savefig('/camin1/chlee/jupyter/ML project/[24-12-13]/figure/figure 1.pdf', format='pdf', dpi=300)
# Close this specific figure so it is not also rendered below the cell.
plt.close(fig)

In [None]:
# Draw every model's mean ROC curve on one figure for inline display.
# Nothing is saved or closed here, so the notebook renders the figure.
fig = plt.figure(0)
ax = fig.gca()

for name, values in models.items():
    fpr = values['mean_fpr']
    tpr = values['mean_tpr']
    roc_auc = values['mean_auc']

    ax.plot(fpr, tpr, label='%s (AUC = %0.3f)'  % (name, roc_auc))

# Chance-level diagonal: drawn once after the curves instead of once per model.
ax.plot([0, 1], [0, 1], color='black', linestyle='--')

ax.set_title('ROC curves')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend(loc='lower right')

# Explicit show keeps the Legend repr out of the cell output.
plt.show()