In [None]:
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import auc
from sklearn.metrics import RocCurveDisplay
import lightgbm as lgb
import pandas as pd
import os
from numpy import asarray
from numpy import savetxt, loadtxt
import copy
from sklearn.metrics import roc_auc_score,roc_curve, auc

In [None]:
# --- Experiment grid -------------------------------------------------------
# Each of the three lists below is iterated by the evaluation loop; ROI name,
# fold index and upsampling-method name are also directory levels in the
# on-disk layout: <root>/<roi_name>/FOLD<fold>/<ups_method>/...
roi_names = [
    "aTV",
    "iTV_2mm",
    "sTV_2mm",
    "sTV_4mm",
    "eTV_2mm",
    "eTV_4mm",
]
folds = list(range(5))  # cross-validation fold indices 0..4
ups_methods = [
    "NONUPSAMPLED",
    "ADASYN",
    "SMOTE",
    "SVMSMOTE",
]

# Serialized model files expected inside every <roi>/FOLD<k>/<ups_method> folder.
model_file_names = [
    "lgbm.pkl",
    "subspaceKNN.pkl",
    "rf.pkl",
]

# --- Locations (placeholders; point these at the real directories) ---------
trained_model_folder = "path/to/trained/models"          # root of the trained models
selected_features_folder = "path/to/selected/features"   # root of the train*/test* CSVs
rocs_path = "path/to/save/rocs"                          # root for the saved ROC figures

In [None]:
def read_model_and_data(trained_model_folder, model_file_name, selected_features_folder, roi_name, fold, ups_method):
    """Load one trained model together with its train/test feature tables.

    The model is read from
    ``<trained_model_folder>/<roi_name>/FOLD<fold>/<ups_method>/<model_file_name>``
    and the feature tables from
    ``<selected_features_folder>/<roi_name>/FOLD<fold>/<ups_method>/``, where
    the training CSV is the file whose name starts with ``train`` and the test
    CSV the file whose name starts with ``test``. When several files share a
    prefix, the last one in sorted name order is used.

    Parameters
    ----------
    trained_model_folder : str
        Root directory of the serialized models.
    model_file_name : str
        File name of the model to load (e.g. ``"rf.pkl"``).
    selected_features_folder : str
        Root directory of the feature CSV files.
    roi_name : str
        Name of the ROI sub-directory.
    fold : int
        Fold index; rendered as ``FOLD<fold>`` in the path.
    ups_method : str
        Name of the upsampling-method sub-directory.

    Returns
    -------
    tuple
        ``(model, X_train, X_test, y_train, y_test)`` where the ``X_*`` frames
        are the CSV contents without the ``"label"`` column and the ``y_*``
        series are that ``"label"`` column.

    Raises
    ------
    FileNotFoundError
        If the feature folder holds no ``train*`` or no ``test*`` file.

    Notes
    -----
    ``joblib.load`` is pickle-based and can execute arbitrary code while
    loading; only point this function at model files you trust.
    """
    model_path = os.path.join(trained_model_folder, roi_name, f"FOLD{fold}", ups_method, model_file_name)
    model = joblib.load(model_path)

    folder_path = os.path.join(selected_features_folder, roi_name, f"FOLD{fold}", ups_method)

    train_df_path = None
    test_df_path = None
    # Sorted so that the selected file does not depend on the arbitrary order
    # in which the operating system lists directory entries.
    for file_name in sorted(os.listdir(folder_path)):
        if file_name.startswith("train"):
            train_df_path = os.path.join(folder_path, file_name)
            print("train_df path: ", train_df_path)
        if file_name.startswith("test"):
            test_df_path = os.path.join(folder_path, file_name)
            print("test_df path: ", test_df_path)

    # Fail with an actionable message instead of an UnboundLocalError further down.
    missing_prefixes = [
        prefix
        for prefix, path in (("train", train_df_path), ("test", test_df_path))
        if path is None
    ]
    if missing_prefixes:
        raise FileNotFoundError(
            f"No file with prefix {missing_prefixes} found in {folder_path!r}"
        )

    train_data = pd.read_csv(train_df_path)
    test_data = pd.read_csv(test_df_path)
    y_train = train_data["label"]
    X_train = train_data.drop(["label"], axis=1)
    y_test = test_data["label"]
    X_test = test_data.drop(["label"], axis=1)

    return model, X_train, X_test, y_train, y_test

In [None]:
def _positive_class_scores(model, X):
    """Return one continuous score per row of ``X`` for ROC analysis.

    Models exposing ``predict_proba`` contribute the second probability
    column. Any other model falls back to ``predict``; this is the path a
    native LightGBM Booster would take, whose ``predict`` presumably already
    yields probabilities -- TODO confirm against how ``lgbm.pkl`` was trained.
    """
    if hasattr(model, "predict_proba"):
        return asarray(model.predict_proba(X))[:, 1]
    return asarray(model.predict(X))


# Common false-positive-rate grid onto which every fold's ROC curve is
# interpolated so the curves can be averaged point-wise.
mean_fpr = np.linspace(0, 1, 100)

# One figure per (ROI, upsampling method, model): per-fold ROC curves, their
# mean, and a +/- 1 standard deviation band. Per-fold scores are also written
# next to the feature files as y_pred_<model>.txt.
for roi_name in roi_names:
    for ups_method in ups_methods:
        for model_file_name in model_file_names:
            model_stem = model_file_name.split('.')[0]
            tprs = []
            aucs = []
            fig, ax = plt.subplots(figsize=(8, 6))

            for fold in folds:
                model, _, X_test, _, y_test = read_model_and_data(
                    trained_model_folder,
                    model_file_name,
                    selected_features_folder,
                    roi_name,
                    fold,
                    ups_method,
                )
                y_pred = _positive_class_scores(model, X_test)

                # File name is derived from the model name, so a model added
                # to model_file_names can never reuse another model's scores.
                y_pred_save_path = os.path.join(
                    selected_features_folder,
                    roi_name,
                    f"FOLD{fold}",
                    ups_method,
                    f"y_pred_{model_stem}.txt",
                )
                os.makedirs(os.path.dirname(y_pred_save_path), exist_ok=True)
                savetxt(y_pred_save_path, y_pred, delimiter=',')

                viz = RocCurveDisplay.from_predictions(
                    y_test,
                    y_pred,
                    name="ROC fold {}".format(fold),
                    alpha=0.3,
                    lw=1,
                    ax=ax,
                )
                interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
                interp_tpr[0] = 0.0  # pin the curve to the origin
                tprs.append(interp_tpr)
                aucs.append(viz.roc_auc)

            ax.plot([0, 1], [0, 1], linestyle="--", lw=2, color="r", label="Chance", alpha=0.8)

            mean_tpr = np.mean(tprs, axis=0)
            mean_tpr[-1] = 1.0  # pin the curve to (1, 1)
            mean_auc = auc(mean_fpr, mean_tpr)
            std_auc = np.std(aucs)

            # The subspace-KNN model is shown as plain "KNN" in the legend.
            display_name = "KNN" if model_file_name == "subspaceKNN.pkl" else model_stem.upper()
            label = display_name + r" Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc)
            ax.plot(
                mean_fpr,
                mean_tpr,
                color="b",
                label=label,
                lw=2,
                alpha=0.8,
            )

            std_tpr = np.std(tprs, axis=0)
            tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
            tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
            ax.fill_between(
                mean_fpr,
                tprs_lower,
                tprs_upper,
                color="grey",
                alpha=0.2,
                label=r"$\pm$ 1 std. dev.",
            )

            ax.set(
                xlim=[-0.05, 1.05],
                ylim=[-0.05, 1.05],
                title="Receiver operating characteristic",
            )
            ax.legend(loc="lower right")

            fig_name = f"ROC_{model_stem}.png"
            save_fig_path = os.path.join(rocs_path, roi_name, ups_method, fig_name)
            os.makedirs(os.path.dirname(save_fig_path), exist_ok=True)
            # Save through the figure handle rather than pyplot's "current figure".
            fig.savefig(save_fig_path, dpi=600)
            plt.show()
            # Release the figure; otherwise every iteration's figure can stay in memory.
            plt.close(fig)
            print(roi_name, ups_method, model_file_name )
            print("-"*140)