### Author: Ally Sprik
### Last-updated: 25-02-2024

The goal of this notebook is to evaluate the models on a wide variety of imputed datasets, in order to compare imputation methods.

In [None]:
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Fitted Bayesian networks that are compared in this notebook. The quoted label
# after each one is the column/legend name it receives in the metric tables and
# plots further down ("Plat" presumably stands for the Platelets variable — TODO confirm).

# Reference network -> "Origineel Netwerk"
origineel_net = gum.loadBN("../0.3. Original_Casper_files/Results/Casper_fitted_952.net")
# "-Plat -TCGA -MRI"
woPlat_woRest = gum.loadBN("../3. Model/Fitted_Networks/R_WOP_original.net")
# "-Plat -TCGA +MRI"
WOPlat_MRI = gum.loadBN("../3. Model/Fitted_Networks/R_WOP_AddMRIMI_fitted.net")
# "-Plat +TCGA -MRI"
WOPlat_TCGA = gum.loadBN("../3. Model/Fitted_Networks/R_WOP_AddTCGA_1_fitted.net")
# "-Plat +TCGA +MRI"
WOPlat_TCGA_MRI = gum.loadBN("../3. Model/Fitted_Networks/R_WOP_AddMRIMITCGA_fitted.net")
# "+Plat -TCGA -MRI"
wPlat_woRest = gum.loadBN("../3. Model/Fitted_Networks/R_WP_Original.net")
# "+Plat +TCGA +MRI"
WPlat_all = gum.loadBN("../3. Model/Fitted_Networks/R_WP_all_train.net")


Select the data to use 

In [None]:
# Alternative imputed datasets. To evaluate one of them, comment out the active
# read_csv call below and uncomment the wanted line instead.
#data = pd.read_csv("../0.2. Imputed_data/Combined_P53_imputed_Peter.csv", sep=";")
#data = pd.read_csv("../0.2. Imputed_data/TrainingTCGA_EM_imp.csv", sep=",")
#data = pd.read_csv("../0.2. Imputed_data/Peter_imputeddata.csv", sep=";")
#data = pd.read_csv("../0.2. Imputed_data/Peter-imputeddata-originalset.csv", sep=";")
#data = pd.read_csv("../0.3. Original_Casper_files/Results/Casper_imputed.csv")
#data = pd.read_csv("../0.2. Imputed_data/MIDAS_Imputed_TCGATraining_filled5yr.csv", sep=";")
#data = pd.read_csv("../0.2. Imputed_data/TrainingTCGA_JAMA_imputed.csv")
#casper_data = pd.read_csv("../0.3. Original_Casper_files/Results/Casper_imputed.csv")
#data = pd.read_csv("../0.2. Imputed_data/MIDAS_Imputed_TCGATraining_JAMA.csv")

# Dataset currently under evaluation.
data = pd.read_csv("../0.2. Imputed_data/Training_JAMA_EM_imputed.csv")

# Optional: keep only the rows flagged "yes" in Included_in_training_cohort.
#data = data[data["Included_in_training_cohort"] == "yes"]

# Renumber the rows 0..n-1; later cells address rows by integer label.
data = data.reset_index(drop=True)
data

Clinical ESGO model generation

In [None]:
# Clinical model generation
# Simulate clinical decision making by assigning every patient an ESGO risk group,
# once without molecular information (ESGOwoTCGA) and once with it (ESGOwTCGA),
# following the classification chart below.
# Low risk ESGOwoTCGA: FIGO Stage IA with grade 1/2, no LVSI
# Low risk ESGOwTCGA: FIGO Stage I-II with POLE mutation, no LVSI;
#                               or Stage IA with grade 1/2, no LVSI, MSI or NSMP
# Intermediate risk ESGOwoTCGA: FIGO Stage IB with grade 1/2, no LVSI;
#                               or Stage IA with grade 3, no LVSI;
#                               or Stage IA with grade 1/2, LVSI;
#                               or Stage IA non-endometrioid. no MRI_MI
# Intermediate risk ESGOwTCGA: FIGO Stage IB with grade 1/2, no LVSI, MSI or NSMP;
#                               or Stage IA with grade 3, no LVSI, MSI or NSMP;
#                               or Stage IA with p53 abn and or non-endometrioid, LVSI, MSI or NSMP, no MRI_MI;
# High-intermediate risk ESGOwoTCGA: FIGO Stage I with LVSI;
#                               or Stage IB with grade 3, regardless of LVSI;
#                               or Stage II;
# High-intermediate risk ESGOwTCGA: FIGO Stage I with LVSI, MSI or NSMP;
#                               or Stage IB with grade 3, regardless of LVSI, MSI or NSMP;
#                               or Stage II with MSI or NSMP;
# High risk ESGOwoTCGA: FIGO Stage III-IV;
#                               or Stage I-IVA non-endometrioid, with MRI_MI;
# High risk ESGOwTCGA: FIGO Stage III-IV, with MSI or NSMP;
#                               or Stage I-IVA non-endometrioid, with MRI_MI, p53abn;
#                               or Stage I-IVA non-endometrioid, with MRI_MI;
# Advanced metastatic risk ESGOwoTCGA: FIGO Stage III-IVA with residual disease;
#                               or Stage IVB;
# Advanced metastatic risk ESGOwTCGA: FIGO Stage III-IVA with residual disease;
#                               or Stage IVB any molecular profile;
#
# NOTE(review): not every line of the chart is implemented below (e.g. residual
# disease and "Stage IA with grade 1/2, LVSI" for ESGOwoTCGA) — confirm intent.

_LOW_GRADES = ("grade 1", "grade 2")
_EARLY_STAGES = ("IA", "IB")
_STAGE_III_TO_IVA = ("IIIA", "IIIB", "IIIC", "IVA")
_MSI_OR_NSMP = ("MSI", "NSMP")


def _molecular_class(row):
    """Collapse the MSI and POLE flags of one patient into a single class label.

    MSI takes precedence over POLE; a patient with neither flag set to "yes"
    but at least one explicit "no" is NSMP; otherwise the class is unknown (NaN).
    """
    if row["MSI"] == "yes":
        return "MSI"
    if row["POLE"] == "yes":
        return "POLE"
    if row["MSI"] == "no" or row["POLE"] == "no":
        return "NSMP"
    return np.nan


def _esgo_without_tcga(row):
    """Return the ESGO risk group of one patient without molecular data, or NaN."""
    figo = row["FIGO"]
    grade = row["PostoperativeGrade"]
    lvsi = row["LVSI"]
    mri_mi = row["MRI_MI"]
    endometrioid = row["Histology"] == "endometrioid"
    non_endometrioid = row["Histology"] == "non-endometrioid"
    low_grade = grade in _LOW_GRADES
    grade_3 = grade == "grade 3"
    no_lvsi = lvsi == "no"

    # The order of the rules matters: the first matching rule wins.
    if figo == "IA" and low_grade and no_lvsi and endometrioid:
        return "Low"
    if figo == "IB" and low_grade and no_lvsi and endometrioid:
        return "Intermediate"
    if figo == "IA" and grade_3 and no_lvsi and endometrioid:
        return "Intermediate"
    if figo == "IA" and non_endometrioid and mri_mi == "lt_50":
        return "Intermediate"
    if figo in _EARLY_STAGES and endometrioid and lvsi == "yes":
        return "High-intermediate"
    if figo == "IB" and grade_3 and endometrioid:
        return "High-intermediate"
    if figo == "II":
        return "High-intermediate"
    if figo in _STAGE_III_TO_IVA:
        return "High"
    if figo != "IVB" and non_endometrioid and mri_mi == "ge_50":
        return "High"
    if figo == "IVB":
        return "Advanced metastatic"
    return np.nan


def _esgo_with_tcga(row):
    """Return the ESGO risk group of one patient including molecular class, or NaN."""
    figo = row["FIGO"]
    grade = row["PostoperativeGrade"]
    lvsi = row["LVSI"]
    mri_mi = row["MRI_MI"]
    p53_mutant = row["p53"] == "mutant"
    endometrioid = row["Histology"] == "endometrioid"
    non_endometrioid = row["Histology"] == "non-endometrioid"
    mol_class = _molecular_class(row)
    msi_or_nsmp = mol_class in _MSI_OR_NSMP
    low_grade = grade in _LOW_GRADES
    grade_3 = grade == "grade 3"
    no_lvsi = lvsi == "no"

    # The order of the rules matters: the first matching rule wins.
    if figo in ("IA", "IB", "II") and mol_class == "POLE" and endometrioid:
        return "Low"
    # Low risk requires grade 1/2 (see chart above). Without the grade check every
    # stage IA, LVSI-negative MSI/NSMP tumour ended up here and the grade 3
    # "Intermediate" rule two lines below could never fire.
    if figo == "IA" and low_grade and endometrioid and no_lvsi and msi_or_nsmp:
        return "Low"
    if figo == "IB" and low_grade and no_lvsi and msi_or_nsmp and endometrioid:
        return "Intermediate"
    if figo == "IA" and grade_3 and no_lvsi and endometrioid and msi_or_nsmp:
        return "Intermediate"
    if figo == "IA" and (non_endometrioid or p53_mutant) and mri_mi == "lt_50":
        return "Intermediate"
    if figo in _EARLY_STAGES and lvsi == "yes" and msi_or_nsmp and endometrioid:
        return "High-intermediate"
    if figo == "IB" and grade_3 and msi_or_nsmp and endometrioid:
        return "High-intermediate"
    if figo == "II" and msi_or_nsmp and endometrioid:
        return "High-intermediate"
    if figo in _STAGE_III_TO_IVA and msi_or_nsmp and endometrioid:
        return "High"
    if figo != "IVB" and p53_mutant and mri_mi == "ge_50":
        return "High"
    if figo != "IVB" and non_endometrioid and mri_mi == "ge_50" and msi_or_nsmp:
        return "High"
    if figo == "IVB":
        return "Advanced metastatic"
    return np.nan


# Build each column in one go (object dtype, so labels and NaN can coexist)
# instead of writing cell by cell through chained indexing.
data["ESGOwoTCGA"] = pd.Series(
    [_esgo_without_tcga(row) for _, row in data.iterrows()],
    index=data.index, dtype=object,
)
data["ESGOwTCGA"] = pd.Series(
    [_esgo_with_tcga(row) for _, row in data.iterrows()],
    index=data.index, dtype=object,
)

# Patients that none of the rules could classify, kept for inspection.
not_included_woTCGA = data[data["ESGOwoTCGA"].isna()].copy()
not_included_wTCGA = data[data["ESGOwTCGA"].isna()].copy()

Insert probabilities from literature for LNM

In [None]:
# ESGOwoTCGA_LND, ESGOwTCGA_LND
# High-intermediate, High, and Advanced metastatic risk groups are considered for LND in the ESGO classification

# LNM probability assigned to every ESGO risk group.
_ESGO_LNM_PROBABILITY = {
    "Low": 0.01,
    "Intermediate": 0.05,
    "High-intermediate": 0.11,
    "High": 0.20,
    "Advanced metastatic": 0.20,
}


def _lnm_probability(risk_group):
    """Translate one ESGO risk group into its LNM probability.

    Unclassified patients (NaN) stay NaN; a label that is not in the table maps to 0.
    """
    if pd.isna(risk_group):
        return np.nan
    return _ESGO_LNM_PROBABILITY.get(risk_group, 0)


# One vectorised assignment per column. This replaces the per-row chained writes,
# one of which targeted the misspelled column "ESGOwTCGALND".
data["ESGOwoTCGA_LND"] = data["ESGOwoTCGA"].map(_lnm_probability).astype(float)
data["ESGOwTCGA_LND"] = data["ESGOwTCGA"].map(_lnm_probability).astype(float)

Get the targets for the clinical models

In [None]:
# Get the scores and the LNM ground truth for both clinical models.

def _scores_and_truth(frame, score_column, truth_column="LNM"):
    """Pair a clinical score column with its ground truth, skipping incomplete rows.

    Rows with NaN in either the score column or the truth column are dropped.

    Returns:
        scores: one-column DataFrame (column name = score_column), index 0..k-1.
        truth: one-column DataFrame (column name = truth_column) with "yes"/"no"
            replaced by 1/0, index 0..k-1, aligned row by row with scores.
        kept: index labels of the rows of frame that were retained.
    """
    keep = frame[score_column].notna() & frame[truth_column].notna()
    # Select by boolean mask (label based) instead of feeding index labels to .iloc,
    # which is only correct while the index happens to be 0..n-1.
    scores = frame.loc[keep, [score_column]].reset_index(drop=True)
    truth = (
        frame.loc[keep, [truth_column]]
        .replace({"yes": 1, "no": 0})
        .infer_objects()
        .reset_index(drop=True)
    )
    return scores, truth, frame.index[keep]


# Model without molecular information
ESGOwoTCGA, ESGOwoTCGA_LNMTruth, indices = _scores_and_truth(data, "ESGOwoTCGA_LND")

# Model with molecular information
ESGOwTCGA, ESGOwTCGA_LNMTruth, indices = _scores_and_truth(data, "ESGOwTCGA_LND")

Define the targets and change the labels

In [None]:
# Ground truth for both outcomes, with the text labels recoded as integers:
# "yes"/"positive" become 1 and "no"/"negative" become 0.
#targets = data[["LNM", "X5YR"]].copy()
targets = (
    data[["LNM", "Survival5yr"]]
    .replace({"yes":1, "no":0})
    .replace({'negative':0, 'positive':1})
)



Define evidence set

In [None]:
# Everything except the two outcome columns may be used as evidence.
# drop() returns a new frame, so `data` itself is left untouched.
evidence = data.drop(columns=["LNM", "Survival5yr"])


Create specific datasets for networks evaluation

In [None]:
# Evidence subsets, each leaving out the variables named in its identifier.
def _evidence_without(*columns):
    """Return a fresh copy of `evidence` with the given columns removed."""
    return evidence.drop(columns=list(columns))


data_or = _evidence_without('POLE', 'MSI', 'Platelets', 'MRI_MI', "FIGO", "Therapy")
# This subset exposes the recurrence location under the name "Recurrence".
data_or["Recurrence"] = data["Recurrence_location"]

data_noMRI_NoTCGA_noPlat = _evidence_without('POLE', 'MSI', 'Platelets', 'MRI_MI')
data_noMRI_NoPlat = _evidence_without('Platelets', 'MRI_MI')
data_NoPlat = _evidence_without('Platelets')
data_noMRI = _evidence_without('MRI_MI')
data_noMRI_NoTCGA = _evidence_without('POLE', 'MSI', 'MRI_MI')
data_noTCGA_NoPlat = _evidence_without('POLE', 'MSI', 'Platelets')

Define a function to loop through the evidence and get the results

In [None]:
def getProbabilities(model,evidence, Surv = "Survival5yr", samples = 100):
    """Run the network once per evidence row and collect the LNM and survival posteriors.

    Args:
        model: Bayesian network handed to gum.LazyPropagation.
        evidence: DataFrame with one row per patient. Every non-NaN cell of a row
            is passed to the inference engine as evidence, keyed by column name.
        Surv: name of the survival node whose posterior is requested.
        samples: unused; kept so existing calls stay valid.

    Returns:
        (resultsLNM, resultsSurvival): two lists with one entry per evidence row.
        An entry is the posterior returned by the engine, or the integer 0 when
        inference failed for that row.
    """
    net = gum.LazyPropagation(model)
    resultsLNM = []
    resultsSurvival = []

    for j in range(len(evidence)):
        # Missing values are simply left out of the evidence for this patient.
        evidencerow = evidence.iloc[j].dropna().to_dict()

        try:
            net.setEvidence(evidencerow)
            net.makeInference()

            resultLNM = net.posterior("LNM")
            resultSurvival = net.posterior(Surv)
        except Exception as error:
            # Best effort: report the failing row and keep the output aligned with
            # the input by storing the placeholder 0 for both targets.
            print("Error at row", j)
            print(error)

            resultLNM = 0
            resultSurvival = 0

        resultsLNM.append(resultLNM)
        resultsSurvival.append(resultSurvival)

    return resultsLNM, resultsSurvival

Get the results for the networks

In [None]:
def _evidence_node_names(network):
    """List the node names of a network, excluding the LNM and Survival5yr targets."""
    node_names = network.names()
    node_names.remove("LNM")
    node_names.remove("Survival5yr")
    return list(node_names)


# For every network: keep only the evidence columns that are nodes of that
# network, then compute the posteriors row by row.

# 1: original network
print("Started 1")
names = _evidence_node_names(origineel_net)
ev_temp = evidence.copy()[names]
# Define Recurrence_location as Recurrence since this is how it is defined in this model
ev_temp["Recurrence"] = data["Recurrence_location"]
org_LNM_res, org_Surv_res = getProbabilities(origineel_net, ev_temp)

# 2
print("Started 2")
names = _evidence_node_names(woPlat_woRest)
ev_temp = evidence.copy()[names]
noPlat_noRest_LNM, noPlat_noRest_Surv = getProbabilities(woPlat_woRest, ev_temp)

# 3
print("Started 3")
names = _evidence_node_names(WOPlat_MRI)
ev_temp = evidence.copy()[names]
noPlat_MRI_LNM, noPlat_MRI_Surv = getProbabilities(WOPlat_MRI, ev_temp)

# 4
print("Started 4")
names = _evidence_node_names(WOPlat_TCGA)
ev_temp = evidence.copy()[names]
noPlat_TCGA_LNM, noPlat_TCGA_Surv = getProbabilities(WOPlat_TCGA, ev_temp)

# 5
print("Started 5")
names = _evidence_node_names(WOPlat_TCGA_MRI)
ev_temp = evidence.copy()[names]
noPlat_TCGA_MRI_LNM, noPlat_TCGA_MRI_Surv = getProbabilities(WOPlat_TCGA_MRI, ev_temp)

# 6
print("Started 6")
names = _evidence_node_names(wPlat_woRest)
ev_temp = evidence.copy()[names]
Plat_noRest_LNM, Plat_noRest_Surv = getProbabilities(wPlat_woRest, ev_temp)

# 7
print("Started 7")
names = _evidence_node_names(WPlat_all)
ev_temp = evidence.copy()[names]
Plat_all_LNM, Plat_all_Surv = getProbabilities(WPlat_all, ev_temp)


Define the functions to unpack the results

In [None]:
# Turn posteriors into hard 0/1 predictions using a probability threshold
def getResultsLNM(results, threshold, target):
    """Convert posteriors into binary predictions.

    A row is predicted 1 when the most likely state of `target` is state 1 and
    the probability of that state is strictly greater than `threshold`;
    otherwise it is predicted 0.

    Args:
        results: sequence of objects whose argmax() returns
            (list of {variable: state} dicts, probability).
        threshold: minimum probability (exclusive) for a positive prediction.
        target: variable name looked up in the argmax dict.

    Returns:
        One-column DataFrame with one 0/1 prediction per entry of results.
    """
    def _is_positive(posterior):
        return posterior.argmax()[0][0][target] == 1 and posterior.argmax()[1] > threshold

    return pd.DataFrame([1 if _is_positive(posterior) else 0 for posterior in results])

def getResultsSurv(results, threshold, target, Surv_tar = 1):
    """Convert posteriors into binary predictions for a chosen target state.

    A row is predicted 1 when the most likely state of `target` equals
    `Surv_tar` and the probability of that state is strictly greater than
    `threshold`; otherwise it is predicted 0.

    Args:
        results: sequence of objects whose argmax() returns
            (list of {variable: state} dicts, probability).
        threshold: minimum probability (exclusive) for a positive prediction.
        target: variable name looked up in the argmax dict.
        Surv_tar: state that counts as the positive outcome.

    Returns:
        One-column DataFrame with one 0/1 prediction per entry of results.
    """
    predictions = []
    for posterior in results:
        hit = posterior.argmax()[0][0][target] == Surv_tar and posterior.argmax()[1] > threshold
        predictions.append(1 if hit else 0)
    return pd.DataFrame(predictions)

def getProbResults(results, target):
    """Extract one entry from every posterior.

    Args:
        results: sequence of indexable posteriors.
        target: index/key read from every posterior.

    Returns:
        One-column DataFrame holding posterior[target] per row. Rows whose
        posterior cannot be indexed are reported on stdout and stored as 0.
    """
    extracted = []

    for row_number, posterior in enumerate(results):
        try:
            value = posterior[target]
        except Exception as error:
            print("Error at row", row_number)
            print(error)
            value = 0
        extracted.append(value)

    return pd.DataFrame(extracted)

Unpack the probabilities

In [None]:
# Reduce every posterior to a single probability: the entry at index 1
# (presumably the "yes"/"positive" state — TODO confirm against the networks).
POSITIVE_STATE = 1

org_LNM_res_prob = getProbResults(org_LNM_res, POSITIVE_STATE)
org_Surv_res_prob = getProbResults(org_Surv_res, POSITIVE_STATE)

noPlat_noRest_LNM_prob = getProbResults(noPlat_noRest_LNM, POSITIVE_STATE)
noPlat_noRest_Surv_prob = getProbResults(noPlat_noRest_Surv, POSITIVE_STATE)

noPlat_MRI_LNM_prob = getProbResults(noPlat_MRI_LNM, POSITIVE_STATE)
noPlat_MRI_Surv_prob = getProbResults(noPlat_MRI_Surv, POSITIVE_STATE)

noPlat_TCGA_LNM_prob = getProbResults(noPlat_TCGA_LNM, POSITIVE_STATE)
noPlat_TCGA_Surv_prob = getProbResults(noPlat_TCGA_Surv, POSITIVE_STATE)

noPlat_TCGA_MRI_LNM_prob = getProbResults(noPlat_TCGA_MRI_LNM, POSITIVE_STATE)
noPlat_TCGA_MRI_Surv_prob = getProbResults(noPlat_TCGA_MRI_Surv, POSITIVE_STATE)

Plat_noRest_LNM_prob = getProbResults(Plat_noRest_LNM, POSITIVE_STATE)
Plat_noRest_Surv_prob = getProbResults(Plat_noRest_Surv, POSITIVE_STATE)

Plat_all_LNM_prob = getProbResults(Plat_all_LNM, POSITIVE_STATE)
Plat_all_Surv_prob = getProbResults(Plat_all_Surv, POSITIVE_STATE)


Define the metric functions

In [None]:
from sklearn.metrics import roc_curve
# Find the accuracy, roc auc, precision and recall for the results and the targets data
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, confusion_matrix, log_loss, brier_score_loss

def getMetrics(results, targets):
    """Compute classification metrics for hard 0/1 predictions.

    Args:
        results: predicted labels (0/1), one per sample.
        targets: true labels (0/1), one per sample.

    Returns:
        One-column DataFrame indexed by metric name: "Accuracy", "ROC AUC",
        "Precision (PPV)", "TPR (Recall/Sens)", "TNR (Spec)", "F1", "Brier",
        "Log Loss".
    """
    # Build the confusion matrix once; fixing the label order guarantees that
    # row 0 always holds the true negatives and false positives.
    matrix = confusion_matrix(targets, results, labels=[0, 1])
    TN, FP = matrix[0]

    accuracy = accuracy_score(targets, results)
    roc_auc = roc_auc_score(targets, results)
    precision = precision_score(targets, results)
    TPR = recall_score(targets, results)
    TNR = TN / (TN + FP)

    # With no positive predictions precision and recall are both 0 and the
    # harmonic mean is 0/0. Report 0.0 in that case instead of NaN or a
    # ZeroDivisionError.
    if precision + TPR > 0:
        f1 = 2 * (precision * TPR) / (precision + TPR)
    else:
        f1 = 0.0
    brier = brier_score_loss(targets, results)
    loglike = log_loss(targets, results)

    x = pd.DataFrame([accuracy, roc_auc, precision, TPR,TNR, f1, brier, loglike], index=["Accuracy", "ROC AUC", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"])
    return x

def getSlimMetrics(results, targets):
    """Compute probability-based metrics plus the predicted/observed event counts.

    Args:
        results: predicted probabilities, one per sample. For a DataFrame only
            the first column is used, whatever its name.
        targets: true 0/1 labels, one per sample (Series or one-column DataFrame).

    Returns:
        One-column DataFrame indexed by "ROC AUC", "Log Loss", "Brier",
        "N Predicted/N Observed" (text such as "12/15") and "Ratio"
        (sum of predicted probabilities divided by the number of observed events).

    Raises:
        ValueError: if results and targets differ in length.
    """
    if not len(results) == len(targets):
        raise ValueError(
            f"Results and targets are not the same length. "
            f"Results: {len(results)} Targets: {len(targets)}"
        )

    # Work on plain 1-D arrays taken by position. Indexing the summed frame with
    # [0] only works when the column is literally named 0, and int() on a
    # one-element Series is deprecated in pandas.
    scores = pd.DataFrame(results).iloc[:, 0].to_numpy(dtype=float)
    truth = pd.DataFrame(targets).iloc[:, 0].to_numpy(dtype=float)

    # Get ROC AUC, Log Loss, Brier, and N Predicted/N Observed
    ROC = round(roc_auc_score(truth, scores), 4)
    LL = round(log_loss(truth, scores), 4)
    Brier = round(brier_score_loss(truth, scores), 4)
    N_pred = scores.sum()
    N_obs = int(truth.sum())
    N_br = f"{int(N_pred)}/{int(N_obs)}"
    Ratio = round(N_pred/N_obs, 4)

    x = pd.DataFrame([ROC, LL, Brier, N_br, Ratio], index=["ROC AUC", "Log Loss", "Brier", "N Predicted/N Observed", "Ratio"])
    return x


In [None]:
# Slim metrics (ROC AUC, log loss, Brier, predicted/observed) for every model
targetSurv = "Survival5yr"

# Ground-truth columns shared by all network evaluations.
lnm_truth = targets["LNM"]
surv_truth = targets[targetSurv]

# Original network
print("Started 1")
org_LNM_metrics = getSlimMetrics(org_LNM_res_prob, lnm_truth)
org_Surv_metrics = getSlimMetrics(org_Surv_res_prob, surv_truth)

print("Started 2")
woPlat_noRest_LNM_metrics = getSlimMetrics(noPlat_noRest_LNM_prob, lnm_truth)
woPlat_noRest_Surv_metrics = getSlimMetrics(noPlat_noRest_Surv_prob, surv_truth)

print("Started 3")
woPlat_MRI_LNM_metrics = getSlimMetrics(noPlat_MRI_LNM_prob, lnm_truth)
woPlat_MRI_Surv_metrics = getSlimMetrics(noPlat_MRI_Surv_prob, surv_truth)

print("Started 4")
woPlat_TCGA_LNM_metrics = getSlimMetrics(noPlat_TCGA_LNM_prob, lnm_truth)
woPlat_TCGA_Surv_metrics = getSlimMetrics(noPlat_TCGA_Surv_prob, surv_truth)

print("Started 5")
woPlat_TCGA_MRI_LNM_metrics = getSlimMetrics(noPlat_TCGA_MRI_LNM_prob, lnm_truth)
woPlat_TCGA_MRI_Surv_metrics = getSlimMetrics(noPlat_TCGA_MRI_Surv_prob, surv_truth)

print("Started 6")
Plat_noRest_LNM_metrics = getSlimMetrics(Plat_noRest_LNM_prob, lnm_truth)
Plat_noRest_Surv_metrics = getSlimMetrics(Plat_noRest_Surv_prob, surv_truth)

print("Started 7")
Plat_all_LNM_metrics = getSlimMetrics(Plat_all_LNM_prob, lnm_truth)
Plat_all_Surv_metrics = getSlimMetrics(Plat_all_Surv_prob, surv_truth)

# Clinical models: these bring their own truth frames, because patients
# without a risk score were removed from them.
print("Started 8")
ESGOwoTCGA_LNM_metrics = getSlimMetrics(ESGOwoTCGA, ESGOwoTCGA_LNMTruth)
ESGOwTCGA_LNM_metrics = getSlimMetrics(ESGOwTCGA, ESGOwTCGA_LNMTruth)


Concatenate the metrics, to create a table with all the metrics, for comparison of LNM

In [None]:
# Column label -> slim metrics, in display order (clinical models first).
lnm_metric_tables = {
    "ESGOwoTCGA": ESGOwoTCGA_LNM_metrics,
    "ESGOwTCGA": ESGOwTCGA_LNM_metrics,
    "Origineel Netwerk": org_LNM_metrics,
    "-Plat -TCGA -MRI": woPlat_noRest_LNM_metrics,
    "-Plat -TCGA +MRI": woPlat_MRI_LNM_metrics,
    "-Plat +TCGA -MRI": woPlat_TCGA_LNM_metrics,
    "-Plat +TCGA +MRI": woPlat_TCGA_MRI_LNM_metrics,
    "+Plat -TCGA -MRI": Plat_noRest_LNM_metrics,
    "+Plat +TCGA +MRI": Plat_all_LNM_metrics,
}

LNM_metrics = pd.concat(list(lnm_metric_tables.values()), axis=1)
LNM_metrics.columns = list(lnm_metric_tables.keys())
LNM_metrics


Concatenate the metrics, to create a table with all the metrics, for comparison of Survival

In [None]:
# Side-by-side survival metrics for all networks.
# Column label -> slim metrics, in display order.
surv_metric_tables = {
    "Origineel Netwerk": org_Surv_metrics,
    "-Plat -TCGA -MRI": woPlat_noRest_Surv_metrics,
    "-Plat -TCGA +MRI": woPlat_MRI_Surv_metrics,
    "-Plat +TCGA -MRI": woPlat_TCGA_Surv_metrics,
    "-Plat +TCGA +MRI": woPlat_TCGA_MRI_Surv_metrics,
    "+Plat -TCGA -MRI": Plat_noRest_Surv_metrics,
    "+Plat +TCGA +MRI": Plat_all_Surv_metrics,
}

Surv_metrics = pd.concat(list(surv_metric_tables.values()), axis=1).round(3)
Surv_metrics.columns = list(surv_metric_tables.keys())
Surv_metrics

Plot the ROC curves for the LNM and Survival

In [None]:
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
import seaborn as sns

# Figure styling: white grid, png/pdf inline output, 150 dpi
sns.set_style("whitegrid")
set_matplotlib_formats('png', 'pdf')
plt.rcParams['figure.dpi'] = 150

# One figure with two panels: LNM on the left, Survival on the right
fig, ax = plt.subplots(1,2, figsize=(15,5))

fig.suptitle("ROC Curves", fontsize=16)

# (legend template, ground truth, predicted probabilities) per LNM curve, in plot order
lnm_roc_inputs = [
    ('Origineel Netwerk (area = %0.2f)', targets["LNM"], org_LNM_res_prob),
    ('-Plat -TCGA -MRI (area = %0.2f)', targets["LNM"], noPlat_noRest_LNM_prob),
    ('-Plat -TCGA +MRI (area = %0.2f)', targets["LNM"], noPlat_MRI_LNM_prob),
    ('-Plat +TCGA -MRI (area = %0.2f)', targets["LNM"], noPlat_TCGA_LNM_prob),
    ('-Plat +TCGA +MRI (area = %0.2f)', targets["LNM"], noPlat_TCGA_MRI_LNM_prob),
    ('+Plat -TCGA -MRI (area = %0.2f)', targets["LNM"], Plat_noRest_LNM_prob),
    ('+Plat +TCGA +MRI (area = %0.2f)', targets["LNM"], Plat_all_LNM_prob),
    # The clinical models are scored against their own reduced truth frames
    ('ESGOwoTCGA (area = %0.2f)', ESGOwoTCGA_LNMTruth, ESGOwoTCGA),
    ('ESGOwTCGA (area = %0.2f)', ESGOwTCGA_LNMTruth, ESGOwTCGA),
]

# (legend template, ground truth, predicted probabilities) per survival curve
surv_roc_inputs = [
    ('Origineel Netwerk (area = %0.2f)', targets[targetSurv], org_Surv_res_prob),
    ('-Plat -TCGA -MRI (area = %0.2f)', targets[targetSurv], noPlat_noRest_Surv_prob),
    ('-Plat -TCGA +MRI (area = %0.2f)', targets[targetSurv], noPlat_MRI_Surv_prob),
    ('-Plat +TCGA -MRI (area = %0.2f)', targets[targetSurv], noPlat_TCGA_Surv_prob),
    ('-Plat +TCGA +MRI (area = %0.2f)', targets[targetSurv], noPlat_TCGA_MRI_Surv_prob),
    ('+Plat -TCGA -MRI (area = %0.2f)', targets[targetSurv], Plat_noRest_Surv_prob),
    ('+Plat +TCGA +MRI (area = %0.2f)', targets[targetSurv], Plat_all_Surv_prob),
]

# LNM panel: chance diagonal first, then one curve per model
ax[0].plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)
for legend_template, truth, scores in lnm_roc_inputs:
    fpr, tpr, _ = roc_curve(truth, scores, pos_label=1)
    ax[0].plot(fpr, tpr, label=legend_template % roc_auc_score(truth, scores))

ax[0].title.set_text("LNM")
ax[0].legend(loc="lower right")

# Survival panel: same layout
ax[1].plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)
for legend_template, truth, scores in surv_roc_inputs:
    fpr, tpr, _ = roc_curve(truth, scores, pos_label=1)
    ax[1].plot(fpr, tpr, label=legend_template % roc_auc_score(truth, scores))

ax[1].title.set_text("Survival")
ax[1].legend(loc="lower right")

# x and y labels
for i in range(2):
    ax[i].set_xlabel('False Positive Rate')
    ax[i].set_ylabel('True Positive Rate')

Define a function to get the results based on a range of thresholds for the probability

In [None]:
def getRangeResults(probResults, thresholds=None):
    """Binarise predicted probabilities at a series of cut-off values.

    Args:
        probResults: DataFrame with the predicted probabilities. The column
            that is read is the last one whose name is a string containing
            "ESGO"; if there is none, the column labelled 0 is used.
        thresholds: iterable of cut-offs. Defaults to
            0.05, 0.10, 0.20, ..., 0.90.

    Returns:
        DataFrame with one row per input row (index 0..n-1) and one column per
        threshold, holding 1 where probability >= threshold and 0 otherwise.
        NaN probabilities yield 0 for every threshold.
    """
    if thresholds is None:
        thresholds = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    thresholds = list(thresholds)

    if len(probResults) == 0:
        return pd.DataFrame([], columns=thresholds)

    # The column to read does not depend on the row, so find it once up front.
    targetcol = 0
    for col in probResults.columns:
        if isinstance(col, str) and "ESGO" in col:
            targetcol = col

    # Compare every probability with every threshold in a single broadcast.
    # Values are taken by position, so the input index does not need to be 0..n-1.
    scores = np.asarray(probResults[targetcol], dtype=float)
    cutoffs = np.asarray(thresholds, dtype=float)
    flags = (scores[:, None] >= cutoffs[None, :]).astype(int)

    return pd.DataFrame(flags, columns=thresholds)


    

Get the range of thresholds for the probabilities

In [None]:
# Binarise the predicted probabilities of every model at each threshold
# (LNM and Survival per network, LNM only for the clinical models).
org_LNM_res_range, org_Surv_res_range = map(
    getRangeResults, (org_LNM_res_prob, org_Surv_res_prob))

noPlat_noRest_LNM_range, noPlat_noRest_Surv_range = map(
    getRangeResults, (noPlat_noRest_LNM_prob, noPlat_noRest_Surv_prob))

noPlat_MRI_LNM_range, noPlat_MRI_Surv_range = map(
    getRangeResults, (noPlat_MRI_LNM_prob, noPlat_MRI_Surv_prob))

noPlat_TCGA_LNM_range, noPlat_TCGA_Surv_range = map(
    getRangeResults, (noPlat_TCGA_LNM_prob, noPlat_TCGA_Surv_prob))

noPlat_TCGA_MRI_LNM_range, noPlat_TCGA_MRI_Surv_range = map(
    getRangeResults, (noPlat_TCGA_MRI_LNM_prob, noPlat_TCGA_MRI_Surv_prob))

Plat_noRest_LNM_range, Plat_noRest_Surv_range = map(
    getRangeResults, (Plat_noRest_LNM_prob, Plat_noRest_Surv_prob))

Plat_all_LNM_range, Plat_all_Surv_range = map(
    getRangeResults, (Plat_all_LNM_prob, Plat_all_Surv_prob))

ESGOwoTCGA_LNM_range, ESGOwTCGA_LNM_range = map(
    getRangeResults, (ESGOwoTCGA, ESGOwTCGA))

Define a function to make a 3d matrix of metrics for each threshold

In [None]:
# Metrics-by-threshold table for one model
def getMetricsRange(results, targets):
    """Evaluate thresholded predictions at every cut-off.

    Args:
        results: DataFrame of 0/1 predictions with one column per threshold
            (0.05, 0.10, 0.20, ..., 0.90).
        targets: true labels, aligned row by row with results.

    Returns:
        DataFrame with metric names as rows and thresholds as columns, rounded
        to three decimals. ROC AUC is left out of the table.
    """
    cutoffs = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    wanted = ["Accuracy", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"]

    per_cutoff = [
        round(getMetrics(results[cutoff].values, targets).loc[wanted], 3)
        for cutoff in cutoffs
    ]

    table = pd.concat(per_cutoff, axis=1)
    table.columns = cutoffs
    return table

Get the metrics for each of the cutoff points

In [None]:
# Metrics at every cut-off point, per model

# Ground-truth columns shared by all network evaluations.
lnm_truth = targets["LNM"]
surv_truth = targets[targetSurv]

# Original network
org_LNM_metrics_range = getMetricsRange(org_LNM_res_range, lnm_truth)
org_Surv_metrics_range = getMetricsRange(org_Surv_res_range, surv_truth)

noPlat_noRest_LNM_metrics_range = getMetricsRange(noPlat_noRest_LNM_range, lnm_truth)
noPlat_noRest_Surv_metrics_range = getMetricsRange(noPlat_noRest_Surv_range, surv_truth)

noPlat_MRI_LNM_metrics_range = getMetricsRange(noPlat_MRI_LNM_range, lnm_truth)
noPlat_MRI_Surv_metrics_range = getMetricsRange(noPlat_MRI_Surv_range, surv_truth)

noPlat_TCGA_LNM_metrics_range = getMetricsRange(noPlat_TCGA_LNM_range, lnm_truth)
noPlat_TCGA_Surv_metrics_range = getMetricsRange(noPlat_TCGA_Surv_range, surv_truth)

noPlat_TCGA_MRI_LNM_metrics_range = getMetricsRange(noPlat_TCGA_MRI_LNM_range, lnm_truth)
noPlat_TCGA_MRI_Surv_metrics_range = getMetricsRange(noPlat_TCGA_MRI_Surv_range, surv_truth)

Plat_noRest_LNM_metrics_range = getMetricsRange(Plat_noRest_LNM_range, lnm_truth)
Plat_noRest_Surv_metrics_range = getMetricsRange(Plat_noRest_Surv_range, surv_truth)

Plat_all_LNM_metrics_range = getMetricsRange(Plat_all_LNM_range, lnm_truth)
Plat_all_Surv_metrics_range = getMetricsRange(Plat_all_Surv_range, surv_truth)

# Clinical models use their own truth frames
ESGOwoTCGA_LNM_metrics_range = getMetricsRange(ESGOwoTCGA_LNM_range, ESGOwoTCGA_LNMTruth)
ESGOwTCGA_LNM_metrics_range = getMetricsRange(ESGOwTCGA_LNM_range, ESGOwTCGA_LNMTruth)

Plot the recall vs precision for the LNM and Survival

In [None]:
# Recall vs Precision: every model contributes one line, traced across the thresholds

# (metrics-by-threshold table, legend label) per LNM line, in plot order
lnm_pr_inputs = [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # Clinical models
    (ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]

# (metrics-by-threshold table, legend label) per survival line
# (a former "Endomcancer3" line based on inter_Surv_metrics_range is not plotted)
surv_pr_inputs = [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]

# LNM
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("LNM Recall vs Precision per Model", fontsize=16)

for metrics_table, legend_label in lnm_pr_inputs:
    ax.plot(metrics_table.loc["TPR (Recall/Sens)"], metrics_table.loc["Precision (PPV)"], label=legend_label)

ax.legend(loc="upper right")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")

# Survival
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("Survival Recall vs Precision per Model", fontsize=16)

for metrics_table, legend_label in surv_pr_inputs:
    ax.plot(metrics_table.loc["TPR (Recall/Sens)"], metrics_table.loc["Precision (PPV)"], label=legend_label)

ax.legend(loc="lower left")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")



Plot the recall vs threshold for the LNM and Survival

In [None]:
# Recall (sensitivity) as a function of the probability threshold, per model.
recall_row = "TPR (Recall/Sens)"
# Tick positions double as tick labels on both figures.
recall_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]

lnm_recall_curves = [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # The two ESGO models are only available for LNM.
    (ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]
# The "Endomcancer3" intermediate network (inter_*_metrics_range) stays disabled here.
surv_recall_curves = [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]

# LNM
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("LNM Recall per Threshold per Model", fontsize=16)
for curve_metrics, curve_label in lnm_recall_curves:
    ax.plot(curve_metrics.loc[recall_row], label=curve_label)
ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Recall")
ax.set_xticks(recall_ticks)
ax.set_xticklabels(recall_ticks)

# Survival
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("Survival Recall per Threshold per Model", fontsize=16)
for curve_metrics, curve_label in surv_recall_curves:
    ax.plot(curve_metrics.loc[recall_row], label=curve_label)
ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Recall")
ax.set_xticks(recall_ticks)
ax.set_xticklabels(recall_ticks)


Plot the precision vs threshold for the LNM and Survival

In [None]:
# Precision (PPV) as a function of the probability threshold, per model.
precision_row = "Precision (PPV)"
# Tick positions double as tick labels on both figures.
precision_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]

# Each entry pairs a model's metrics table with its legend label; list order = plotting order.
lnm_precision_curves = [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # The two ESGO models are only available for LNM.
    (ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]
# The "Endomcancer3" intermediate network (inter_*_metrics_range) stays disabled here.
surv_precision_curves = [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_Surv_metrics_range, "+Plat -TCGA -MRI"),
    # Fix: this curve was missing from the Survival precision figure only, while every
    # other Survival figure in the notebook plots all seven networks.
    (Plat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]

# LNM
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("LNM Precision per Threshold per Model", fontsize=16)
for curve_metrics, curve_label in lnm_precision_curves:
    ax.plot(curve_metrics.loc[precision_row], label=curve_label)
ax.legend(loc="lower right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Precision")
ax.set_xticks(precision_ticks)
ax.set_xticklabels(precision_ticks)

# Survival
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("Survival Precision per Threshold per Model", fontsize=16)
for curve_metrics, curve_label in surv_precision_curves:
    ax.plot(curve_metrics.loc[precision_row], label=curve_label)
ax.legend(loc="lower right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Precision")
ax.set_xticks(precision_ticks)
ax.set_xticklabels(precision_ticks)



Plot the F1 vs threshold for the LNM and Survival

In [None]:
# F1 score as a function of the probability threshold, per model.
f1_row = "F1"

# Each entry pairs a model's metrics table with its legend label; list order = plotting order.
lnm_f1_curves = [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # The two ESGO models are only available for LNM.
    (ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]
# The "Endomcancer3" intermediate network (inter_*_metrics_range) stays disabled here.
surv_f1_curves = [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]

# LNM
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("LNM F1 per Threshold per Model", fontsize=16)
for curve_metrics, curve_label in lnm_f1_curves:
    ax.plot(curve_metrics.loc[f1_row], label=curve_label)
ax.legend(loc="upper left")
ax.set_xlabel("Threshold")
ax.set_ylabel("F1")

# Survival
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("Survival F1 per Threshold per Model", fontsize=16)
for curve_metrics, curve_label in surv_f1_curves:
    ax.plot(curve_metrics.loc[f1_row], label=curve_label)
ax.legend(loc="upper left")
ax.set_xlabel("Threshold")
ax.set_ylabel("F1")


Plot the Brier vs threshold for the LNM and Survival

In [None]:
# "Brier" row of every model's metrics table, plotted against the threshold axis.
brier_row = "Brier"

# Each entry pairs a model's metrics table with its legend label; list order = plotting order.
lnm_brier_curves = [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # The two ESGO models are only available for LNM.
    (ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]
# The "Endomcancer3" intermediate network (inter_*_metrics_range) stays disabled here.
surv_brier_curves = [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]

# LNM
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("LNM Brier per Threshold per Model", fontsize=16)
for curve_metrics, curve_label in lnm_brier_curves:
    ax.plot(curve_metrics.loc[brier_row], label=curve_label)
ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Brier")

# Survival
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("Survival Brier per Threshold per Model", fontsize=16)
for curve_metrics, curve_label in surv_brier_curves:
    ax.plot(curve_metrics.loc[brier_row], label=curve_label)
ax.legend(loc="lower right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Brier")



Plot the Log Loss vs threshold for the LNM and Survival

In [None]:
# "Log Loss" row of every model's metrics table, plotted against the threshold axis.
log_loss_row = "Log Loss"

# Each entry pairs a model's metrics table with its legend label; list order = plotting order.
lnm_log_loss_curves = [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # The two ESGO models are only available for LNM.
    (ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]
# The "Endomcancer3" intermediate network (inter_*_metrics_range) stays disabled here.
surv_log_loss_curves = [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (noPlat_noRest_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (noPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (noPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (noPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (Plat_noRest_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (Plat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]

# LNM
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("LNM Log Loss per Threshold per Model", fontsize=16)
for curve_metrics, curve_label in lnm_log_loss_curves:
    ax.plot(curve_metrics.loc[log_loss_row], label=curve_label)
ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Log Loss")

# Survival
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("Survival Log Loss per Threshold per Model", fontsize=16)
for curve_metrics, curve_label in surv_log_loss_curves:
    ax.plot(curve_metrics.loc[log_loss_row], label=curve_label)
ax.legend(loc="lower right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Log Loss")


Define and plot the DCA curve for LNM

In [None]:
# Decision curve analysis
from dcurves import dca, plot_graphs
import pandas as pd
import numpy as np
import statsmodels.api as sm
import lifelines

In [None]:
# Decision curve analysis (net benefit) for LNM: the seven Bayesian-network models plus
# the two ESGO models, all drawn in one figure.
bn_model_names = [
    "Origineel Netwerk",
    "-Plat -TCGA -MRI",
    "-Plat -TCGA +MRI",
    "-Plat +TCGA -MRI",
    "-Plat +TCGA +MRI",
    "+Plat -TCGA -MRI",
    "+Plat +TCGA +MRI",
]
# One threshold grid shared by every dca() call so the resulting curves line up.
dca_thresholds = np.arange(0, 0.30, 0.01)

# Concatenate the target and every model's predicted LNM probability, column by column.
LNM_DCA = pd.concat(
    [
        targets["LNM"],
        org_LNM_res_prob,
        noPlat_noRest_LNM_prob,
        noPlat_MRI_LNM_prob,
        noPlat_TCGA_LNM_prob,
        noPlat_TCGA_MRI_LNM_prob,
        Plat_noRest_LNM_prob,
        Plat_all_LNM_prob,
    ],
    axis=1,
)
LNM_DCA.columns = ["Target", *bn_model_names]

# No harm is passed to dca(); the per-model harm of 0.03 stays disabled.
# Copies are handed over so the shared name list / threshold grid cannot be altered by the call.
dca_multi_LNM = dca(
    data=LNM_DCA,
    outcome="Target",
    modelnames=list(bn_model_names),
    thresholds=dca_thresholds.copy(),
)

# The ESGO predictions have their own truth vectors, so each gets its own dca() run.
ESGOwoTCGA_concat = pd.concat([ESGOwoTCGA, ESGOwoTCGA_LNMTruth], axis=1)
ESGOwoTCGA_concat.columns = ["ESGOwoTCGA", "Target"]

ESGOwTCGA_concat = pd.concat([ESGOwTCGA, ESGOwTCGA_LNMTruth], axis=1)
ESGOwTCGA_concat.columns = ["ESGOwTCGA", "Target"]

dca_EsgoWOTCGA = dca(
    data=ESGOwoTCGA_concat,
    outcome="Target",
    modelnames=["ESGOwoTCGA"],
    thresholds=dca_thresholds.copy(),
)
dca_EsgoWTCGA = dca(
    data=ESGOwTCGA_concat,
    outcome="Target",
    modelnames=["ESGOwTCGA"],
    thresholds=dca_thresholds.copy(),
)

# Keep only the rows of the ESGO model itself from the last two runs. Selecting on the
# "model" column replaces the former positional slice [0:30], which silently depended on
# the number of thresholds and on the row order of the dca() output.
# NOTE(review): the dropped rows are presumably dcurves' "all"/"none" reference strategies,
# which dca_multi_LNM already provides - confirm.
dca_EsgoWOTCGA = dca_EsgoWOTCGA[dca_EsgoWOTCGA["model"] == "ESGOwoTCGA"]
dca_EsgoWTCGA = dca_EsgoWTCGA[dca_EsgoWTCGA["model"] == "ESGOwTCGA"]

# The figure is created first so that it is the current pyplot figure when plot_graphs runs.
figure = plt.figure(figsize=(10, 7))
plt.rcParams.update({'font.size': 10})
plt.rcParams.update({'legend.loc': 'upper right', 'legend.borderaxespad': 0})

# Ten hex colours repeated twice, giving 20 entries for plot_graphs to draw from.
dca_palette = [
    "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
    "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf",
] * 2

plot_graphs(
    plot_df=pd.concat([dca_multi_LNM, dca_EsgoWOTCGA, dca_EsgoWTCGA]),
    y_limits=[-0.005, 0.1],
    graph_type="net_benefit",
    color_names=dca_palette,
)


In [None]:
# Decision curve analysis for LNM, shown as net interventions avoided.
# The target plus each Bayesian-network model's predicted LNM probability are
# (re)assembled here so the cell does not rely on the previous DCA cell.
nia_model_names = [
    "Origineel Netwerk",
    "-Plat -TCGA -MRI",
    "-Plat -TCGA +MRI",
    "-Plat +TCGA -MRI",
    "-Plat +TCGA +MRI",
    "+Plat -TCGA -MRI",
    "+Plat +TCGA +MRI",
]
nia_inputs = [
    targets["LNM"],
    org_LNM_res_prob,
    noPlat_noRest_LNM_prob,
    noPlat_MRI_LNM_prob,
    noPlat_TCGA_LNM_prob,
    noPlat_TCGA_MRI_LNM_prob,
    Plat_noRest_LNM_prob,
    Plat_all_LNM_prob,
]

LNM_DCA = pd.concat(nia_inputs, axis=1)
LNM_DCA.columns = ["Target", *nia_model_names]

# dca() gets its own copy of the model-name list.
dca_multi_LNM = dca(
    data=LNM_DCA,
    outcome="Target",
    modelnames=list(nia_model_names),
    thresholds=np.arange(0, 0.30, 0.01),
)

plot_graphs(
    plot_df=dca_multi_LNM,
    y_limits=[-0.01, 1],
    graph_type="net_intervention_avoided",
)

Create a barplot of the net benefit at different thresholds for the models

In [None]:
# Grouped bar plot of the LNM net benefit of every model at the 0.05, 0.10 and 0.15 thresholds.
bar_model_names = [
    "Origineel Netwerk",
    "-Plat -TCGA -MRI",
    "-Plat -TCGA +MRI",
    "-Plat +TCGA -MRI",
    "-Plat +TCGA +MRI",
    "+Plat -TCGA -MRI",
    "+Plat +TCGA +MRI",
]

# One {model name: net benefit} dict per threshold; a model without a matching row keeps 0.
ls_05 = dict.fromkeys(bar_model_names, 0)
ls_10 = dict.fromkeys(bar_model_names, 0)
ls_15 = dict.fromkeys(bar_model_names, 0)
bar_groups = [(0.05, "0.05", ls_05), (0.10, "0.10", ls_10), (0.15, "0.15", ls_15)]

for group_threshold, _, group_values in bar_groups:
    # The thresholds come from np.arange, so compare with a tolerance instead of "==".
    # A boolean mask also avoids assuming that dca_multi_LNM has a default 0..n-1 index.
    at_threshold = dca_multi_LNM[np.isclose(dca_multi_LNM["threshold"], group_threshold)]
    for row_model, row_net_benefit in zip(at_threshold["model"], at_threshold["net_benefit"]):
        # Rows of models that are not listed above are skipped.
        if row_model in group_values:
            group_values[row_model] = row_net_benefit

# LNM
fig, ax = plt.subplots(1, 1, figsize=(15, 5))

fig.suptitle("LNM Net Benefit per Threshold per Model", fontsize=16)

# Draw the three thresholds side by side for each model; plotting them at the same
# x position would stack the bars on top of each other and could hide the shorter ones.
bar_positions = np.arange(len(bar_model_names))
bar_width = 0.25
for group_index, (_, group_label, group_values) in enumerate(bar_groups):
    ax.bar(
        bar_positions + (group_index - 1) * bar_width,  # offsets: -width, 0, +width
        [group_values[model_name] for model_name in bar_model_names],
        width=bar_width,
        label=group_label,
    )
ax.set_xticks(bar_positions)
ax.set_xticklabels(bar_model_names)

ax.legend(loc="upper right")
ax.set_xlabel("Model")
ax.set_ylabel("Net Benefit")