### Author: Ally Sprik
### Last-updated: 25-02-2024

The goal of this notebook is to validate the different networks on the Brno validation set.



In [None]:
import pandas as pd
import numpy as np
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb
from collections import Counter

# Load the fitted Bayesian networks (.net files) that are compared in this notebook.
# The comments give the label each network gets in the comparison tables and plots below.
origineel_net = gum.loadBN("../0.3. Original_Casper_files/Results/Casper_fitted_952.net")  # "Origineel Netwerk"
WOPlat = gum.loadBN("../3. Model/Fitted_Networks/R_WOP_original_952.net")  # "-Plat -TCGA -MRI"
WOPlat_MRI = gum.loadBN("../3. Model/Fitted_Networks/R_WOP_AddMRIMI_fitted_952.net")  # "-Plat -TCGA +MRI"
WOPlat_TCGA = gum.loadBN("../3. Model/Fitted_Networks/R_WOP_AddTCGA_1_fitted_952.net")  # "-Plat +TCGA -MRI"
WOPlat_TCGA_MRI = gum.loadBN("../3. Model/Fitted_Networks/R_WOP_AddMRIMITCGA_fitted_952.net")  # "-Plat +TCGA +MRI"
WPlat = gum.loadBN("../3. Model/Fitted_Networks/R_WP_Original_952.net")  # "+Plat -TCGA -MRI"
WPlat_all = gum.loadBN("../3. Model/Fitted_Networks/R_WP_all_train_952.net")  # "+Plat +TCGA +MRI"


# Load the cleaned Brno validation data; all paths are relative to the notebook's directory.
df = pd.read_csv("../0.1. Cleaned_data/Cleaned_Brno_model_complete.csv")
# Bare expression: displays the loaded frame as the cell output.
df

In [None]:
# Replace nan values with "no" for LVSI: according to Hanny, if LVSI is not reported it is practically always "no".
# The result is assigned back to the column: calling replace(..., inplace=True) on the selection
# df["LVSI"] mutates a temporary object under pandas Copy-on-Write and would leave df unchanged.
df["LVSI"] = df["LVSI"].replace({np.nan: "no"})

Select the rows that can be used for validation, subset for the LNM and the Survival5yr

In [None]:
# Build one validation subset per outcome:
#   df_LNM  - rows with ENDORISK_LNM == "yes" and a known LNM value
#   df_Surv - rows with ENDORISK_FU == "yes" and a known Survival5yr value
# Both subsets get a fresh 0..n-1 index.
lnm_flagged = df["ENDORISK_LNM"] == "yes"
fu_flagged = df["ENDORISK_FU"] == "yes"

df_LNM = df.loc[lnm_flagged].copy().dropna(subset=["LNM"]).reset_index(drop=True)
df_Surv = df.loc[fu_flagged].copy().dropna(subset=["Survival5yr"]).reset_index(drop=True)

Implement the ESGO classification groups for LNM

In [None]:
# Clinical model generation
# Simulate clinical decision making by assigning every patient an ESGO risk group, once
# without molecular (TCGA) information (column ESGOwoTCGA) and once with it (column ESGOwTCGA).
#
# Rules as implemented below (first matching rule wins; no match -> NaN, i.e. not classifiable):
# Low                 woTCGA: Stage IA endometrioid, grade 1/2, no LVSI
#                     wTCGA : Stage I-II endometrioid with POLE mutation;
#                             or Stage IA endometrioid, grade 1/2, no LVSI, MSI or NSMP
# Intermediate        woTCGA: Stage IB endometrioid, grade 1/2, no LVSI;
#                             or Stage IA endometrioid, grade 3, no LVSI;
#                             or Stage IA non-endometrioid with MRI_MI < 50%
#                     wTCGA : same endometrioid rules restricted to MSI or NSMP;
#                             or Stage IA non-endometrioid and/or p53 mutant with MRI_MI < 50%
# High-intermediate   woTCGA: Stage I endometrioid with LVSI;
#                             or Stage IB endometrioid grade 3, regardless of LVSI;
#                             or Stage II
#                     wTCGA : same rules, endometrioid and restricted to MSI or NSMP
# High                woTCGA: Stage III-IVA;
#                             or non-endometrioid (not IVB) with MRI_MI >= 50%
#                     wTCGA : Stage III-IVA endometrioid, MSI or NSMP;
#                             or p53 mutant (not IVB) with MRI_MI >= 50%;
#                             or non-endometrioid (not IVB) with MRI_MI >= 50%, MSI or NSMP
# Advanced metastatic both  : Stage IVB
# NOTE(review): residual disease is part of the ESGO chart but is not used by these rules.


def _molecular_class(msi, pole):
    """Derive the molecular class ("MSI", "POLE", "NSMP" or NaN) from the MSI and POLE columns."""
    if msi == "yes":
        return "MSI"
    if pole == "yes":
        return "POLE"
    # NSMP as soon as at least one of the two markers was tested and is negative.
    if msi == "no" or pole == "no":
        return "NSMP"
    return np.nan


def _esgo_without_tcga(figo, grade, lvsi, mri_mi, histology):
    """Return the ESGO risk group without molecular information, or NaN if no rule applies."""
    endometrioid = histology == "endometrioid"
    non_endometrioid = histology == "non-endometrioid"
    low_grade = grade in ("grade 1", "grade 2")
    high_grade = grade == "grade 3"

    if figo == "IA" and low_grade and lvsi == "no" and endometrioid:
        return "Low"
    if figo == "IB" and low_grade and lvsi == "no" and endometrioid:
        return "Intermediate"
    if figo == "IA" and high_grade and lvsi == "no" and endometrioid:
        return "Intermediate"
    if figo == "IA" and non_endometrioid and mri_mi == "lt_50":
        return "Intermediate"
    if figo in ("IA", "IB") and endometrioid and lvsi == "yes":
        return "High-intermediate"
    if figo == "IB" and high_grade and endometrioid:
        return "High-intermediate"
    if figo == "II":
        return "High-intermediate"
    if figo in ("IIIA", "IIIB", "IIIC", "IVA"):
        return "High"
    if figo != "IVB" and non_endometrioid and mri_mi == "ge_50":
        return "High"
    if figo == "IVB":
        return "Advanced metastatic"
    return np.nan


def _esgo_with_tcga(figo, grade, lvsi, mri_mi, histology, p53, mol_class):
    """Return the ESGO risk group with molecular information, or NaN if no rule applies."""
    endometrioid = histology == "endometrioid"
    non_endometrioid = histology == "non-endometrioid"
    low_grade = grade in ("grade 1", "grade 2")
    high_grade = grade == "grade 3"
    msi_or_nsmp = mol_class in ("MSI", "NSMP")

    if figo in ("IA", "IB", "II") and mol_class == "POLE" and endometrioid:
        return "Low"
    # The grade 1/2 requirement was missing here, which classified Stage IA grade 3 as "Low"
    # and made the Stage IA grade 3 -> "Intermediate" rule below unreachable.
    if figo == "IA" and low_grade and lvsi == "no" and endometrioid and msi_or_nsmp:
        return "Low"
    if figo == "IB" and low_grade and lvsi == "no" and msi_or_nsmp and endometrioid:
        return "Intermediate"
    if figo == "IA" and high_grade and lvsi == "no" and endometrioid and msi_or_nsmp:
        return "Intermediate"
    if figo == "IA" and (non_endometrioid or p53 == "mutant") and mri_mi == "lt_50":
        return "Intermediate"
    if figo in ("IA", "IB") and lvsi == "yes" and msi_or_nsmp and endometrioid:
        return "High-intermediate"
    if figo == "IB" and high_grade and msi_or_nsmp and endometrioid:
        return "High-intermediate"
    if figo == "II" and msi_or_nsmp and endometrioid:
        return "High-intermediate"
    if figo in ("IIIA", "IIIB", "IIIC", "IVA") and msi_or_nsmp and endometrioid:
        return "High"
    if figo != "IVB" and p53 == "mutant" and mri_mi == "ge_50":
        return "High"
    if figo != "IVB" and non_endometrioid and mri_mi == "ge_50" and msi_or_nsmp:
        return "High"
    if figo == "IVB":
        return "Advanced metastatic"
    return np.nan


# Empty frames with the df_LNM columns; nothing in this cell fills them.
not_included_woTCGA = pd.DataFrame(columns=df_LNM.columns)
not_included_wTCGA = pd.DataFrame(columns=df_LNM.columns)

# Classify row by row (positionally) and collect the labels in lists. The columns are assigned
# in one go afterwards: element-wise chained assignment (df_LNM[col][i] = ...) is not guaranteed
# to write into df_LNM and does nothing under pandas Copy-on-Write.
_esgo_wo_labels = []
_esgo_w_labels = []

for _figo, _grade, _lvsi, _mri_mi, _p53, _histology, _msi, _pole in zip(
    df_LNM["FIGO_surgical"],
    df_LNM["PostoperativeGrade"],
    df_LNM["LVSI"],
    df_LNM["MRI_MI"],
    df_LNM["p53"],
    df_LNM["Histology"],
    df_LNM["MSI"],
    df_LNM["POLE"],
):
    _mol_class = _molecular_class(_msi, _pole)
    _esgo_wo_labels.append(_esgo_without_tcga(_figo, _grade, _lvsi, _mri_mi, _histology))
    _esgo_w_labels.append(_esgo_with_tcga(_figo, _grade, _lvsi, _mri_mi, _histology, _p53, _mol_class))

df_LNM["ESGOwoTCGA"] = pd.Series(_esgo_wo_labels, index=df_LNM.index, dtype=object)
df_LNM["ESGOwTCGA"] = pd.Series(_esgo_w_labels, index=df_LNM.index, dtype=object)

Insert probabilities corresponding with the risk groups

In [None]:
# High-intermediate, High, and Advanced metastatic risk groups are considered for LND in the ESGO classification

# Probability of LNM assigned to each ESGO risk group.
# NOTE(review): the source of these per-group probabilities is not stated in this notebook.
ESGO_GROUP_PROBABILITY = {
    "Low": 0.01,
    "Intermediate": 0.05,
    "High-intermediate": 0.11,
    "High": 0.20,
    "Advanced metastatic": 0.20,
}


def _esgo_group_to_probability(group):
    """Map one ESGO risk group to its probability.

    Unclassified rows (NaN) stay NaN; any label that is not a known risk group maps to 0.
    """
    if pd.isna(group):
        return np.nan
    return ESGO_GROUP_PROBABILITY.get(group, 0)


# Whole-column assignment instead of element-wise chained assignment, which is not guaranteed
# to write into df_LNM. This also removes the misspelled "ESGOwTCGALND" column name that made
# the fallback branch of the with-TCGA mapping raise a KeyError.
df_LNM["ESGOwoTCGA_LND"] = df_LNM["ESGOwoTCGA"].map(_esgo_group_to_probability).astype(float)
df_LNM["ESGOwTCGA_LND"] = df_LNM["ESGOwTCGA"].map(_esgo_group_to_probability).astype(float)

Generate the ESGO targets (the target rows that ESGO could classify)

In [None]:
# Get the predictions and the matching LNM truth for both clinical (ESGO) models.
# Only the rows that ESGO could classify (non-NaN probability) are kept.


def _esgo_predictions_and_truth(frame, probability_column, truth_column="LNM"):
    """Return (predictions, truth) as single-column DataFrames for the classifiable rows.

    Rows with NaN in `probability_column` are dropped. The truth is taken from `truth_column`
    for exactly the same rows, with "yes"/"no" replaced by 1/0. Both frames get a 0..n-1 index.
    """
    predictions = frame[probability_column].dropna()
    # predictions.index holds index *labels*, so the truth is selected with .loc; using .iloc
    # here is only correct while the frame happens to have a default RangeIndex.
    truth = frame.loc[predictions.index, truth_column].replace({"yes": 1, "no": 0})
    return (
        pd.DataFrame(predictions.reset_index(drop=True)),
        pd.DataFrame(truth.reset_index(drop=True)),
    )


# ESGO without TCGA: rows with a value in the ESGOwoTCGA_LND column
ESGOwoTCGA, ESGOwoTCGA_LNMTruth = _esgo_predictions_and_truth(df_LNM, "ESGOwoTCGA_LND")

# ESGO with TCGA: rows with a value in the ESGOwTCGA_LND column
ESGOwTCGA, ESGOwTCGA_LNMTruth = _esgo_predictions_and_truth(df_LNM, "ESGOwTCGA_LND")

Replace the labels for the targets with 1 and 0

In [None]:
# Encode the outcome labels as integers: "yes" -> 1, "no" -> 0.
yes_no_encoding = {"yes":1, "no":0}

target_LNM = df_LNM["LNM"].replace(yes_no_encoding)
target_Surv = df_Surv["Survival5yr"].replace(yes_no_encoding)

Define the overall evidence columns

In [None]:
# Columns that may be entered as evidence; each network only uses the ones it has a node for.
evidence_columns = [
    "ER",
    "PR",
    "p53",
    "L1CAM",
    "CA125",
    "Platelets",
    "Cytology",
    "MRI_MI",
    "MSI",
    "POLE",
    "PreoperativeGrade",
]

Create datasets for the different networks, based on the evidence columns and the nodes that are present in the network

In [None]:
# Create one evidence dataset per (network, outcome) pair, restricted to the evidence
# columns for which the network has a node.


def _evidence_for(network, frame):
    """Return a copy of `frame` limited to the evidence columns that are nodes of `network`."""
    shared_columns = list(Counter(evidence_columns) & Counter(list(network.names())))
    return frame[shared_columns].copy()


df_origineel_LNM = _evidence_for(origineel_net, df_LNM)
df_origineel_Surv = _evidence_for(origineel_net, df_Surv)

df_WOPlat_LNM = _evidence_for(WOPlat, df_LNM)
df_WOPlat_Surv = _evidence_for(WOPlat, df_Surv)

df_WOPlat_MRI_LNM = _evidence_for(WOPlat_MRI, df_LNM)
df_WOPlat_MRI_Surv = _evidence_for(WOPlat_MRI, df_Surv)

df_WOPlat_TCGA_LNM = _evidence_for(WOPlat_TCGA, df_LNM)
df_WOPlat_TCGA_Surv = _evidence_for(WOPlat_TCGA, df_Surv)

df_WOPlat_TCGA_MRI_LNM = _evidence_for(WOPlat_TCGA_MRI, df_LNM)
df_WOPlat_TCGA_MRI_Surv = _evidence_for(WOPlat_TCGA_MRI, df_Surv)

df_WPlat_LNM = _evidence_for(WPlat, df_LNM)
df_WPlat_Surv = _evidence_for(WPlat, df_Surv)

df_WPlat_all_LNM = _evidence_for(WPlat_all, df_LNM)
df_WPlat_all_Surv = _evidence_for(WPlat_all, df_Surv)

Create a function to get the probabilities for the different networks

In [None]:
def _posteriors_per_row(engine, evidence, target, description):
    """Return the posterior of `target` for every row of `evidence`, in row order.

    Missing values in a row are left out of the evidence. If the evidence of a row cannot be
    applied or inference fails, the error is printed and the posterior *without any evidence*
    is stored for that row, so the output stays aligned with the input rows.
    """
    posteriors = []
    for j in range(len(evidence)):
        row_evidence = evidence.iloc[j].dropna().to_dict()

        try:
            engine.setEvidence(row_evidence)
            engine.makeInference()
            posterior = engine.posterior(target)
        except Exception as error:
            print(f"Error at row regarding {description}", j)
            print(error)
            # Do not reuse the previous row's posterior (another patient's prediction, or an
            # unbound variable on the first row): fall back to the evidence-free posterior.
            engine.eraseAllEvidence()
            posterior = engine.posterior(target)

        posteriors.append(posterior)
    return posteriors


def getProbabilities(model, evidence_LNM, evidence_Surv, Surv = "Survival5yr", samples = 100):
    """Compute per-row posteriors for LNM and for the survival node of a network.

    Parameters
    ----------
    model : network passed to gum.LazyPropagation.
    evidence_LNM : DataFrame with one row of evidence per patient for the LNM predictions.
    evidence_Surv : DataFrame with one row of evidence per patient for the survival predictions.
    Surv : name of the survival node to query (default "Survival5yr").
    samples : unused; kept so existing calls keep working.

    Returns
    -------
    (resultsLNM, resultsSurvival) : two lists with one posterior per input row.
    """
    net = gum.LazyPropagation(model)
    net.setNumberOfThreads(10)

    resultsLNM = _posteriors_per_row(net, evidence_LNM, "LNM", "LNM")
    # Query the node named by `Surv` instead of a hard-coded "Survival5yr".
    resultsSurvival = _posteriors_per_row(net, evidence_Surv, Surv, "Survival")

    return resultsLNM, resultsSurvival


Get the probabilities for the different networks

In [None]:
# Run the inference for every network; each call returns (LNM posteriors, survival posteriors).
print("Started 1")
results_origineel_LNM, results_origineel_Surv = getProbabilities(
    model=origineel_net, evidence_LNM=df_origineel_LNM, evidence_Surv=df_origineel_Surv
)

print("Started 2")
results_WOPlat_LNM, results_WOPlat_Surv = getProbabilities(
    model=WOPlat, evidence_LNM=df_WOPlat_LNM, evidence_Surv=df_WOPlat_Surv
)

print("Started 3")
results_WOPlat_MRI_LNM, results_WOPlat_MRI_Surv = getProbabilities(
    model=WOPlat_MRI, evidence_LNM=df_WOPlat_MRI_LNM, evidence_Surv=df_WOPlat_MRI_Surv
)

print("Started 4")
results_WOPlat_TCGA_LNM, results_WOPlat_TCGA_Surv = getProbabilities(
    model=WOPlat_TCGA, evidence_LNM=df_WOPlat_TCGA_LNM, evidence_Surv=df_WOPlat_TCGA_Surv
)

print("Started 5")
results_WOPlat_TCGA_MRI_LNM, results_WOPlat_TCGA_MRI_Surv = getProbabilities(
    model=WOPlat_TCGA_MRI, evidence_LNM=df_WOPlat_TCGA_MRI_LNM, evidence_Surv=df_WOPlat_TCGA_MRI_Surv
)

print("Started 6")
results_WPlat_LNM, results_WPlat_Surv = getProbabilities(
    model=WPlat, evidence_LNM=df_WPlat_LNM, evidence_Surv=df_WPlat_Surv
)

print("Started 7")
results_WPlat_all_LNM, results_WPlat_all_Surv = getProbabilities(
    model=WPlat_all, evidence_LNM=df_WPlat_all_LNM, evidence_Surv=df_WPlat_all_Surv
)



Create a function to extract the probability values from the results

In [None]:
def getProbResults(results, target):
    """Pick entry `target` out of every element of `results`.

    Parameters
    ----------
    results : sequence of indexable items (one per row), e.g. the posteriors of a network.
    target : index/key of the value to extract from each item.

    Returns
    -------
    DataFrame built from the extracted values, one row per element of `results`.
    """
    picked = [results[position][target] for position in range(len(results))]
    return pd.DataFrame(picked)

Get the probabilities for the targets, LNM and survival yes

In [None]:
# Entry 1 of every posterior is extracted for both outcomes (the "yes" state, per the text above).
org_LNM = getProbResults(results=results_origineel_LNM, target=1)
org_Surv = getProbResults(results=results_origineel_Surv, target=1)

WOPlat_LNM = getProbResults(results=results_WOPlat_LNM, target=1)
WOPlat_Surv = getProbResults(results=results_WOPlat_Surv, target=1)

WOPlat_MRI_LNM = getProbResults(results=results_WOPlat_MRI_LNM, target=1)
WOPlat_MRI_Surv = getProbResults(results=results_WOPlat_MRI_Surv, target=1)

WOPlat_TCGA_LNM = getProbResults(results=results_WOPlat_TCGA_LNM, target=1)
WOPlat_TCGA_Surv = getProbResults(results=results_WOPlat_TCGA_Surv, target=1)

WOPlat_TCGA_MRI_LNM = getProbResults(results=results_WOPlat_TCGA_MRI_LNM, target=1)
WOPlat_TCGA_MRI_Surv = getProbResults(results=results_WOPlat_TCGA_MRI_Surv, target=1)

WPlat_LNM = getProbResults(results=results_WPlat_LNM, target=1)
WPlat_Surv = getProbResults(results=results_WPlat_Surv, target=1)

WPlat_all_LNM = getProbResults(results=results_WPlat_all_LNM, target=1)
WPlat_all_Surv = getProbResults(results=results_WPlat_all_Surv, target=1)


Define functions to retrieve the metrics

In [None]:
from sklearn.metrics import roc_curve, f1_score
# Find the accuracy, roc auc, precision and recall for the results and the targets data
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, confusion_matrix, log_loss, brier_score_loss

def getMetrics(results, targets):
    """Compute classification metrics for binary (0/1) predictions against binary targets.

    Parameters
    ----------
    results : predicted labels (0/1), one per sample.
    targets : true labels (0/1), one per sample.

    Returns
    -------
    Single-column DataFrame indexed by metric name: Accuracy, ROC AUC, Precision (PPV),
    TPR (Recall/Sens), TNR (Spec), F1, Brier and Log Loss.
    """
    # Compute the confusion matrix once. labels=[0, 1] pins the 2x2 layout even when one class
    # does not occur, in which case an unlabelled matrix would be 1x1 and indexing would fail.
    TN, FP, FN, TP = confusion_matrix(targets, results, labels=[0, 1]).ravel()

    accuracy = accuracy_score(targets, results)
    roc_auc = roc_auc_score(targets, results)
    precision = precision_score(targets, results)
    TPR = recall_score(targets, results)
    TNR = TN / (TN + FP)

    f1 = f1_score(targets, results)
    brier = brier_score_loss(targets, results)
    loglike = log_loss(targets, results)

    x = pd.DataFrame([accuracy, roc_auc, precision, TPR,TNR, f1, brier, loglike], index=["Accuracy", "ROC AUC", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"])
    return x

def getSlimMetrics(results, targets):
    """Compute threshold-free metrics for predicted probabilities.

    Parameters
    ----------
    results : DataFrame whose first column holds the predicted probabilities.
    targets : true labels (0/1) as a Series or single-column DataFrame.

    Returns
    -------
    Single-column DataFrame indexed by "ROC AUC", "Log Loss", "Brier",
    "N Predicted/N Observed" (sum of probabilities / number of observed positives) and "Ratio".

    Raises
    ------
    ValueError : if `results` and `targets` differ in length.
    """
    if len(results) != len(targets):
        raise ValueError(
            f"Results and targets are not the same length. Results: {len(results)} Targets: {len(targets)}"
        )

    # Get ROC AUC, Log Loss, Brier, and N Predicted/N Observed
    ROC = round(roc_auc_score(targets, results), 4)
    LL = round(log_loss(targets, results), 4)
    Brier = round(brier_score_loss(targets, results), 4)
    # Positional access: the probability column is named 0 for the networks but e.g.
    # "ESGOwoTCGA_LND" for the clinical models, so a label lookup with [0] is not reliable.
    N_pred = results.sum().iloc[0]
    # Works for a Series as well as a single-column DataFrame of targets.
    N_obs = int(np.asarray(targets).sum())
    N_br = f"{int(N_pred)}/{int(N_obs)}"
    Ratio = round(N_pred/N_obs, 4)

    x = pd.DataFrame([ROC, LL, Brier, N_br, Ratio], index=["ROC AUC", "Log Loss", "Brier", "N Predicted/N Observed", "Ratio"])
    return x


Get the slim (non thresholded) metrics for the different models

In [None]:
# Threshold-free metrics per network, for LNM and for survival
print("Started 1")
org_LNM_metrics = getSlimMetrics(results=org_LNM, targets=target_LNM)
org_Surv_metrics = getSlimMetrics(results=org_Surv, targets=target_Surv)

print("Started 2")
WOPlat_LNM_metrics = getSlimMetrics(results=WOPlat_LNM, targets=target_LNM)
WOPlat_Surv_metrics = getSlimMetrics(results=WOPlat_Surv, targets=target_Surv)

print("Started 3")
WOPlat_MRI_LNM_metrics = getSlimMetrics(results=WOPlat_MRI_LNM, targets=target_LNM)
WOPlat_MRI_Surv_metrics = getSlimMetrics(results=WOPlat_MRI_Surv, targets=target_Surv)

print("Started 4")
WOPlat_TCGA_LNM_metrics = getSlimMetrics(results=WOPlat_TCGA_LNM, targets=target_LNM)
WOPlat_TCGA_Surv_metrics = getSlimMetrics(results=WOPlat_TCGA_Surv, targets=target_Surv)

print("Started 5")
WOPlat_TCGA_MRI_LNM_metrics = getSlimMetrics(results=WOPlat_TCGA_MRI_LNM, targets=target_LNM)
WOPlat_TCGA_MRI_Surv_metrics = getSlimMetrics(results=WOPlat_TCGA_MRI_Surv, targets=target_Surv)

print("Started 6")
WPlat_LNM_metrics = getSlimMetrics(results=WPlat_LNM, targets=target_LNM)
WPlat_Surv_metrics = getSlimMetrics(results=WPlat_Surv, targets=target_Surv)

print("Started 7")
WPlat_all_LNM_metrics = getSlimMetrics(results=WPlat_all_LNM, targets=target_LNM)
WPlat_all_Surv_metrics = getSlimMetrics(results=WPlat_all_Surv, targets=target_Surv)

# The clinical (ESGO) models are scored against their own truth: only the rows ESGO could classify
print("Started 8")
ESGOwoTCGA_LNM_metrics = getSlimMetrics(results=ESGOwoTCGA, targets=ESGOwoTCGA_LNMTruth)
ESGOwTCGA_LNM_metrics = getSlimMetrics(results=ESGOwTCGA, targets=ESGOwTCGA_LNMTruth)


Concatenate the metrics to display a table comparing the different models for LNM

In [None]:
# Concatenate the LNM metrics of all models into one comparison table (one column per model).
# Each column header is paired with its own metrics here; previously ESGOwoTCGA_LNM_metrics was
# listed twice, so the "ESGOwTCGA" column showed the metrics of the model without TCGA.
lnm_metrics_by_model = {
    "ESGOwoTCGA": ESGOwoTCGA_LNM_metrics,
    "ESGOwTCGA": ESGOwTCGA_LNM_metrics,
    "Origineel Netwerk": org_LNM_metrics,
    "-Plat -TCGA -MRI": WOPlat_LNM_metrics,
    "-Plat -TCGA +MRI": WOPlat_MRI_LNM_metrics,
    "-Plat +TCGA -MRI": WOPlat_TCGA_LNM_metrics,
    "-Plat +TCGA +MRI": WOPlat_TCGA_MRI_LNM_metrics,
    "+Plat -TCGA -MRI": WPlat_LNM_metrics,
    "+Plat +TCGA +MRI": WPlat_all_LNM_metrics,
}

LNM_metrics = pd.concat(list(lnm_metrics_by_model.values()), axis=1)
LNM_metrics.columns = list(lnm_metrics_by_model)
LNM_metrics


Concatenate the metrics to display a table comparing the different models for Survival

In [None]:
# Comparison table of the survival metrics: one column per network, keyed by its display label.
surv_metrics_by_model = {
    "Origineel Netwerk": org_Surv_metrics,
    "-Plat -TCGA -MRI": WOPlat_Surv_metrics,
    "-Plat -TCGA +MRI": WOPlat_MRI_Surv_metrics,
    "-Plat +TCGA -MRI": WOPlat_TCGA_Surv_metrics,
    "-Plat +TCGA +MRI": WOPlat_TCGA_MRI_Surv_metrics,
    "+Plat -TCGA -MRI": WPlat_Surv_metrics,
    "+Plat +TCGA +MRI": WPlat_all_Surv_metrics,
}

Surv_metrics = pd.concat(list(surv_metrics_by_model.values()), axis=1).round(3)
Surv_metrics.columns = list(surv_metrics_by_model)
Surv_metrics

Plot the ROC curves for the different models

In [None]:
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
import seaborn as sns

# Set the style
sns.set_style("whitegrid")
# Render inline figures as png and pdf
set_matplotlib_formats('png', 'pdf')
plt.rcParams['figure.dpi'] = 150


def _add_roc_curve(axis, truth, scores, label):
    """Draw the ROC curve of `scores` against `truth` on `axis`, with the AUC in the legend label."""
    fpr, tpr, _ = roc_curve(truth, scores, pos_label=1)
    axis.plot(fpr, tpr, label='%s (area = %0.2f)' % (label, roc_auc_score(truth, scores)))


# (truth, predicted probabilities, legend label) per curve. The curve and the AUC in its label
# are computed from the same scores; the "-Plat -TCGA -MRI" LNM curve used to be drawn from the
# original network's probabilities while its label showed the AUC of WOPlat_LNM.
lnm_curves = [
    (target_LNM, org_LNM, "Origineel Netwerk"),
    (target_LNM, WOPlat_LNM, "-Plat -TCGA -MRI"),
    (target_LNM, WOPlat_MRI_LNM, "-Plat -TCGA +MRI"),
    (target_LNM, WOPlat_TCGA_LNM, "-Plat +TCGA -MRI"),
    (target_LNM, WOPlat_TCGA_MRI_LNM, "-Plat +TCGA +MRI"),
    (target_LNM, WPlat_LNM, "+Plat -TCGA -MRI"),
    (target_LNM, WPlat_all_LNM, "+Plat +TCGA +MRI"),
    # Clinical classifications, scored on the rows ESGO could classify
    (ESGOwoTCGA_LNMTruth, ESGOwoTCGA, "ESGOwoTCGA"),
    (ESGOwTCGA_LNMTruth, ESGOwTCGA, "ESGOwTCGA"),
]
surv_curves = [
    (target_Surv, org_Surv, "Origineel Netwerk"),
    (target_Surv, WOPlat_Surv, "-Plat -TCGA -MRI"),
    (target_Surv, WOPlat_MRI_Surv, "-Plat -TCGA +MRI"),
    (target_Surv, WOPlat_TCGA_Surv, "-Plat +TCGA -MRI"),
    (target_Surv, WOPlat_TCGA_MRI_Surv, "-Plat +TCGA +MRI"),
    (target_Surv, WPlat_Surv, "+Plat -TCGA -MRI"),
    (target_Surv, WPlat_all_Surv, "+Plat +TCGA +MRI"),
]

# Plot all the ROC curves in one figure (LNM and Survival in separate panels)
fig, ax = plt.subplots(1,2, figsize=(15,5))

fig.suptitle("ROC Curves", fontsize=16)

for axis, panel_title, curves in ((ax[0], "LNM", lnm_curves), (ax[1], "Survival", surv_curves)):
    # Diagonal = performance of a random classifier
    axis.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)
    for truth, scores, label in curves:
        _add_roc_curve(axis, truth, scores, label)
    axis.title.set_text(panel_title)
    axis.legend(loc="lower right")

Define a function to get the diagnoses on a range of thresholds

In [None]:
# Define a function to get the results based on a threshold for the probability
def getRangeResults(probResults, thresholds=None):
    """Binarise predicted probabilities at a range of thresholds.

    Parameters
    ----------
    probResults : DataFrame with the predicted probabilities. The column used is the last one
        whose (string) name contains "ESGO"; if there is none, the column labelled 0.
    thresholds : optional list of cut-offs; defaults to 0.05, 0.10, 0.20, ..., 0.90.

    Returns
    -------
    DataFrame with one row per input row (index 0..n-1) and one column per threshold,
    holding 1 where probability >= threshold and 0 otherwise (NaN probabilities give 0).
    """
    if thresholds is None:
        thresholds = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]

    if len(probResults) == 0:
        return pd.DataFrame([], columns=thresholds)

    # The target column does not depend on the row, so it is determined once.
    targetcol = 0
    for col in probResults.columns:
        if isinstance(col, str) and "ESGO" in col:
            targetcol = col

    # Compare positionally (rows x thresholds) so the result does not depend on the index labels.
    probabilities = probResults[targetcol].to_numpy(dtype=float)
    flags = (probabilities[:, None] >= np.asarray(thresholds, dtype=float)).astype(int)

    return pd.DataFrame(flags, columns=thresholds)



Get the ranges for the different models

In [None]:
# Thresholded (0/1) predictions per network, for LNM and for survival
org_LNM_range = getRangeResults(probResults=org_LNM)
org_Surv_range = getRangeResults(probResults=org_Surv)

WOPlat_LNM_range = getRangeResults(probResults=WOPlat_LNM)
WOPlat_Surv_range = getRangeResults(probResults=WOPlat_Surv)

WOPlat_MRI_LNM_range = getRangeResults(probResults=WOPlat_MRI_LNM)
WOPlat_MRI_Surv_range = getRangeResults(probResults=WOPlat_MRI_Surv)

WOPlat_TCGA_LNM_range = getRangeResults(probResults=WOPlat_TCGA_LNM)
WOPlat_TCGA_Surv_range = getRangeResults(probResults=WOPlat_TCGA_Surv)

WOPlat_TCGA_MRI_LNM_range = getRangeResults(probResults=WOPlat_TCGA_MRI_LNM)
WOPlat_TCGA_MRI_Surv_range = getRangeResults(probResults=WOPlat_TCGA_MRI_Surv)

WPlat_LNM_range = getRangeResults(probResults=WPlat_LNM)
WPlat_Surv_range = getRangeResults(probResults=WPlat_Surv)

WPlat_all_LNM_range = getRangeResults(probResults=WPlat_all_LNM)
WPlat_all_Surv_range = getRangeResults(probResults=WPlat_all_Surv)

# Same for the clinical categories of ESGOwoTCGA and ESGOwTCGA
ESGOwoTCGA_range = getRangeResults(probResults=ESGOwoTCGA)
ESGOwTCGA_range = getRangeResults(probResults=ESGOwTCGA)


Create a function to make a 3d matrix of metrics for each threshold

In [None]:
# Build a metrics-by-threshold table from thresholded predictions
def getMetricsRange(results, targets):
    """Evaluate thresholded predictions at every threshold.

    Parameters
    ----------
    results : DataFrame with one column of 0/1 predictions per threshold (columns are the
        threshold values), as produced by getRangeResults.
    targets : true labels, passed on to getMetrics.

    Returns
    -------
    DataFrame with the selected metrics as rows and the thresholds as columns, rounded to 3 decimals.
    """
    thresholds = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    # Rows of the getMetrics output that are kept ("ROC AUC" is left out)
    metricnames = ["Accuracy", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"]

    per_threshold = [
        round(getMetrics(results[cutoff].values, targets).loc[metricnames], 3)
        for cutoff in thresholds
    ]

    table = pd.concat(per_threshold, axis=1)
    table.columns = thresholds
    return table

Get the metric ranges for the different models

In [None]:
# Metrics per threshold for every network, for LNM and for survival
print("Started 1")
org_LNM_metrics_range = getMetricsRange(results=org_LNM_range, targets=target_LNM)
org_Surv_metrics_range = getMetricsRange(results=org_Surv_range, targets=target_Surv)

print("Started 2")
WOPlat_LNM_metrics_range = getMetricsRange(results=WOPlat_LNM_range, targets=target_LNM)
WOPlat_Surv_metrics_range = getMetricsRange(results=WOPlat_Surv_range, targets=target_Surv)

print("Started 3")
WOPlat_MRI_LNM_metrics_range = getMetricsRange(results=WOPlat_MRI_LNM_range, targets=target_LNM)
WOPlat_MRI_Surv_metrics_range = getMetricsRange(results=WOPlat_MRI_Surv_range, targets=target_Surv)

print("Started 4")
WOPlat_TCGA_LNM_metrics_range = getMetricsRange(results=WOPlat_TCGA_LNM_range, targets=target_LNM)
WOPlat_TCGA_Surv_metrics_range = getMetricsRange(results=WOPlat_TCGA_Surv_range, targets=target_Surv)

print("Started 5")
WOPlat_TCGA_MRI_LNM_metrics_range = getMetricsRange(results=WOPlat_TCGA_MRI_LNM_range, targets=target_LNM)
WOPlat_TCGA_MRI_Surv_metrics_range = getMetricsRange(results=WOPlat_TCGA_MRI_Surv_range, targets=target_Surv)

print("Started 6")
WPlat_LNM_metrics_range = getMetricsRange(results=WPlat_LNM_range, targets=target_LNM)
WPlat_Surv_metrics_range = getMetricsRange(results=WPlat_Surv_range, targets=target_Surv)

print("Started 7")
WPlat_all_LNM_metrics_range = getMetricsRange(results=WPlat_all_LNM_range, targets=target_LNM)
WPlat_all_Surv_metrics_range = getMetricsRange(results=WPlat_all_Surv_range, targets=target_Surv)

# The clinical (ESGO) categories are evaluated against their own truth subsets
print("Started 8")
ESGOwoTCGA_LNM_metrics_range = getMetricsRange(results=ESGOwoTCGA_range, targets=ESGOwoTCGA_LNMTruth)
ESGOwTCGA_LNM_metrics_range = getMetricsRange(results=ESGOwTCGA_range, targets=ESGOwTCGA_LNMTruth)

Plot the recall vs precision for the different models

In [None]:
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot appearance: whitegrid theme, inline output as png + pdf, 150 dpi
sns.set_style("whitegrid")
set_matplotlib_formats('png', 'pdf')
plt.rcParams['figure.dpi'] = 150

# Row labels of the metric tables that hold recall and precision
_recall_row = "TPR (Recall/Sens)"
_precision_row = "Precision (PPV)"

# Recall vs Precision -- LNM
# One curve per model: recall on the x-axis, precision on the y-axis,
# one point per threshold column of the metric table.
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("LNM Recall vs Precision per Model", fontsize=16)

for _table, _legend_label in [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (WOPlat_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # Clinical ESGO classifications, drawn last so they end the legend
    (ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]:
    ax.plot(_table.loc[_recall_row], _table.loc[_precision_row], label=_legend_label)

ax.legend(loc="upper right")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")

# Recall vs Precision -- Survival (no ESGO curves for this outcome)
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("Survival Recall vs Precision per Model", fontsize=16)

for _table, _legend_label in [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (WOPlat_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]:
    ax.plot(_table.loc[_recall_row], _table.loc[_precision_row], label=_legend_label)

ax.legend(loc="upper right")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")



Plot the recall vs threshold for the different models

In [None]:
# Recall vs threshold
# The metric tables have one column per threshold, so plotting a single row
# puts the threshold on the x-axis and recall on the y-axis.
_recall_row = "TPR (Recall/Sens)"
_threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]

# LNM
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("LNM Recall vs Threshold per Model", fontsize=16)

for _table, _legend_label in [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (WOPlat_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # Clinical ESGO classifications
    (ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]:
    ax.plot(_table.loc[_recall_row], label=_legend_label)

ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Recall")
ax.set_xticks(_threshold_ticks)
ax.set_xticklabels(_threshold_ticks)

# Survival (no ESGO curves for this outcome)
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("Survival Recall per Threshold per Model", fontsize=16)

for _table, _legend_label in [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (WOPlat_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]:
    ax.plot(_table.loc[_recall_row], label=_legend_label)

ax.legend(loc="lower left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Recall")
ax.set_xticks(_threshold_ticks)
ax.set_xticklabels(_threshold_ticks)



Plot the precision vs threshold for the different models

In [None]:
# Precision vs threshold
# The metric tables have one column per threshold, so plotting a single row
# puts the threshold on the x-axis and precision on the y-axis.
_precision_row = "Precision (PPV)"
_threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]

# LNM
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("LNM Precision vs Threshold per Model", fontsize=16)

for _table, _legend_label in [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (WOPlat_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # Clinical ESGO classifications
    (ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]:
    ax.plot(_table.loc[_precision_row], label=_legend_label)

ax.legend(loc="upper left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Precision")
ax.set_xticks(_threshold_ticks)
ax.set_xticklabels(_threshold_ticks)

# Survival (no ESGO curves for this outcome)
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("Survival Precision vs Threshold per Model", fontsize=16)

for _table, _legend_label in [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (WOPlat_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]:
    ax.plot(_table.loc[_precision_row], label=_legend_label)

ax.legend(loc="upper left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Precision")
ax.set_xticks(_threshold_ticks)
ax.set_xticklabels(_threshold_ticks)


Create the decision curve analysis for the different models

In [None]:
# Decision curve analysis
from dcurves import dca, plot_graphs
import pandas as pd
import numpy as np
import statsmodels.api as sm
import lifelines

In [None]:
# Concatenate all model results with targets for the DCA
LNM_DCA = pd.concat([target_LNM, org_LNM,
                     WOPlat_LNM, WOPlat_MRI_LNM, WOPlat_TCGA_LNM, WOPlat_TCGA_MRI_LNM, WPlat_LNM, WPlat_all_LNM], axis=1)
LNM_DCA.columns = ["Target", "Origineel Netwerk","-Plat -TCGA -MRI", "-Plat -TCGA +MRI", "-Plat +TCGA -MRI", "-Plat +TCGA +MRI", "+Plat -TCGA -MRI", "+Plat +TCGA +MRI"]


dca_multi_LNM = \
    dca(
        data = LNM_DCA, 
        outcome = "Target", 
        modelnames = ["Origineel Netwerk", "-Plat -TCGA -MRI", "-Plat -TCGA +MRI", "-Plat +TCGA -MRI", "-Plat +TCGA +MRI", "+Plat -TCGA -MRI", "+Plat +TCGA +MRI"],
        thresholds = np.arange(0, 0.30,0.02),
        #harm={"Origineel Netwerk":0.03, "-Plat -TCGA -MRI":0.03, "-Plat -TCGA +MRI":0.03, "-Plat +TCGA -MRI":0.03, "-Plat +TCGA +MRI":0.03, "+Plat -TCGA -MRI":0.03, "+Plat +TCGA +MRI":0.03},
    )

ESGOwoTCGA_concat = pd.concat([ESGOwoTCGA, ESGOwoTCGA_LNMTruth], axis=1)
ESGOwoTCGA_concat.columns = ["ESGOwoTCGA", "Target"]

ESGOwTCGA_concat = pd.concat([ESGOwTCGA, ESGOwTCGA_LNMTruth], axis=1)
ESGOwTCGA_concat.columns = ["ESGOwTCGA", "Target"]

dca_EsgoWOTCGA = \
    dca(
        data = ESGOwoTCGA_concat,
        outcome = "Target",
        modelnames = ["ESGOwoTCGA"],
        thresholds = np.arange(0, 0.30,0.01),
    )

dca_EsgoWTCGA = \
    dca(
        data = ESGOwTCGA_concat,
        outcome = "Target",
        modelnames = ["ESGOwTCGA"],
        thresholds = np.arange(0, 0.30,0.01),
    )
# Select only the models from the last two dcas
dca_EsgoWOTCGA = dca_EsgoWOTCGA[0:30]
dca_EsgoWTCGA = dca_EsgoWTCGA[0:30]
figure = plt.figure(figsize=(10,7))
plt.rcParams.update({'font.size': 10})
plt.rcParams.update({'legend.loc': 'upper right', 'legend.borderaxespad':0})

plot_graphs(
        plot_df = pd.concat([dca_multi_LNM, dca_EsgoWOTCGA, dca_EsgoWTCGA]),
        y_limits=[-0.005, 0.2],
        graph_type="net_benefit",
        color_names= ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf", "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]
    )


Hypothesis: the ESGO models only give a prediction for patients that are clear cases.
To check this, compare all models using only the patients that do not have NaN values in the ESGOwoTCGA and ESGOwTCGA columns.

In [None]:
# Create the dataset 
new_df = df_LNM[df_LNM["ESGOwoTCGA"].notna()]
new_df = new_df[new_df["ESGOwTCGA"].notna()]

new_df

Select the results for these cases

In [None]:
# Create new results for the models
indices = new_df.index
new_org_LNM = org_LNM.loc[indices]
new_WOPlat_LNM = WOPlat_LNM.loc[indices]
new_WOPlat_MRI_LNM = WOPlat_MRI_LNM.loc[indices]
new_WOPlat_TCGA_LNM = WOPlat_TCGA_LNM.loc[indices]
new_WOPlat_TCGA_MRI_LNM = WOPlat_TCGA_MRI_LNM.loc[indices]
new_WPlat_LNM = WPlat_LNM.loc[indices]
new_WPlat_all_LNM = WPlat_all_LNM.loc[indices]
new_ESGOwoTCGA = pd.DataFrame(df_LNM["ESGOwoTCGA_LND"].loc[indices])
new_ESGOwTCGA = pd.DataFrame(df_LNM["ESGOwTCGA_LND"].loc[indices])

new_target_LNM = target_LNM.loc[indices]

# Reset all indices
new_org_LNM = new_org_LNM.reset_index(drop=True)
new_WOPlat_LNM = new_WOPlat_LNM.reset_index(drop=True)
new_WOPlat_MRI_LNM = new_WOPlat_MRI_LNM.reset_index(drop=True)
new_WOPlat_TCGA_LNM = new_WOPlat_TCGA_LNM.reset_index(drop=True)
new_WOPlat_TCGA_MRI_LNM = new_WOPlat_TCGA_MRI_LNM.reset_index(drop=True)
new_WPlat_LNM = new_WPlat_LNM.reset_index(drop=True)
new_WPlat_all_LNM = new_WPlat_all_LNM.reset_index(drop=True)
new_ESGOwoTCGA = new_ESGOwoTCGA.reset_index(drop=True)
new_ESGOwTCGA = new_ESGOwTCGA.reset_index(drop=True)
new_target_LNM = new_target_LNM.reset_index(drop=True)


Get the metrics for these new models

In [None]:
# Get the metrics for the new models
print("Started 1")
new_org_LNM_metrics = getSlimMetrics(new_org_LNM, new_target_LNM)

print("Started 2")
new_WOPlat_LNM_metrics = getSlimMetrics(new_WOPlat_LNM, new_target_LNM)

print("Started 3")
new_WOPlat_MRI_LNM_metrics = getSlimMetrics(new_WOPlat_MRI_LNM, new_target_LNM)

print("Started 4")
new_WOPlat_TCGA_LNM_metrics = getSlimMetrics(new_WOPlat_TCGA_LNM, new_target_LNM)

print("Started 5")
new_WOPlat_TCGA_MRI_LNM_metrics = getSlimMetrics(new_WOPlat_TCGA_MRI_LNM, new_target_LNM)

print("Started 6")
new_WPlat_LNM_metrics = getSlimMetrics(new_WPlat_LNM, new_target_LNM)

print("Started 7")
new_WPlat_all_LNM_metrics = getSlimMetrics(new_WPlat_all_LNM, new_target_LNM)

# Get the metrics for the clinical categories of ESGOwoTCGA and ESGOwTCGA
print("Started 8")
new_ESGOwoTCGA_LNM_metrics = getSlimMetrics(new_ESGOwoTCGA, new_target_LNM)
new_ESGOwTCGA_LNM_metrics = getSlimMetrics(new_ESGOwTCGA, new_target_LNM)

Concatenate the LNM metrics of the different models into one table for comparison

In [None]:
# Concatenate the metrics to between the models LNM
new_LNM_metrics = pd.concat([new_ESGOwoTCGA_LNM_metrics, new_ESGOwTCGA_LNM_metrics, new_org_LNM_metrics,  
                         new_WOPlat_LNM_metrics, new_WOPlat_MRI_LNM_metrics, new_WOPlat_TCGA_LNM_metrics, new_WOPlat_TCGA_MRI_LNM_metrics, new_WPlat_LNM_metrics, new_WPlat_all_LNM_metrics], axis=1)

new_LNM_metrics.columns = ["ESGOwoTCGA", "ESGOwTCGA", "Origineel Netwerk",
                          "-Plat -TCGA -MRI", "-Plat -TCGA +MRI", "-Plat +TCGA -MRI", "-Plat +TCGA +MRI", "+Plat -TCGA -MRI", "+Plat +TCGA +MRI"]
new_LNM_metrics


Get the range results for the new models

In [None]:
# Get range results for the new models
new_org_LNM_range = getRangeResults(new_org_LNM)
new_WOPlat_LNM_range = getRangeResults(new_WOPlat_LNM)
new_WOPlat_MRI_LNM_range = getRangeResults(new_WOPlat_MRI_LNM)
new_WOPlat_TCGA_LNM_range = getRangeResults(new_WOPlat_TCGA_LNM)
new_WOPlat_TCGA_MRI_LNM_range = getRangeResults(new_WOPlat_TCGA_MRI_LNM)
new_WPlat_LNM_range = getRangeResults(new_WPlat_LNM)
new_WPlat_all_LNM_range = getRangeResults(new_WPlat_all_LNM)

# Get the ranges for the clinical categories of ESGOwoTCGA and ESGOwTCGA
new_ESGOwoTCGA_range = getRangeResults(new_ESGOwoTCGA)
new_ESGOwTCGA_range = getRangeResults(new_ESGOwTCGA)

# Metric ranges for the new models
new_org_LNM_metrics_range = getMetricsRange(new_org_LNM_range, new_target_LNM)
new_WOPlat_LNM_metrics_range = getMetricsRange(new_WOPlat_LNM_range, new_target_LNM)
new_WOPlat_MRI_LNM_metrics_range = getMetricsRange(new_WOPlat_MRI_LNM_range, new_target_LNM)
new_WOPlat_TCGA_LNM_metrics_range = getMetricsRange(new_WOPlat_TCGA_LNM_range, new_target_LNM)
new_WOPlat_TCGA_MRI_LNM_metrics_range = getMetricsRange(new_WOPlat_TCGA_MRI_LNM_range, new_target_LNM)
new_WPlat_LNM_metrics_range = getMetricsRange(new_WPlat_LNM_range, new_target_LNM)
new_WPlat_all_LNM_metrics_range = getMetricsRange(new_WPlat_all_LNM_range, new_target_LNM)

# Get the metric ranges for the clinical categories of ESGOwoTCGA and ESGOwTCGA
new_ESGOwoTCGA_LNM_metrics_range = getMetricsRange(new_ESGOwoTCGA_range, new_target_LNM)
new_ESGOwTCGA_LNM_metrics_range = getMetricsRange(new_ESGOwTCGA_range, new_target_LNM)


Plot the recall vs precision for the new models

Plot the recall vs threshold for the new models

In [None]:
# Recall vs threshold on the ESGO-complete subset (LNM only)
# The metric tables have one column per threshold, so plotting a single row
# puts the threshold on the x-axis and recall on the y-axis.
_recall_row = "TPR (Recall/Sens)"

fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("LNM Recall vs Threshold per Model", fontsize=16)

for _table, _legend_label in [
    (new_org_LNM_metrics_range, "Origineel Netwerk"),
    (new_WOPlat_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (new_WOPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (new_WOPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (new_WOPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (new_WPlat_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (new_WPlat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # Clinical ESGO classifications
    (new_ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (new_ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]:
    ax.plot(_table.loc[_recall_row], label=_legend_label)

ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Recall")


Plot the precision vs threshold for the new models

In [None]:
# Precision vs threshold on the ESGO-complete subset (LNM only)
# The metric tables have one column per threshold, so plotting a single row
# puts the threshold on the x-axis and precision on the y-axis.
_precision_row = "Precision (PPV)"

fig, ax = plt.subplots(1, 1, figsize=(15, 5))
fig.suptitle("LNM Precision vs Threshold per Model", fontsize=16)

for _table, _legend_label in [
    (new_org_LNM_metrics_range, "Origineel Netwerk"),
    (new_WOPlat_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (new_WOPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (new_WOPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (new_WOPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (new_WPlat_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (new_WPlat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
    # Clinical ESGO classifications
    (new_ESGOwoTCGA_LNM_metrics_range, "ESGOwoTCGA"),
    (new_ESGOwTCGA_LNM_metrics_range, "ESGOwTCGA"),
]:
    ax.plot(_table.loc[_precision_row], label=_legend_label)

ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Precision")



Create DCA for the new models

In [None]:
# Decision curve analysis on the ESGO-complete subset.
# A single list of model names drives both the column labels and the models
# passed to dca, so the two always agree. The order matches the concat below.
_dca_model_names = [
    "ESGOwTCGA", "ESGOwoTCGA", "Origineel Netwerk",
    "-Plat -TCGA -MRI", "-Plat -TCGA +MRI", "-Plat +TCGA -MRI", "-Plat +TCGA +MRI",
    "+Plat -TCGA -MRI", "+Plat +TCGA +MRI",
]

_dca_frames = [
    new_target_LNM, new_ESGOwTCGA, new_ESGOwoTCGA, new_org_LNM,
    new_WOPlat_LNM, new_WOPlat_MRI_LNM, new_WOPlat_TCGA_LNM, new_WOPlat_TCGA_MRI_LNM,
    new_WPlat_LNM, new_WPlat_all_LNM,
]
new_LNM_DCA = pd.concat(_dca_frames, axis=1)
new_LNM_DCA.columns = ["Target"] + _dca_model_names
new_LNM_DCA = new_LNM_DCA.reset_index(drop=True)

# Net benefit of every model over thresholds 0.00-0.28 (step 0.02)
new_dca_multi_LNM = dca(
    data=new_LNM_DCA,
    outcome="Target",
    modelnames=list(_dca_model_names),
    thresholds=np.arange(0, 0.30, 0.02),
    #harm={"Origineel Netwerk":0.03, "-Plat -TCGA -MRI":0.03, "-Plat -TCGA +MRI":0.03, "-Plat +TCGA -MRI":0.03, "-Plat +TCGA +MRI":0.03, "+Plat -TCGA -MRI":0.03, "+Plat +TCGA +MRI":0.03},
)

# Plot the net-benefit curves
figure = plt.figure(figsize=(10, 7))
plt.rcParams.update({'font.size': 10})
plt.rcParams.update({'legend.loc': 'upper right', 'legend.borderaxespad':0})

plot_graphs(
    plot_df=new_dca_multi_LNM,
    y_limits=[-0.005, 0.2],
    graph_type="net_benefit",
    # Default matplotlib colour cycle, repeated so there are enough colours for every curve
    color_names=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf", "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"],
)