### Author: Ally Sprik
### Last-updated: 25-02-2024

The goal of this notebook is to evaluate the models on a wide variety of imputed datasets, in order to compare imputation methods.

In [None]:
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Fitted Bayesian networks that are compared in this notebook. The quoted label above
# each line is the column / legend name that network gets in the tables and plots below.
# "Origineel Netwerk"
origineel_net = gum.loadBN("../../0. Source_files/0.4. Original_Casper_files/Results/Casper_fitted_952.net")
# "-Plat -TCGA -MRI"
woPlat_woRest = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_original_952.net")
# "-Plat -TCGA +MRI"
WOPlat_MRI = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddMRIMI_fitted_952.net")
# "-Plat +TCGA -MRI"
WOPlat_TCGA = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddTCGA_1_fitted_952.net")
# "-Plat +TCGA +MRI"
WOPlat_TCGA_MRI = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddMRIMITCGA_fitted_952.net")
# "+Plat -TCGA -MRI"
wPlat_woRest = gum.loadBN("../1.3. Model/Fitted_Networks/R_WP_952.net")
# "+Plat +TCGA +MRI"
WPlat_all = gum.loadBN("../1.3. Model/Fitted_Networks/R_WP_all_952.net")

# Opt in to pandas copy-on-write for the whole notebook (global option, set once here).
pd.options.mode.copy_on_write = True  # This will allow the code to run faster and keep Pandas happy. Technical detail: https://pandas.pydata.org/pandas-docs/stable/user_guide/copy_on_write.html#

Select the data to use 

In [None]:
# Imputed dataset to evaluate. To compare another imputation method, comment out the
# active line and enable one of the alternatives (note that the separator differs per file).
#data = pd.read_csv("../../0. Source_files/0.3. Imputed_data/Combined_P53_imputed_Peter.csv", sep=";")
#data = pd.read_csv("../../0. Source_files/0.3. Imputed_data/TrainingTCGA_EM_imp.csv", sep=",")
#data = pd.read_csv("../../0. Source_files/0.3. Imputed_data/Peter_imputeddata.csv", sep=";")
#data = pd.read_csv("../../0. Source_files/0.3. Imputed_data/Peter-imputeddata-originalset.csv", sep=";")
#data = pd.read_csv("../../0. Source_files/0.4. Original_Casper_files/Results/Casper_imputed.csv")
#data = pd.read_csv("../../0. Source_files/0.3. Imputed_data/MIDAS_Imputed_TCGATraining_filled5yr.csv", sep=";")
#data = pd.read_csv("../../0. Source_files/0.3. Imputed_data/TrainingTCGA_JAMA_imputed.csv")
#casper_data = pd.read_csv("../../0. Source_files/0.4. Original_Casper_files/Results/Casper_imputed.csv")
#data = pd.read_csv("../../0. Source_files/0.3. Imputed_data/MIDAS_Imputed_TCGATraining_JAMA.csv")
data = pd.read_csv("../../0. Source_files/0.3. Imputed_data/Training_JAMA_EM_imputed.csv")

# Optional filter: keep only the rows flagged "yes" in Included_in_training_cohort
#data = data[data["Included_in_training_cohort"] == "yes"]

# Renumber the rows 0..n-1 (matters when the filter above is enabled)
data = data.reset_index(drop=True)
data

Define the targets and change the labels

In [None]:
# Ground-truth outcomes encoded as integers: 1 for "yes"/"positive", 0 for "no"/"negative".
label_to_int = {"yes": 1, "no": 0, "positive": 1, "negative": 0}
targets = data[["LNM", "Survival5yr"]].copy()
#targets = data[["LNM", "X5YR"]].copy()
for outcome in targets.columns:
    # Values that are not one of the four labels (e.g. already numeric 0/1) pass through
    # unchanged. The explicit astype(int) guarantees an integer dtype instead of relying
    # on DataFrame.replace silently downcasting object columns (deprecated in pandas 2.2),
    # and raises immediately if an unmapped label or a missing value is present.
    targets[outcome] = targets[outcome].map(lambda value: label_to_int.get(value, value)).astype(int)



Define evidence set

In [None]:
# Evidence table: every column of the dataset except the two outcome columns.
# DataFrame.drop already returns a new frame, so `data` itself is left untouched.
evidence = data.drop(columns=["LNM", "Survival5yr"])


Create specific datasets for networks evaluation

In [None]:
# Evidence subsets with the POLE/MSI, Platelets and MRI_MI columns removed in different
# combinations. Each one is derived from the full `evidence` table, which is not modified.
# NOTE(review): none of these frames is referenced again in the cells visible here.
data_or = evidence.drop(
    columns=['POLE', 'MSI', 'Platelets', 'MRI_MI', "FIGO", "Therapy"]
).assign(Recurrence=data["Recurrence_location"])  # column named Recurrence, filled from Recurrence_location
data_noMRI_NoTCGA_noPlat = evidence.drop(columns=['POLE', 'MSI', 'Platelets', 'MRI_MI'])
data_noMRI_NoPlat = evidence.drop(columns=['Platelets', 'MRI_MI'])
data_NoPlat = evidence.drop(columns=['Platelets'])
data_noMRI = evidence.drop(columns=['MRI_MI'])
data_noMRI_NoTCGA = evidence.drop(columns=['POLE', 'MSI', 'MRI_MI'])
data_noTCGA_NoPlat = evidence.drop(columns=['POLE', 'MSI', 'Platelets'])

Define a function to loop through the evidence and get the results

Pseudocode:
- Define getProbabilities function:
    - Loop through the evidence
        - Get the evidence row
        - Get the result for LNM
        - Get the result for Survival
        - Append the results to a list
    - Return the list

In [None]:
def getProbabilities(model, evidence, Surv="Survival5yr", samples=100):
    """Compute, for every evidence row, the posteriors of ``LNM`` and of the survival node.

    Parameters
    ----------
    model : network object accepted by ``gum.LazyPropagation``.
    evidence : pandas.DataFrame
        One row per patient. Each row is turned into a ``{column: value}`` dict after
        dropping missing values, so a missing value leaves that node unobserved.
    Surv : str
        Name of the survival node to query (default ``"Survival5yr"``).
    samples : int
        Unused; kept only so existing calls that pass it keep working.

    Returns
    -------
    (list, list)
        Two lists with one entry per evidence row: the objects returned by
        ``posterior("LNM")`` and by ``posterior(Surv)``.

    Notes
    -----
    Inference is best-effort. When setting the evidence or running inference raises,
    the row number and error are printed and the previous row's posteriors are reused.
    If the very first row fails there is no previous row; the posteriors of the network
    without any evidence are used instead (the original code raised IndexError here).
    """
    resultsLNM = []
    resultsSurvival = []
    net = gum.LazyPropagation(model)

    for j in range(len(evidence)):
        # Missing values are dropped so that they are simply not entered as evidence
        evidencerow = evidence.iloc[j].dropna().to_dict()

        try:
            net.setEvidence(evidencerow)
            net.makeInference()

            resultLNM = net.posterior("LNM")
            resultSurvival = net.posterior(Surv)
        except Exception as error:
            print("Error at row", j)
            print(error)

            if resultsLNM:
                # Same fallback as before: repeat the previous row's posteriors
                resultLNM = resultsLNM[-1]
                resultSurvival = resultsSurvival[-1]
            else:
                # No previous row to fall back on: query the network without evidence
                net.eraseAllEvidence()
                net.makeInference()
                resultLNM = net.posterior("LNM")
                resultSurvival = net.posterior(Surv)

        resultsLNM.append(resultLNM)
        resultsSurvival.append(resultSurvival)

    return resultsLNM, resultsSurvival

Get the results for the networks

Pseudocode:
- Example for one network
    - Get the node names of the network
    - Remove the targets from the node names
    - Get the evidence for the network by combining the evidence and the node names
    - Get the results for the network
- repeat for all networks

In [None]:
# Run every fitted network over the evidence table. For each network the evidence is
# restricted to the network's own node names minus the two outcome nodes.
networks_in_order = [
    origineel_net,
    woPlat_woRest,
    WOPlat_MRI,
    WOPlat_TCGA,
    WOPlat_TCGA_MRI,
    wPlat_woRest,
    WPlat_all,
]

posterior_pairs = []
for run_number, network in enumerate(networks_in_order, start=1):
    print(f"Started {run_number}")
    # Select the data to use in the model, through nodenames
    names = network.names()
    names.remove("LNM")
    names.remove("Survival5yr")
    names = list(names)
    # NOTE(review): this selection needs every remaining node name to exist as an
    # evidence column; confirm that holds for a "Recurrence" node in the original network.
    ev_temp = evidence.copy()[names]
    if network is origineel_net:
        # Define Recurrence_location as Recurrence since this is how it is defined in this model
        ev_temp["Recurrence"] = data["Recurrence_location"]
    posterior_pairs.append(getProbabilities(network, ev_temp))

# Bind the (LNM, Survival) posterior lists to the names used by the rest of the notebook
(
    (org_LNM_res, org_Surv_res),
    (noPlat_noRest_LNM, noPlat_noRest_Surv),
    (noPlat_MRI_LNM, noPlat_MRI_Surv),
    (noPlat_TCGA_LNM, noPlat_TCGA_Surv),
    (noPlat_TCGA_MRI_LNM, noPlat_TCGA_MRI_Surv),
    (Plat_noRest_LNM, Plat_noRest_Surv),
    (Plat_all_LNM, Plat_all_Surv),
) = posterior_pairs


Define the functions to unpack the results

In [None]:
def getProbResults(results, target):
    """Pull one entry out of every posterior and return them as a one-column DataFrame.

    Parameters
    ----------
    results : sequence
        Indexable sequence of posteriors; each element is indexed with ``target``.
    target : index or key of the entry to extract from each posterior.

    Returns
    -------
    pandas.DataFrame
        One row per element of ``results``. When an element cannot be indexed, the row
        number and error are printed and 0 is stored for that row.
    """
    extracted = []

    for position in range(len(results)):
        try:
            value = results[position][target]
        except Exception as error:
            print("Error at row", position)
            print(error)
            value = 0
        extracted.append(value)

    return pd.DataFrame(extracted)

Unpack the probabilities

In [None]:
# Extract entry 1 of every posterior, giving one single-column DataFrame per network/outcome.
# NOTE(review): this presumes index 1 is the positive state of LNM / Survival5yr in all
# networks - confirm against the state order in the .net files.
(
    org_LNM_res_prob, org_Surv_res_prob,
    noPlat_noRest_LNM_prob, noPlat_noRest_Surv_prob,
    noPlat_MRI_LNM_prob, noPlat_MRI_Surv_prob,
    noPlat_TCGA_LNM_prob, noPlat_TCGA_Surv_prob,
    noPlat_TCGA_MRI_LNM_prob, noPlat_TCGA_MRI_Surv_prob,
    Plat_noRest_LNM_prob, Plat_noRest_Surv_prob,
    Plat_all_LNM_prob, Plat_all_Surv_prob,
) = (
    getProbResults(posterior_list, 1)
    for posterior_list in (
        org_LNM_res, org_Surv_res,
        noPlat_noRest_LNM, noPlat_noRest_Surv,
        noPlat_MRI_LNM, noPlat_MRI_Surv,
        noPlat_TCGA_LNM, noPlat_TCGA_Surv,
        noPlat_TCGA_MRI_LNM, noPlat_TCGA_MRI_Surv,
        Plat_noRest_LNM, Plat_noRest_Surv,
        Plat_all_LNM, Plat_all_Surv,
    )
)


Define the metric functions

Pseudocode:
- Define getMetrics function:
    - Get the confusion matrix
    - Get the accuracy
    - Get the ROC AUC
    - Get the precision
    - Get the recall
    - Get the specificity
    - Get the F1 score
    - Get the Brier score
    - Get the log loss
    - Return the metrics
- Define getSlimMetrics function:
    - Get the ROC AUC
    - Get the Log Loss
    - Get the Brier score
    - Get the N Predicted/N Observed
    - Get the Ratio
    - Return the metrics

In [None]:
from sklearn.metrics import roc_curve
# Find the accuracy, roc auc, precision and recall for the results and the targets data
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, confusion_matrix, log_loss, brier_score_loss

def getMetrics(results, targets):
    """Classification metrics for binary (0/1) predictions against binary targets.

    Parameters
    ----------
    results : array-like of 0/1 predictions (e.g. probabilities already thresholded).
    targets : array-like of 0/1 ground-truth labels, same length as ``results``.

    Returns
    -------
    pandas.DataFrame
        Single column, indexed by "Accuracy", "ROC AUC", "Precision (PPV)",
        "TPR (Recall/Sens)", "TNR (Spec)", "F1", "Brier" and "Log Loss".

    Notes
    -----
    * Precision and F1 are reported as 0 when they are undefined (no predicted
      positives), which is scikit-learn's own convention for these scores.
    * "ROC AUC", "Brier" and "Log Loss" are computed from ``results`` exactly as passed
      in; with hard 0/1 predictions they describe the thresholded classifier, not the
      underlying probabilities.
    """
    # One confusion matrix instead of four; labels=[0, 1] pins the 2x2 layout
    # [[TN, FP], [FN, TP]] regardless of which classes occur in the inputs.
    TN, FP, _FN, TP = confusion_matrix(targets, results, labels=[0, 1]).ravel()

    accuracy = accuracy_score(targets, results)
    roc_auc = roc_auc_score(targets, results)
    # zero_division=0 returns the same 0.0 as the default, without the warning
    precision = precision_score(targets, results, zero_division=0)
    TPR = recall_score(targets, results)
    TNR = TN / (TN + FP)

    # Guard the harmonic mean: precision + recall is 0 when nothing is predicted positive
    pr_sum = precision + TPR
    f1 = 2 * (precision * TPR) / pr_sum if pr_sum else 0.0
    brier = brier_score_loss(targets, results)
    loglike = log_loss(targets, results)

    x = pd.DataFrame([accuracy, roc_auc, precision, TPR,TNR, f1, brier, loglike], index=["Accuracy", "ROC AUC", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"])
    return x

def getSlimMetrics(results, targets):
    """Probability-based metrics for one model and one outcome.

    Parameters
    ----------
    results : predicted probabilities of the positive class; a single-column DataFrame
        (as produced by ``getProbResults``), a Series, or any 1-D array-like.
    targets : pandas.Series of 0/1 ground-truth labels, same length as ``results``.

    Returns
    -------
    pandas.DataFrame
        Single column, indexed by "ROC AUC", "Log Loss", "Brier" (each rounded to 4
        decimals), "N Predicted/N Observed" (string ``"<pred>/<obs>"``, where pred is the
        sum of the predicted probabilities truncated to an integer) and "Ratio"
        (predicted sum divided by observed count, rounded to 4 decimals).

    Raises
    ------
    ValueError
        If ``results`` and ``targets`` differ in length, or ``results`` is not
        one-dimensional / single-column.
    """
    if len(results) != len(targets):
        raise ValueError(
            f"Results and targets are not the same length. Results: {len(results)} Targets: {len(targets)}"
        )

    # Flatten to a 1-D float array so the input container and its column label do not matter
    probs = np.asarray(results, dtype=float)
    if probs.ndim == 2 and probs.shape[1] == 1:
        probs = probs[:, 0]
    if probs.ndim != 1:
        raise ValueError(f"Results must be one-dimensional or a single column, got shape {probs.shape}")

    # Get ROC AUC, Log Loss, Brier, and N Predicted/N Observed
    ROC = round(roc_auc_score(targets, probs), 4)
    LL = round(log_loss(targets, probs), 4)
    Brier = round(brier_score_loss(targets, probs), 4)
    N_pred = probs.sum()          # expected number of positives according to the model
    N_obs = int(targets.sum())    # observed number of positives
    N_br = f"{int(N_pred)}/{int(N_obs)}"
    Ratio = round(N_pred/N_obs, 4)

    x = pd.DataFrame([ROC, LL, Brier, N_br, Ratio], index=["ROC AUC", "Log Loss", "Brier", "N Predicted/N Observed", "Ratio"])
    return x


In [None]:
# Slim metrics (ROC AUC, log loss, Brier, predicted/observed) for every network and outcome.
# targetSurv is also used by later cells to pick the survival column of `targets`.
targetSurv = "Survival5yr"

probability_pairs = [
    (org_LNM_res_prob, org_Surv_res_prob),
    (noPlat_noRest_LNM_prob, noPlat_noRest_Surv_prob),
    (noPlat_MRI_LNM_prob, noPlat_MRI_Surv_prob),
    (noPlat_TCGA_LNM_prob, noPlat_TCGA_Surv_prob),
    (noPlat_TCGA_MRI_LNM_prob, noPlat_TCGA_MRI_Surv_prob),
    (Plat_noRest_LNM_prob, Plat_noRest_Surv_prob),
    (Plat_all_LNM_prob, Plat_all_Surv_prob),
]

slim_pairs = []
for step, (lnm_prob, surv_prob) in enumerate(probability_pairs, start=1):
    print(f"Started {step}")
    lnm_slim = getSlimMetrics(lnm_prob, targets["LNM"])
    surv_slim = getSlimMetrics(surv_prob, targets[targetSurv])
    slim_pairs.append((lnm_slim, surv_slim))

# Bind the (LNM, Survival) metric frames to the names used by the table cells below
(
    (org_LNM_metrics, org_Surv_metrics),
    (woPlat_noRest_LNM_metrics, woPlat_noRest_Surv_metrics),
    (woPlat_MRI_LNM_metrics, woPlat_MRI_Surv_metrics),
    (woPlat_TCGA_LNM_metrics, woPlat_TCGA_Surv_metrics),
    (woPlat_TCGA_MRI_LNM_metrics, woPlat_TCGA_MRI_Surv_metrics),
    (Plat_noRest_LNM_metrics, Plat_noRest_Surv_metrics),
    (Plat_all_LNM_metrics, Plat_all_Surv_metrics),
) = slim_pairs


Concatenate the metrics, to create a table with all the metrics, for comparison of LNM

In [None]:
# Side-by-side LNM table: one column per network, one row per slim metric.
lnm_table_columns = [
    ("Origineel Netwerk", org_LNM_metrics),
    ("-Plat -TCGA -MRI", woPlat_noRest_LNM_metrics),
    ("-Plat -TCGA +MRI", woPlat_MRI_LNM_metrics),
    ("-Plat +TCGA -MRI", woPlat_TCGA_LNM_metrics),
    ("-Plat +TCGA +MRI", woPlat_TCGA_MRI_LNM_metrics),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics),
]

LNM_metrics = pd.concat([frame for _label, frame in lnm_table_columns], axis=1)
LNM_metrics.columns = [label for label, _frame in lnm_table_columns]
LNM_metrics


Concatenate the metrics, to create a table with all the metrics, for comparison of Survival

In [None]:
# Side-by-side Survival table: one column per network, one row per slim metric.
surv_table_columns = [
    ("Origineel Netwerk", org_Surv_metrics),
    ("-Plat -TCGA -MRI", woPlat_noRest_Surv_metrics),
    ("-Plat -TCGA +MRI", woPlat_MRI_Surv_metrics),
    ("-Plat +TCGA -MRI", woPlat_TCGA_Surv_metrics),
    ("-Plat +TCGA +MRI", woPlat_TCGA_MRI_Surv_metrics),
    ("+Plat -TCGA -MRI", Plat_noRest_Surv_metrics),
    ("+Plat +TCGA +MRI", Plat_all_Surv_metrics),
]

Surv_metrics = pd.concat([frame for _label, frame in surv_table_columns], axis=1)
# NOTE(review): every column mixes floats with the "pred/obs" string, so the columns are
# presumably object dtype and this rounding likely has no visible effect - confirm.
Surv_metrics = Surv_metrics.round(3)
Surv_metrics.columns = [label for label, _frame in surv_table_columns]
Surv_metrics

Plot the ROC curves for the LNM and Survival

In [None]:
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
import seaborn as sns

# Plot defaults: seaborn grid style, inline figures emitted as png and pdf, 150 dpi
sns.set_style("whitegrid")
set_matplotlib_formats('png', 'pdf')
plt.rcParams['figure.dpi'] = 150

# ROC curves of all networks: LNM in the left panel, Survival in the right panel.
# Each entry: (legend label, LNM probabilities, Survival probabilities)
roc_models = [
    ('Origineel Netwerk', org_LNM_res_prob, org_Surv_res_prob),
    ('-Plat -TCGA -MRI', noPlat_noRest_LNM_prob, noPlat_noRest_Surv_prob),
    ('-Plat -TCGA +MRI', noPlat_MRI_LNM_prob, noPlat_MRI_Surv_prob),
    ('-Plat +TCGA -MRI', noPlat_TCGA_LNM_prob, noPlat_TCGA_Surv_prob),
    ('-Plat +TCGA +MRI', noPlat_TCGA_MRI_LNM_prob, noPlat_TCGA_MRI_Surv_prob),
    ('+Plat -TCGA -MRI', Plat_noRest_LNM_prob, Plat_noRest_Surv_prob),
    ('+Plat +TCGA +MRI', Plat_all_LNM_prob, Plat_all_Surv_prob),
]

fig, ax = plt.subplots(1,2, figsize=(15,5))
fig.suptitle("ROC Curves", fontsize=16)

# Each panel: (axes, panel title, ground truth, position of the probabilities in roc_models)
roc_panels = [
    (ax[0], "LNM", targets["LNM"], 1),
    (ax[1], "Survival", targets[targetSurv], 2),
]
for panel, panel_title, truth, prob_slot in roc_panels:
    # Dashed diagonal = chance-level reference line
    panel.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)

    for model in roc_models:
        fpr, tpr, _ = roc_curve(truth, model[prob_slot], pos_label=1)
        # The AUC is shown in the legend entry of each curve
        panel.plot(fpr, tpr, label=model[0] + ' (area = %0.2f)' % roc_auc_score(truth, model[prob_slot]))

    panel.title.set_text(panel_title)
    panel.legend(loc="lower right")

# x and y labels
for panel in ax:
    panel.set_xlabel('False Positive Rate')
    panel.set_ylabel('True Positive Rate')

Define a function to get the results based on a range of thresholds for the probability

Pseudocode:
- Define getRangeResults function:
    - Define thresholds
    - Create empty list
    - Loop through the probabilities
        - Loop through the thresholds
            - If the probability is higher than the threshold, append 1 to the list
            - Else, append 0 to the list
    - Return the list

In [None]:
def getRangeResults(probResults, thresholds=None):
    """Turn predicted probabilities into 0/1 predictions at a range of cut-offs.

    Parameters
    ----------
    probResults : pandas.DataFrame
        Probabilities, one row per case. The column used is the last column whose
        (string) name contains "ESGO"; if there is none, the column labelled ``0``.
    thresholds : sequence of float, optional
        Cut-offs to apply. Defaults to 0.05, 0.10, 0.20, ..., 0.90.

    Returns
    -------
    pandas.DataFrame
        One row per case (default 0..n-1 index) and one column per threshold; a cell is
        1 when the probability is greater than or equal to the threshold, else 0.
    """
    if thresholds is None:
        thresholds = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    thresholds = list(thresholds)

    if len(probResults) == 0:
        # Nothing to threshold; an empty frame may not even contain the probability column
        return pd.DataFrame([], columns=thresholds)

    # Pick the probability column once (this was previously re-evaluated for every row)
    targetcol = 0
    for col in probResults.columns:
        if isinstance(col, str) and "ESGO" in col:
            targetcol = col

    # Positional values, so the result does not depend on the frame's index labels
    probs = probResults[targetcol].to_numpy(dtype=float)

    # Broadcast (n, 1) probabilities against (1, k) thresholds -> (n, k) matrix of 0/1
    flags = (probs[:, None] >= np.asarray(thresholds, dtype=float)[None, :]).astype(int)

    return pd.DataFrame(flags, columns=thresholds)


    

Get the range of thresholds for the probabilities

In [None]:
# Threshold every probability frame into 0/1 predictions (one column per cut-off).
(
    org_LNM_res_range, org_Surv_res_range,
    noPlat_noRest_LNM_range, noPlat_noRest_Surv_range,
    noPlat_MRI_LNM_range, noPlat_MRI_Surv_range,
    noPlat_TCGA_LNM_range, noPlat_TCGA_Surv_range,
    noPlat_TCGA_MRI_LNM_range, noPlat_TCGA_MRI_Surv_range,
    Plat_noRest_LNM_range, Plat_noRest_Surv_range,
    Plat_all_LNM_range, Plat_all_Surv_range,
) = (
    getRangeResults(probability_frame)
    for probability_frame in (
        org_LNM_res_prob, org_Surv_res_prob,
        noPlat_noRest_LNM_prob, noPlat_noRest_Surv_prob,
        noPlat_MRI_LNM_prob, noPlat_MRI_Surv_prob,
        noPlat_TCGA_LNM_prob, noPlat_TCGA_Surv_prob,
        noPlat_TCGA_MRI_LNM_prob, noPlat_TCGA_MRI_Surv_prob,
        Plat_noRest_LNM_prob, Plat_noRest_Surv_prob,
        Plat_all_LNM_prob, Plat_all_Surv_prob,
    )
)

Define a function to build a table of metrics (rows) for each threshold (columns)

Pseudocode:
- Define getMetricsRange function:
    - Define thresholds
    - Create empty list
    - Metricnames
    - Loop through the thresholds
        - Get the metrics for the results
        - Append the metrics to the list
    - Concatenate the list
    - Return the list

In [None]:
# Build a metrics-by-threshold table from thresholded predictions
def getMetricsRange(results, targets):
    """Evaluate thresholded predictions at every cut-off.

    Parameters
    ----------
    results : pandas.DataFrame
        0/1 predictions with one column per cut-off; must contain the columns
        0.05, 0.10, 0.20, ..., 0.90 (the layout returned by ``getRangeResults``).
    targets : array-like of 0/1 ground-truth labels, passed on to ``getMetrics``.

    Returns
    -------
    pandas.DataFrame
        Rows: "Accuracy", "Precision (PPV)", "TPR (Recall/Sens)", "TNR (Spec)", "F1",
        "Brier", "Log Loss"; columns: the cut-offs. Values are rounded to 3 decimals.
    """
    cutoffs = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    # Rows kept from getMetrics ("ROC AUC" is left out)
    wanted = ["Accuracy", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"]

    per_cutoff = [
        round(getMetrics(results[cutoff].values, targets).loc[wanted], 3)
        for cutoff in cutoffs
    ]

    table = pd.concat(per_cutoff, axis=1)
    table.columns = cutoffs
    return table

Get the metrics for each of the cutoff points

In [None]:
# Metrics at every cut-off for each network: LNM predictions are scored against
# targets["LNM"], Survival predictions against targets[targetSurv].
thresholded_pairs = [
    (org_LNM_res_range, org_Surv_res_range),
    (noPlat_noRest_LNM_range, noPlat_noRest_Surv_range),
    (noPlat_MRI_LNM_range, noPlat_MRI_Surv_range),
    (noPlat_TCGA_LNM_range, noPlat_TCGA_Surv_range),
    (noPlat_TCGA_MRI_LNM_range, noPlat_TCGA_MRI_Surv_range),
    (Plat_noRest_LNM_range, Plat_noRest_Surv_range),
    (Plat_all_LNM_range, Plat_all_Surv_range),
]

metrics_range_pairs = [
    (
        getMetricsRange(lnm_range, targets["LNM"]),
        getMetricsRange(surv_range, targets[targetSurv]),
    )
    for lnm_range, surv_range in thresholded_pairs
]

# Bind the (LNM, Survival) tables to the names used by the plotting cells
(
    (org_LNM_metrics_range, org_Surv_metrics_range),
    (noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    (noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    (noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    (noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    (Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    (Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
) = metrics_range_pairs

Plot the recall vs precision for the LNM and Survival

In [None]:
# Recall vs Precision: one figure per outcome, one curve per network. Each curve
# connects the (recall, precision) pairs obtained at the successive cut-offs.
# Each entry: (legend label, LNM metrics table, Survival metrics table)
model_tables = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]

# Each figure: (title, position of the table inside a model_tables entry, legend location)
figure_specs = [
    ("LNM Recall vs Precision per Model", 1, "upper right"),
    ("Survival Recall vs Precision per Model", 2, "lower left"),
]

for figure_title, table_slot, legend_loc in figure_specs:
    fig, ax = plt.subplots(1,1, figsize=(15,5))
    fig.suptitle(figure_title, fontsize=16)

    for entry in model_tables:
        table = entry[table_slot]
        ax.plot(table.loc["TPR (Recall/Sens)"], table.loc["Precision (PPV)"], label=entry[0])

    ax.legend(loc=legend_loc)
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")



Plot the recall vs threshold for the LNM and Survival

In [None]:
# Recall (sensitivity) at every cut-off: one figure per outcome, one curve per network.
# Each entry: (legend label, LNM metrics table, Survival metrics table)
model_tables = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]
threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]

# Each figure: (title, position of the table inside a model_tables entry, legend location)
figure_specs = [
    ("LNM Recall per Threshold per Model", 1, "upper right"),
    ("Survival Recall per Threshold per Model", 2, "upper right"),
]

for figure_title, table_slot, legend_loc in figure_specs:
    fig, ax = plt.subplots(1,1, figsize=(15,5))
    fig.suptitle(figure_title, fontsize=16)

    for entry in model_tables:
        # The selected row is indexed by threshold, so it plots metric value vs cut-off
        ax.plot(entry[table_slot].loc["TPR (Recall/Sens)"], label=entry[0])

    ax.legend(loc=legend_loc)
    ax.set_xlabel("Threshold")
    ax.set_ylabel("Recall")
    ax.set_xticks(threshold_ticks)
    ax.set_xticklabels(threshold_ticks)


Plot the precision vs threshold for the LNM and Survival

In [None]:
# Precision (PPV) at every cut-off: one figure per outcome, one curve per network.
# Each entry: (legend label, LNM metrics table, Survival metrics table)
model_tables = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    # This network was missing from the Survival figure; it is now drawn in both figures
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]
threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]

# Each figure: (title, position of the table inside a model_tables entry, legend location)
figure_specs = [
    ("LNM Precision per Threshold per Model", 1, "lower right"),
    ("Survival Precision per Threshold per Model", 2, "lower right"),
]

for figure_title, table_slot, legend_loc in figure_specs:
    fig, ax = plt.subplots(1,1, figsize=(15,5))
    fig.suptitle(figure_title, fontsize=16)

    for entry in model_tables:
        # The selected row is indexed by threshold, so it plots metric value vs cut-off
        ax.plot(entry[table_slot].loc["Precision (PPV)"], label=entry[0])

    ax.legend(loc=legend_loc)
    ax.set_xlabel("Threshold")
    ax.set_ylabel("Precision")
    ax.set_xticks(threshold_ticks)
    ax.set_xticklabels(threshold_ticks)



Plot the F1 vs threshold for the LNM and Survival

In [None]:
# F1 at every cut-off: one figure per outcome, one curve per network.
# Each entry: (legend label, LNM metrics table, Survival metrics table)
model_tables = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]
threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]

# Each figure: (title, position of the table inside a model_tables entry, legend location)
figure_specs = [
    ("LNM F1 per Threshold per Model", 1, "upper left"),
    ("Survival F1 per Threshold per Model", 2, "upper left"),
]

for figure_title, table_slot, legend_loc in figure_specs:
    fig, ax = plt.subplots(1,1, figsize=(15,5))
    fig.suptitle(figure_title, fontsize=16)

    for entry in model_tables:
        # The selected row is indexed by threshold, so it plots metric value vs cut-off
        ax.plot(entry[table_slot].loc["F1"], label=entry[0])

    ax.legend(loc=legend_loc)
    ax.set_xlabel("Threshold")
    ax.set_ylabel("F1")
    # Tick at the evaluated cut-offs, matching the Recall and Precision per-threshold plots
    ax.set_xticks(threshold_ticks)
    ax.set_xticklabels(threshold_ticks)


Plot the Brier vs threshold for the LNM and Survival

In [None]:
# "Brier" row of the per-threshold metric tables: one figure per outcome, one curve per network.
# NOTE(review): these tables come from getMetricsRange, which scores thresholded 0/1
# predictions, so this is not the Brier score of the predicted probabilities themselves.
# Each entry: (legend label, LNM metrics table, Survival metrics table)
model_tables = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]
threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]

# Each figure: (title, position of the table inside a model_tables entry, legend location)
figure_specs = [
    ("LNM Brier per Threshold per Model", 1, "upper right"),
    ("Survival Brier per Threshold per Model", 2, "lower right"),
]

for figure_title, table_slot, legend_loc in figure_specs:
    fig, ax = plt.subplots(1,1, figsize=(15,5))
    fig.suptitle(figure_title, fontsize=16)

    for entry in model_tables:
        # The selected row is indexed by threshold, so it plots metric value vs cut-off
        ax.plot(entry[table_slot].loc["Brier"], label=entry[0])

    ax.legend(loc=legend_loc)
    ax.set_xlabel("Threshold")
    ax.set_ylabel("Brier")
    # Tick at the evaluated cut-offs, matching the Recall and Precision per-threshold plots
    ax.set_xticks(threshold_ticks)
    ax.set_xticklabels(threshold_ticks)



Plot the Log Loss vs threshold for the LNM and Survival

In [None]:
# "Log Loss" row of the per-threshold metric tables: one figure per outcome, one curve per network.
# NOTE(review): these tables come from getMetricsRange, which scores thresholded 0/1
# predictions, so this is not the log loss of the predicted probabilities themselves.
# Each entry: (legend label, LNM metrics table, Survival metrics table)
model_tables = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]
threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]

# Each figure: (title, position of the table inside a model_tables entry, legend location)
figure_specs = [
    ("LNM Log Loss per Threshold per Model", 1, "upper right"),
    ("Survival Log Loss per Threshold per Model", 2, "lower right"),
]

for figure_title, table_slot, legend_loc in figure_specs:
    fig, ax = plt.subplots(1,1, figsize=(15,5))
    fig.suptitle(figure_title, fontsize=16)

    for entry in model_tables:
        # The selected row is indexed by threshold, so it plots metric value vs cut-off
        ax.plot(entry[table_slot].loc["Log Loss"], label=entry[0])

    ax.legend(loc=legend_loc)
    ax.set_xlabel("Threshold")
    ax.set_ylabel("Log Loss")
    # Tick at the evaluated cut-offs, matching the Recall and Precision per-threshold plots
    ax.set_xticks(threshold_ticks)
    ax.set_xticklabels(threshold_ticks)
