### Author: Ally Sprik
### Last-updated: 25-02-2024

The goal of this notebook is to evaluate the different models that have been created on the MAYO validation dataset and to compare their performance.


In [None]:
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the fitted Bayesian networks (.net files, read with pyAgrum's loadBN) that are
# validated in this notebook. The trailing comment on each line gives the label the
# network gets in the comparison tables and plots further down.
origineel_net = gum.loadBN("../../0. Source_files/0.4. Original_Casper_files/Results/Casper_fitted_952.net")  # "Origineel Netwerk"
Peter_net_woPlat = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_original_952.net")  # "-Plat -TCGA -MRI"
WOPlat_MRI = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddMRIMI_fitted_952.net")  # "-Plat -TCGA +MRI"
WOPlat_TCGA = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddTCGA_1_fitted_952.net")  # "-Plat +TCGA -MRI"
WOPlat_TCGA_MRI = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddMRIMITCGA_fitted_952.net")  # "-Plat +TCGA +MRI"
Peter_net_wPlat = gum.loadBN("../1.3. Model/Fitted_Networks/R_WP_952.net")  # "+Plat -TCGA -MRI"
WPlat_all = gum.loadBN("../1.3. Model/Fitted_Networks/R_WP_all_952.net")  # "+Plat +TCGA +MRI"

pd.options.mode.copy_on_write = True  # Enable pandas Copy-on-Write mode. Technical detail: https://pandas.pydata.org/pandas-docs/stable/user_guide/copy_on_write.html#


Load the data and fix some labels

In [None]:
# Value relabelling applied to the WHOLE frame (every column): 1/0 become "yes"/"no"
# and the invasion-depth categories become "lt_50"/"ge_50".
# NOTE(review): because the mapping is frame-wide, any numeric column that holds a
# literal 0 or 1 is relabelled as well - confirm this is intended for all columns.
label_map = {1:"yes", 0:"no", "<50%":"lt_50", ">50%":"ge_50", "No invasion":"lt_50"}

# Read the cleaned MAYO validation data and relabel it in one step
data = pd.read_csv(
    "../../0. Source_files/0.2. Cleaned_data/MAYO_cleaned_model.csv", sep=","
).replace(label_map)

Define the evidence

In [None]:
# Columns of the validation data that can be entered as evidence into the networks.
# Every name appears exactly once: a repeated name would make `data[evidence_columns]`
# return that column twice and leave duplicate labels in every per-row evidence Series.
evidence_columns = [
    'CTMRI', 'Platelets', 'CA125', 'PreoperativeGrade',
    'Cytology', 'p53', 'L1CAM', 'ER', 'PR', 'POLE', 'MSI', 'MRI_MI',
]


Drop the rows that do not have target information

In [None]:
# Keep only the rows where both outcomes (LNM and Survival5yr) are present,
# then renumber the rows so the index runs 0..n-1 again.
data = data.dropna(subset=["LNM", "Survival5yr"]).reset_index(drop=True)

Define the targets, and replace the labels

In [None]:
# Outcome columns re-coded to 0/1. Two label vocabularies are mapped:
# "yes"/"no" and 'positive'/'negative'.
targets = (
    data[["LNM", "Survival5yr"]]
    .replace({"yes":1, "no":0})
    .replace({'negative':0, 'positive':1})
)


Create therapy from combination of chemotherapy and radiotherapy

Pseudocode:
- Create a new column called "Therapy"
- Change the column to object
- Loop through the data
    - If chemotherapy is yes and radiotherapy is yes, then therapy is chemoradiotherapy
    - If chemotherapy is yes and radiotherapy is no, then therapy is chemotherapy
    - If chemotherapy is no and radiotherapy is yes, then therapy is radiotherapy
    - If chemotherapy is no and radiotherapy is no, then therapy is no

In [None]:
# Derive the combined "Therapy" label from the separate chemotherapy and radiotherapy
# columns. The rules are evaluated column-wise, so the result does not depend on the
# row labels of `data` and no per-row Python loop or chained indexing is needed.
#   chemo "yes" + radio "yes" -> "chemoradiotherapy"
#   chemo "yes" + radio "no"  -> "chemotherapy"
#   chemo "no"  + radio "yes" -> "radiotherapy"
#   anything else (including missing values) -> "no"
chemo_yes = data["Chemotherapy"] == "yes"
chemo_no = data["Chemotherapy"] == "no"
radio_yes = data["Radiotherapy"] == "yes"
radio_no = data["Radiotherapy"] == "no"

# np.select picks the first matching rule per row and falls back to `default`;
# the column is stored with object dtype.
data["Therapy"] = pd.Series(
    np.select(
        [chemo_yes & radio_yes, chemo_yes & radio_no, chemo_no & radio_yes],
        ["chemoradiotherapy", "chemotherapy", "radiotherapy"],
        default="no",
    ),
    index=data.index,
    dtype="object",
)

Create an evidence dataframe

In [None]:
# Independent copy of just the evidence columns, so later relabelling does not touch `data`
evidence = data.loc[:, evidence_columns].copy()

Replace the cytology label

In [None]:
# Rename the cytology values from "yes"/"no" to "malignant"/"benign"
# (presumably the state names of the networks' Cytology node - TODO confirm)
cytology_labels = {"yes":"malignant", "no":"benign"}
evidence["Cytology"] = evidence["Cytology"].replace(cytology_labels)

Create specific datasets based on the different models

In [None]:
# One evidence subset per network variant: each subset leaves out the columns that the
# corresponding network is not given. `DataFrame.drop` already returns a new frame,
# so `evidence` itself is left untouched.

# Without platelets, TCGA markers (POLE, MSI) and MRI_MI
data_or = evidence.drop(columns=['POLE', 'MSI', 'Platelets', 'MRI_MI'])
data_noMRI_NoTCGA_noPlat = evidence.drop(columns=['POLE', 'MSI', 'Platelets', 'MRI_MI'])

# Without platelets
data_noMRI_NoPlat = evidence.drop(columns=['Platelets', 'MRI_MI'])
data_NoPlat = evidence.drop(columns=['Platelets'])
data_noTCGA_NoPlat = evidence.drop(columns=['POLE', 'MSI', 'Platelets'])

# With platelets
data_noMRI = evidence.drop(columns=['MRI_MI'])
data_noMRI_NoTCGA = evidence.drop(columns=['POLE', 'MSI', 'MRI_MI'])

define a function to loop through the evidence and get the results

Pseudocode:
- Define getProbabilities function:
    - Loop through the evidence
        - Get the evidence row
        - Get the result for LNM
        - Get the result for Survival
        - Append the results to a list
    - Return the list

In [None]:
# Query a network row by row for the LNM and survival posteriors
def getProbabilities(model, evidence, Surv="Survival5yr"):
    """Compute the posteriors of "LNM" and of the survival node for every evidence row.

    Parameters
    ----------
    model : network passed straight to ``gum.getPosterior``.
    evidence : pandas.DataFrame
        One row per patient; missing values in a row are dropped before the row is
        passed as evidence.
    Surv : str
        Name of the survival target node (default "Survival5yr").

    Returns
    -------
    (resultsLNM, resultsSurvival) : tuple of lists
        The posteriors returned by ``gum.getPosterior``, in row order.

    Notes
    -----
    A query that raises is reported on stdout and NOT added to its list, so a list
    can end up shorter than ``evidence``; the two final print lines show the lengths.
    """
    resultsLNM = []
    resultsSurvival = []

    for row_number in range(len(evidence)):
        # Only the observed (non-missing) values of this row are used as evidence
        observed = evidence.iloc[row_number].dropna().to_dict()

        # LNM is queried first, then survival, exactly once each per row
        for target, collected, error_message in (
            ("LNM", resultsLNM, "Error at row, regarding LNM"),
            (Surv, resultsSurvival, "Error at row, regarding Survival"),
        ):
            try:
                collected.append(gum.getPosterior(model, evs=observed, target=target))
            except Exception as error:
                print(error_message, row_number)
                print(error)

    print("length of resultsLNM", len(resultsLNM))
    print("length of resultsSurvival", len(resultsSurvival))
    return resultsLNM, resultsSurvival

Get the results for the networks

In [None]:
# Query every network with the evidence subset that leaves out the variables it is not given.
# Each getProbabilities call returns (LNM posteriors, survival posteriors) and prints
# the length of both lists; the "Started N" lines only mark progress.
# Get the results for the original network
print("Started 1")

org_LNM_res, org_Surv_res = getProbabilities(origineel_net, data_or)

# Network without platelets, TCGA markers and MRI
print("Started 2")
noPlat_noRest_LNM, noPlat_noRest_Surv = getProbabilities(Peter_net_woPlat, data_noMRI_NoTCGA_noPlat)

# Without platelets, with MRI_MI
print("Started 3")
noPlat_MRI_LNM, noPlat_MRI_Surv = getProbabilities(WOPlat_MRI, data_noTCGA_NoPlat)

# Without platelets, with TCGA markers (POLE, MSI)
print("Started 4")
noPlat_TCGA_LNM, noPlat_TCGA_Surv = getProbabilities(WOPlat_TCGA, data_noMRI_NoPlat)

# Without platelets, with TCGA markers and MRI_MI
print("Started 5")
noPlat_TCGA_MRI_LNM, noPlat_TCGA_MRI_Surv = getProbabilities(WOPlat_TCGA_MRI, data_NoPlat)

# With platelets, without TCGA markers and MRI_MI
print("Started 6")
Plat_noRest_LNM, Plat_noRest_Surv = getProbabilities(Peter_net_wPlat, data_noMRI_NoTCGA)

# With platelets, TCGA markers and MRI_MI: the full evidence frame
print("Started 7")
Plat_all_LNM, Plat_all_Surv = getProbabilities(WPlat_all, evidence)


Define functions to retrieve the results

In [None]:
def getProbResults(results, target):
    """Collect one entry from every posterior into a single-column DataFrame.

    Parameters
    ----------
    results : sequence of indexable posteriors (e.g. the lists returned by
        getProbabilities).
    target : key or position looked up in each posterior (``posterior[target]``).

    Returns
    -------
    pandas.DataFrame with one row per posterior, in the same order.
    """
    return pd.DataFrame([posterior[target] for posterior in results])

Get the probabilities

In [None]:
# Keep entry 1 of every posterior as the predicted probability of the event.
# NOTE(review): this assumes position 1 is the positive state ("yes") of both the LNM
# and the survival node in every network - confirm against the state order in the .net files.
org_LNM_res_prob = getProbResults(org_LNM_res, 1)
org_Surv_res_prob = getProbResults(org_Surv_res, 1)

noPlat_noRest_LNM_prob = getProbResults(noPlat_noRest_LNM, 1)
noPlat_noRest_Surv_prob = getProbResults(noPlat_noRest_Surv, 1)

noPlat_MRI_LNM_prob = getProbResults(noPlat_MRI_LNM, 1)
noPlat_MRI_Surv_prob = getProbResults(noPlat_MRI_Surv, 1)

noPlat_TCGA_LNM_prob = getProbResults(noPlat_TCGA_LNM, 1)
noPlat_TCGA_Surv_prob = getProbResults(noPlat_TCGA_Surv, 1)

noPlat_TCGA_MRI_LNM_prob = getProbResults(noPlat_TCGA_MRI_LNM, 1)
noPlat_TCGA_MRI_Surv_prob = getProbResults(noPlat_TCGA_MRI_Surv, 1)

Plat_noRest_LNM_prob = getProbResults(Plat_noRest_LNM, 1)
Plat_noRest_Surv_prob = getProbResults(Plat_noRest_Surv, 1)

Plat_all_LNM_prob = getProbResults(Plat_all_LNM, 1)
Plat_all_Surv_prob = getProbResults(Plat_all_Surv, 1)


Define the metric functions

Pseudocode:
- Define getMetrics function:
    - Get the confusion matrix
    - Get the accuracy
    - Get the ROC AUC
    - Get the precision
    - Get the recall
    - Get the specificity
    - Get the F1 score
    - Get the Brier score
    - Get the log loss
    - Return the metrics
- Define getSlimMetrics function:
    - Get the ROC AUC
    - Get the Log Loss
    - Get the Brier score
    - Get the N Predicted/N Observed
    - Get the Ratio
    - Return the metrics

In [None]:
from sklearn.metrics import roc_curve
# Find the accuracy, roc auc, precision and recall for the results and the targets data
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, confusion_matrix, log_loss, brier_score_loss

def getMetrics(results, targets):
    """Compute classification metrics for predicted labels against the true labels.

    Parameters
    ----------
    results : array-like of predicted labels (0/1).
    targets : array-like of true labels (0/1).

    Returns
    -------
    pandas.DataFrame with a single column and the rows "Accuracy", "ROC AUC",
    "Precision (PPV)", "TPR (Recall/Sens)", "TNR (Spec)", "F1", "Brier", "Log Loss".

    Notes
    -----
    * F1 is reported as 0.0 when precision + recall is 0 (no true positives), instead
      of dividing by zero; this matches scikit-learn's own convention for f1_score.
    * TNR is NaN when there are no negative samples at all.
    * ROC AUC, Brier and Log Loss are computed on `results` exactly as passed in.
    """
    # Build the confusion matrix once; rows are the true class, columns the predicted class
    cm = confusion_matrix(targets, results)
    TN = cm[0][0]
    FP = cm[0][1]

    accuracy = accuracy_score(targets, results)
    roc_auc = roc_auc_score(targets, results)
    precision = precision_score(targets, results)
    TPR = recall_score(targets, results)

    # Specificity is undefined without negatives
    negatives = TN + FP
    TNR = TN / negatives if negatives > 0 else float("nan")

    # Guard the harmonic mean: precision and recall are both 0 when nothing is predicted correctly positive
    pr_sum = precision + TPR
    f1 = 2 * (precision * TPR) / pr_sum if pr_sum > 0 else 0.0

    brier = brier_score_loss(targets, results)
    loglike = log_loss(targets, results)

    return pd.DataFrame(
        [accuracy, roc_auc, precision, TPR, TNR, f1, brier, loglike],
        index=["Accuracy", "ROC AUC", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"],
    )

def getSlimMetrics(results, targets):
    """Compute the compact probability-based metric set for one model and one outcome.

    Parameters
    ----------
    results : pandas.DataFrame
        Predicted probabilities; the first column is the one summed for "N Predicted".
    targets : pandas.Series
        Observed 0/1 outcomes, same length and order as `results`.

    Returns
    -------
    pandas.DataFrame with a single column and the rows "ROC AUC", "Log Loss", "Brier",
    "N Predicted/N Observed" (a "pred/obs" string) and "Ratio" (predicted / observed).
    Numeric values are rounded to 4 decimals.

    Raises
    ------
    ValueError
        If `results` and `targets` differ in length (for example because some rows
        could not be scored), since the metrics would then compare misaligned rows.
    """
    if len(results) != len(targets):
        raise ValueError(
            f"Results and targets are not the same length. Results: {len(results)} Targets: {len(targets)}"
        )

    # Get ROC AUC, Log Loss, Brier, and N Predicted/N Observed
    ROC = round(roc_auc_score(targets, results), 4)
    LL = round(log_loss(targets, results), 4)
    Brier = round(brier_score_loss(targets, results), 4)

    # Expected number of events = sum of the predicted probabilities (first column,
    # taken by position so the column label does not matter)
    N_pred = results.sum().iloc[0]
    N_obs = int(targets.sum())
    # int() truncates the expected count for display only; Ratio uses the exact sum
    N_br = f"{int(N_pred)}/{int(N_obs)}"
    Ratio = round(N_pred/N_obs, 4)

    return pd.DataFrame(
        [ROC, LL, Brier, N_br, Ratio],
        index=["ROC AUC", "Log Loss", "Brier", "N Predicted/N Observed", "Ratio"],
    )


Get the metrics for the networks

In [None]:
# Get slim metrics
# Get the metrics for the original network

# Name of the survival column in `targets`; also used by the later plotting cells
targetSurv = "Survival5yr"
# Each pair below scores one model on LNM and on survival. getSlimMetrics raises if
# the number of predictions differs from the number of targets.
print("Started 1")
org_LNM_metrics = getSlimMetrics(org_LNM_res_prob, targets["LNM"])
org_Surv_metrics = getSlimMetrics(org_Surv_res_prob, targets[targetSurv])

print("Started 2")
woPlat_noRest_LNM_metrics = getSlimMetrics(noPlat_noRest_LNM_prob, targets["LNM"])
woPlat_noRest_Surv_metrics = getSlimMetrics(noPlat_noRest_Surv_prob, targets[targetSurv])

print("Started 3")
woPlat_MRI_LNM_metrics = getSlimMetrics(noPlat_MRI_LNM_prob, targets["LNM"])
woPlat_MRI_Surv_metrics = getSlimMetrics(noPlat_MRI_Surv_prob, targets[targetSurv])

print("Started 4")
woPlat_TCGA_LNM_metrics = getSlimMetrics(noPlat_TCGA_LNM_prob, targets["LNM"])
woPlat_TCGA_Surv_metrics = getSlimMetrics(noPlat_TCGA_Surv_prob, targets[targetSurv])

print("Started 5")
woPlat_TCGA_MRI_LNM_metrics = getSlimMetrics(noPlat_TCGA_MRI_LNM_prob, targets["LNM"])
woPlat_TCGA_MRI_Surv_metrics = getSlimMetrics(noPlat_TCGA_MRI_Surv_prob, targets[targetSurv])

print("Started 6")
Plat_noRest_LNM_metrics = getSlimMetrics(Plat_noRest_LNM_prob, targets["LNM"])
Plat_noRest_Surv_metrics = getSlimMetrics(Plat_noRest_Surv_prob, targets[targetSurv])

print("Started 7")
Plat_all_LNM_metrics = getSlimMetrics(Plat_all_LNM_prob, targets["LNM"])
Plat_all_Surv_metrics = getSlimMetrics(Plat_all_Surv_prob, targets[targetSurv])


Concatenate the metrics to compare between the models (LNM)

In [None]:
# Comparison table of the slim LNM metrics: one column per model, in display order
lnm_by_model = {
    "Origineel Netwerk": org_LNM_metrics,
    "-Plat -TCGA -MRI": woPlat_noRest_LNM_metrics,
    "-Plat -TCGA +MRI": woPlat_MRI_LNM_metrics,
    "-Plat +TCGA -MRI": woPlat_TCGA_LNM_metrics,
    "-Plat +TCGA +MRI": woPlat_TCGA_MRI_LNM_metrics,
    "+Plat -TCGA -MRI": Plat_noRest_LNM_metrics,
    "+Plat +TCGA +MRI": Plat_all_LNM_metrics,
}

LNM_metrics = pd.concat(list(lnm_by_model.values()), axis=1)
LNM_metrics.columns = list(lnm_by_model)
LNM_metrics


Concatenate the metrics to compare between the models (Survival)

In [None]:
# Comparison table of the slim survival metrics: one column per model, in display order
surv_by_model = {
    "Origineel Netwerk": org_Surv_metrics,
    "-Plat -TCGA -MRI": woPlat_noRest_Surv_metrics,
    "-Plat -TCGA +MRI": woPlat_MRI_Surv_metrics,
    "-Plat +TCGA -MRI": woPlat_TCGA_Surv_metrics,
    "-Plat +TCGA +MRI": woPlat_TCGA_MRI_Surv_metrics,
    "+Plat -TCGA -MRI": Plat_noRest_Surv_metrics,
    "+Plat +TCGA +MRI": Plat_all_Surv_metrics,
}

# Concatenate side by side, apply DataFrame.round(3), then label the columns
Surv_metrics = pd.concat(list(surv_by_model.values()), axis=1).round(3)
Surv_metrics.columns = list(surv_by_model)
Surv_metrics

Plot the ROC curves

In [None]:
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
import seaborn as sns

# Figure styling: seaborn "whitegrid" theme, inline output as PNG and PDF, 150 dpi
sns.set_style("whitegrid")
set_matplotlib_formats('png', 'pdf')
plt.rcParams['figure.dpi'] = 150

# (legend label, LNM probabilities, survival probabilities) per model, in legend order
roc_models = [
    ("Origineel Netwerk", org_LNM_res_prob, org_Surv_res_prob),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_prob, noPlat_noRest_Surv_prob),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_prob, noPlat_MRI_Surv_prob),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_prob, noPlat_TCGA_Surv_prob),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_prob, noPlat_TCGA_MRI_Surv_prob),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_prob, Plat_noRest_Surv_prob),
    ("+Plat +TCGA +MRI", Plat_all_LNM_prob, Plat_all_Surv_prob),
]

# All ROC curves in one figure, LNM (left) and Survival (right) in separate panels
fig, ax = plt.subplots(1,2, figsize=(15,5))
fig.suptitle("ROC Curves", fontsize=16)

# LNM panel: dashed diagonal as the chance line, then one curve per model
ax[0].plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)
for model_label, lnm_prob, _unused in roc_models:
    fpr, tpr, _ = roc_curve(targets["LNM"], lnm_prob, pos_label=1)
    lnm_auc = roc_auc_score(targets["LNM"], lnm_prob)
    ax[0].plot(fpr, tpr, label='%s (area = %0.2f)' % (model_label, lnm_auc))
ax[0].title.set_text("LNM")
ax[0].legend(loc="lower right")

# Survival panel: same layout
ax[1].plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)
for model_label, _unused, surv_prob in roc_models:
    fpr, tpr, _ = roc_curve(targets[targetSurv], surv_prob, pos_label=1)
    surv_auc = roc_auc_score(targets[targetSurv], surv_prob)
    ax[1].plot(fpr, tpr, label='%s (area = %0.2f)' % (model_label, surv_auc))
ax[1].title.set_text("Survival")
ax[1].legend(loc="lower right")


Define a function to get the results based on a range of thresholds for the probability

In [None]:
def getRangeResults(probResults, thresholds=None):
    """Turn predicted probabilities into 0/1 predictions at a series of cut-offs.

    Parameters
    ----------
    probResults : pandas.DataFrame
        Predicted probabilities in the column labelled ``0`` (the layout produced by
        getProbResults). Rows are read by position, so any row index is accepted.
    thresholds : sequence of float, optional
        Cut-offs to apply. Defaults to 0.05, 0.10, 0.20, ..., 0.90.

    Returns
    -------
    pandas.DataFrame with one row per input row (index 0..n-1) and one column per
    threshold; a cell is 1 when the probability is strictly greater than the
    threshold and 0 otherwise (missing probabilities therefore give 0).
    """
    if thresholds is None:
        thresholds = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    thresholds = list(thresholds)

    # Column vector of probabilities against a row vector of cut-offs: broadcasting
    # gives the whole rows x thresholds comparison in one step
    probabilities = probResults[0].to_numpy(dtype=float)
    cutoffs = np.asarray(thresholds, dtype=float)
    flags = (probabilities[:, None] > cutoffs[None, :]).astype(np.int64)

    return pd.DataFrame(flags, columns=thresholds)


Get the threshold range results

In [None]:
# Threshold every model's predicted probabilities at each cut-off; every *_range frame
# has one row per patient and one 0/1 column per threshold.
# Get the results for the original network
org_LNM_res_range = getRangeResults(org_LNM_res_prob)
org_Surv_res_range = getRangeResults(org_Surv_res_prob)

noPlat_noRest_LNM_range = getRangeResults(noPlat_noRest_LNM_prob)
noPlat_noRest_Surv_range = getRangeResults(noPlat_noRest_Surv_prob)

noPlat_MRI_LNM_range = getRangeResults(noPlat_MRI_LNM_prob)
noPlat_MRI_Surv_range = getRangeResults(noPlat_MRI_Surv_prob)

noPlat_TCGA_LNM_range = getRangeResults(noPlat_TCGA_LNM_prob)
noPlat_TCGA_Surv_range = getRangeResults(noPlat_TCGA_Surv_prob)

noPlat_TCGA_MRI_LNM_range = getRangeResults(noPlat_TCGA_MRI_LNM_prob)
noPlat_TCGA_MRI_Surv_range = getRangeResults(noPlat_TCGA_MRI_Surv_prob)

Plat_noRest_LNM_range = getRangeResults(Plat_noRest_LNM_prob)
Plat_noRest_Surv_range = getRangeResults(Plat_noRest_Surv_prob)

Plat_all_LNM_range = getRangeResults(Plat_all_LNM_prob)
Plat_all_Surv_range = getRangeResults(Plat_all_Surv_prob)

Define a function to make a table of metrics (rows) for each threshold (columns)

In [None]:
# Build a metrics-by-threshold table from thresholded predictions
def getMetricsRange(results, targets):
    """Evaluate the 0/1 predictions at every threshold and tabulate the metrics.

    Parameters
    ----------
    results : pandas.DataFrame
        One column of 0/1 predictions per threshold, labelled with the threshold value
        (0.05, 0.10, 0.20, ..., 0.90).
    targets : array-like of true 0/1 labels, passed on to getMetrics.

    Returns
    -------
    pandas.DataFrame with the metrics "Accuracy", "Precision (PPV)",
    "TPR (Recall/Sens)", "TNR (Spec)", "F1", "Brier" and "Log Loss" as rows and the
    thresholds as columns, rounded to 3 decimals.
    """
    thresholds = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    # ROC AUC is deliberately not part of the per-threshold table
    metricnames = ["Accuracy", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"]

    per_threshold = [
        round(getMetrics(results[cutoff].values, targets).loc[metricnames], 3)
        for cutoff in thresholds
    ]

    table = pd.concat(per_threshold, axis=1)
    table.columns = thresholds
    return table

Get the metrics for each of the cutoff points
Note: you may see an 'ill-defined' warning from scikit-learn; this happens because, at certain thresholds, one of the labels can be completely absent from the predictions.

In [None]:
# Get the metrics for each of the cutoff points
# Every *_metrics_range frame has the metric names as rows and the thresholds as columns;
# the plotting cells below read single rows from these frames with .loc[...].
# Get the metrics for the original network
org_LNM_metrics_range = getMetricsRange(org_LNM_res_range, targets["LNM"])
org_Surv_metrics_range = getMetricsRange(org_Surv_res_range, targets[targetSurv])

noPlat_noRest_LNM_metrics_range = getMetricsRange(noPlat_noRest_LNM_range, targets["LNM"])
noPlat_noRest_Surv_metrics_range = getMetricsRange(noPlat_noRest_Surv_range, targets[targetSurv])

noPlat_MRI_LNM_metrics_range = getMetricsRange(noPlat_MRI_LNM_range, targets["LNM"])
noPlat_MRI_Surv_metrics_range = getMetricsRange(noPlat_MRI_Surv_range, targets[targetSurv])

noPlat_TCGA_LNM_metrics_range = getMetricsRange(noPlat_TCGA_LNM_range, targets["LNM"])
noPlat_TCGA_Surv_metrics_range = getMetricsRange(noPlat_TCGA_Surv_range, targets[targetSurv])

noPlat_TCGA_MRI_LNM_metrics_range = getMetricsRange(noPlat_TCGA_MRI_LNM_range, targets["LNM"])
noPlat_TCGA_MRI_Surv_metrics_range = getMetricsRange(noPlat_TCGA_MRI_Surv_range, targets[targetSurv])

Plat_noRest_LNM_metrics_range = getMetricsRange(Plat_noRest_LNM_range, targets["LNM"])
Plat_noRest_Surv_metrics_range = getMetricsRange(Plat_noRest_Surv_range, targets[targetSurv])

Plat_all_LNM_metrics_range = getMetricsRange(Plat_all_LNM_range, targets["LNM"])
Plat_all_Surv_metrics_range = getMetricsRange(Plat_all_Surv_range, targets[targetSurv])

Plot the recall vs precision for each model

In [None]:
# Recall vs Precision: one curve per model, each point being one threshold
recall_row = "TPR (Recall/Sens)"
precision_row = "Precision (PPV)"

# (legend label, LNM metrics table, survival metrics table), in legend order
pr_models = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]

# LNM
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("LNM Recall vs Precision per Model", fontsize=16)
for model_label, lnm_table, _unused in pr_models:
    ax.plot(lnm_table.loc[recall_row], lnm_table.loc[precision_row], label=model_label)
ax.legend(loc="upper right")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")

# Survival
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("Survival Recall vs Precision per Model", fontsize=16)
for model_label, _unused, surv_table in pr_models:
    ax.plot(surv_table.loc[recall_row], surv_table.loc[precision_row], label=model_label)
ax.legend(loc="upper right")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")


Plot the recall vs threshold for each model

In [None]:
# Recall (sensitivity) of every model as a function of the decision threshold
recall_row = "TPR (Recall/Sens)"
threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]

# (legend label, LNM metrics table, survival metrics table), in legend order
recall_models = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]

# LNM
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("LNM Recall per Threshold per Model", fontsize=16)
for model_label, lnm_table, _unused in recall_models:
    ax.plot(lnm_table.loc[recall_row], label=model_label)
ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Recall")
ax.set_xticks(threshold_ticks)
ax.set_xticklabels(threshold_ticks)

# Survival
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("Survival Recall per Threshold per Model", fontsize=16)
for model_label, _unused, surv_table in recall_models:
    ax.plot(surv_table.loc[recall_row], label=model_label)
ax.legend(loc="lower left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Recall")
ax.set_xticks(threshold_ticks)
ax.set_xticklabels(threshold_ticks)


Plot the precision vs threshold for each model

In [None]:
# Precision (PPV) of every model as a function of the decision threshold.
# All seven models are drawn, including "+Plat +TCGA +MRI", so that this figure
# shows the same set of models as the recall, F1, Brier and log-loss figures.
precision_row = "Precision (PPV)"
threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]

# (legend label, LNM metrics table, survival metrics table), in legend order
precision_models = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]

# LNM
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("LNM Precision per Threshold per Model", fontsize=16)
for model_label, lnm_table, _unused in precision_models:
    ax.plot(lnm_table.loc[precision_row], label=model_label)
ax.legend(loc="upper left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Precision")
ax.set_xticks(threshold_ticks)
ax.set_xticklabels(threshold_ticks)

# Survival
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("Survival Precision per Threshold per Model", fontsize=16)
for model_label, _unused, surv_table in precision_models:
    ax.plot(surv_table.loc[precision_row], label=model_label)
ax.legend(loc="upper left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Precision")
ax.set_xticks(threshold_ticks)
ax.set_xticklabels(threshold_ticks)



Plot the F1 vs threshold for each model

In [None]:
# F1 score of every model as a function of the decision threshold
f1_row = "F1"

# (legend label, LNM metrics table, survival metrics table), in legend order
f1_models = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]

# LNM
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("LNM F1 per Threshold per Model", fontsize=16)
for model_label, lnm_table, _unused in f1_models:
    ax.plot(lnm_table.loc[f1_row], label=model_label)
ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("F1")

# Survival
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("Survival F1 per Threshold per Model", fontsize=16)
for model_label, _unused, surv_table in f1_models:
    ax.plot(surv_table.loc[f1_row], label=model_label)
ax.legend(loc="upper left")
ax.set_xlabel("Threshold")
ax.set_ylabel("F1")


Plot the Brier score vs threshold for each model

In [None]:
# Brier score of every model as a function of the decision threshold
brier_row = "Brier"

# (legend label, LNM metrics table, survival metrics table), in legend order
brier_models = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]

# LNM
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("LNM Brier per Threshold per Model", fontsize=16)
for model_label, lnm_table, _unused in brier_models:
    ax.plot(lnm_table.loc[brier_row], label=model_label)
ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Brier")

# Survival
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("Survival Brier per Threshold per Model", fontsize=16)
for model_label, _unused, surv_table in brier_models:
    ax.plot(surv_table.loc[brier_row], label=model_label)
ax.legend(loc="lower left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Brier")



Plot the log loss vs threshold for each model

In [None]:
# Log loss of every model as a function of the decision threshold
logloss_row = "Log Loss"

# (legend label, LNM metrics table, survival metrics table), in legend order
logloss_models = [
    ("Origineel Netwerk", org_LNM_metrics_range, org_Surv_metrics_range),
    ("-Plat -TCGA -MRI", noPlat_noRest_LNM_metrics_range, noPlat_noRest_Surv_metrics_range),
    ("-Plat -TCGA +MRI", noPlat_MRI_LNM_metrics_range, noPlat_MRI_Surv_metrics_range),
    ("-Plat +TCGA -MRI", noPlat_TCGA_LNM_metrics_range, noPlat_TCGA_Surv_metrics_range),
    ("-Plat +TCGA +MRI", noPlat_TCGA_MRI_LNM_metrics_range, noPlat_TCGA_MRI_Surv_metrics_range),
    ("+Plat -TCGA -MRI", Plat_noRest_LNM_metrics_range, Plat_noRest_Surv_metrics_range),
    ("+Plat +TCGA +MRI", Plat_all_LNM_metrics_range, Plat_all_Surv_metrics_range),
]

# LNM
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("LNM Log Loss per Threshold per Model", fontsize=16)
for model_label, lnm_table, _unused in logloss_models:
    ax.plot(lnm_table.loc[logloss_row], label=model_label)
ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Log Loss")

# Survival
fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("Survival Log Loss per Threshold per Model", fontsize=16)
for model_label, _unused, surv_table in logloss_models:
    ax.plot(surv_table.loc[logloss_row], label=model_label)
ax.legend(loc="lower left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Log Loss")
