### Author: Ally Sprik
### Last-updated: 25-02-2024

The goal of this notebook is to validate the different fitted networks against the Brno validation set.



In [None]:
import pandas as pd
import numpy as np
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb
from collections import Counter

# Opt in to pandas copy-on-write semantics.
# Technical detail: https://pandas.pydata.org/pandas-docs/stable/user_guide/copy_on_write.html#
pd.options.mode.copy_on_write = True

# Loading in networks: the original fitted network plus the refitted variants.
# The model labels used in the tables and plots further down are:
#   WOPlat* -> "-Plat ..." variants, WPlat* -> "+Plat ..." variants.
origineel_net = gum.loadBN("../../0. Source_files/0.4. Original_Casper_files/Results/Casper_fitted_952.net")
WOPlat = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_original_952.net")
WOPlat_MRI = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddMRIMI_fitted_952.net")
WOPlat_TCGA = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddTCGA_1_fitted_952.net")
WOPlat_TCGA_MRI = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddMRIMITCGA_fitted_952.net")
WPlat = gum.loadBN("../1.3. Model/Fitted_Networks/R_WP_952.net")
WPlat_all = gum.loadBN("../1.3. Model/Fitted_Networks/R_WP_all_952.net")


# Loading in data: the cleaned Brno cohort used for validation.
df = pd.read_csv("../../0. Source_files/0.2. Cleaned_data/Cleaned_Brno_model_complete.csv")
# Preview only the first rows: rendering the full frame bloats the notebook
# and puts every patient record into the saved output.
df.head()

Select the rows that can be used for validation, subset for the LNM and the Survival5yr

In [None]:
# Build one validation subset per target by dropping the rows whose outcome
# is missing, then renumber the rows from 0.
df_LNM = df.dropna(subset=["LNM"]).reset_index(drop=True)
df_Surv = df.dropna(subset=["Survival5yr"]).reset_index(drop=True)

Replace the labels for the targets with 1 and 0

In [None]:
# Encode the outcome labels as integers (yes -> 1, no -> 0).
# `Series.replace` is not used here: its implicit downcasting from object to
# int is deprecated in recent pandas, which would leave an object-dtype target
# that the scikit-learn metrics further down cannot interpret. Mapping and then
# casting explicitly guarantees an integer target; values other than "yes"/"no"
# are passed through unchanged, so the cast fails loudly on unexpected labels.
_OUTCOME_CODES = {"yes": 1, "no": 0}


def _encode_outcome(labels):
    """Return `labels` with "yes"/"no" replaced by 1/0, cast to int."""
    return labels.map(lambda value: _OUTCOME_CODES.get(value, value)).astype(int)


target_LNM = _encode_outcome(df_LNM["LNM"])
target_Surv = _encode_outcome(df_Surv["Survival5yr"])

Define the overall evidence columns

In [None]:
# All variables that may be entered as evidence, in the order they should
# appear in the evidence frames.
candidate_evidence_columns = [
    "ER", "PR", "p53", "L1CAM", "CA125", "CTMRI",
    "Platelets", "Cytology", "MRI_MI", "MSI", "POLE", "PreoperativeGrade",
]
# Keep only the candidates that this dataset actually contains.
available_columns = set(df.columns)
evidence_columns = [name for name in candidate_evidence_columns if name in available_columns]

Create datasets for the different networks, based on the evidence columns and the nodes that are present in the network

In [None]:
def _select_evidence(frame, network):
    """Return a copy of `frame` restricted to the evidence columns that are nodes of `network`.

    The column list is the multiset intersection of the module-level
    `evidence_columns` with the node names of the network.
    """
    shared_columns = list(Counter(evidence_columns) & Counter(list(network.names())))
    return frame[shared_columns].copy()


# One evidence frame per (network, target) combination.
df_origineel_LNM = _select_evidence(df_LNM, origineel_net)
df_origineel_Surv = _select_evidence(df_Surv, origineel_net)

df_WOPlat_LNM = _select_evidence(df_LNM, WOPlat)
df_WOPlat_Surv = _select_evidence(df_Surv, WOPlat)

df_WOPlat_MRI_LNM = _select_evidence(df_LNM, WOPlat_MRI)
df_WOPlat_MRI_Surv = _select_evidence(df_Surv, WOPlat_MRI)

df_WOPlat_TCGA_LNM = _select_evidence(df_LNM, WOPlat_TCGA)
df_WOPlat_TCGA_Surv = _select_evidence(df_Surv, WOPlat_TCGA)

df_WOPlat_TCGA_MRI_LNM = _select_evidence(df_LNM, WOPlat_TCGA_MRI)
df_WOPlat_TCGA_MRI_Surv = _select_evidence(df_Surv, WOPlat_TCGA_MRI)

df_WPlat_LNM = _select_evidence(df_LNM, WPlat)
df_WPlat_Surv = _select_evidence(df_Surv, WPlat)

df_WPlat_all_LNM = _select_evidence(df_LNM, WPlat_all)
df_WPlat_all_Surv = _select_evidence(df_Surv, WPlat_all)

Create a function to get the probabilities for the different networks

In [None]:
def _posteriors_per_row(engine, evidence, target, description):
    """Return one posterior of `target` per row of `evidence`.

    Parameters:
        engine: inference engine exposing `setEvidence`, `eraseAllEvidence`,
            `makeInference` and `posterior`.
        evidence: DataFrame with one evidence column per network node; missing
            values in a row are simply not entered as evidence.
        target: name of the node whose posterior is requested.
        description: short target name used in the error message.

    Returns:
        A list with exactly `len(evidence)` posteriors, in row order.
    """
    # Evidence-free posterior (the prior marginal), used as the fallback for
    # rows whose evidence cannot be processed.
    engine.eraseAllEvidence()
    engine.makeInference()
    prior = engine.posterior(target)

    posteriors = []
    for position in range(len(evidence)):
        row_evidence = evidence.iloc[position].dropna().to_dict()
        try:
            engine.setEvidence(row_evidence)
            engine.makeInference()
            posteriors.append(engine.posterior(target))
        except Exception as error:
            print("Error at row regarding " + description, position)
            print(error)
            # Keep the output aligned with the input rows. The previous code
            # re-appended the posterior of the preceding row (or crashed with
            # an unbound local when the very first row failed); the prior is
            # used instead so no other patient's prediction leaks in.
            print("Falling back to the prior of", target, "for row", position)
            posteriors.append(prior)
    return posteriors


def getProbabilities(model, evidence_LNM, evidence_Surv, Surv = "Survival5yr", samples = 100):
    """Run inference for every evidence row and collect the target posteriors.

    Parameters:
        model: Bayesian network handed to `gum.LazyPropagation`.
        evidence_LNM: DataFrame of evidence rows for the "LNM" target.
        evidence_Surv: DataFrame of evidence rows for the survival target.
        Surv: name of the survival node to query (default "Survival5yr").
        samples: unused; kept so existing callers keep working.

    Returns:
        Tuple `(resultsLNM, resultsSurvival)`: two lists of posteriors, one
        entry per row of the corresponding evidence frame. Rows whose evidence
        raises during inference are reported on stdout and filled with the
        prior of the target node.
    """
    net = gum.LazyPropagation(model)
    net.setNumberOfThreads(10)

    resultsLNM = _posteriors_per_row(net, evidence_LNM, "LNM", "LNM")
    resultsSurvival = _posteriors_per_row(net, evidence_Surv, Surv, "Survival")

    return resultsLNM, resultsSurvival


Get the probabilities for the different networks

In [None]:
# Run inference for every network; each call returns (LNM posteriors, survival posteriors).
print("Started 1")
results_origineel_LNM, results_origineel_Surv = getProbabilities(
    model=origineel_net, evidence_LNM=df_origineel_LNM, evidence_Surv=df_origineel_Surv
)

print("Started 2")
results_WOPlat_LNM, results_WOPlat_Surv = getProbabilities(
    model=WOPlat, evidence_LNM=df_WOPlat_LNM, evidence_Surv=df_WOPlat_Surv
)

print("Started 3")
results_WOPlat_MRI_LNM, results_WOPlat_MRI_Surv = getProbabilities(
    model=WOPlat_MRI, evidence_LNM=df_WOPlat_MRI_LNM, evidence_Surv=df_WOPlat_MRI_Surv
)

print("Started 4")
results_WOPlat_TCGA_LNM, results_WOPlat_TCGA_Surv = getProbabilities(
    model=WOPlat_TCGA, evidence_LNM=df_WOPlat_TCGA_LNM, evidence_Surv=df_WOPlat_TCGA_Surv
)

print("Started 5")
results_WOPlat_TCGA_MRI_LNM, results_WOPlat_TCGA_MRI_Surv = getProbabilities(
    model=WOPlat_TCGA_MRI, evidence_LNM=df_WOPlat_TCGA_MRI_LNM, evidence_Surv=df_WOPlat_TCGA_MRI_Surv
)

print("Started 6")
results_WPlat_LNM, results_WPlat_Surv = getProbabilities(
    model=WPlat, evidence_LNM=df_WPlat_LNM, evidence_Surv=df_WPlat_Surv
)

print("Started 7")
results_WPlat_all_LNM, results_WPlat_all_Surv = getProbabilities(
    model=WPlat_all, evidence_LNM=df_WPlat_all_LNM, evidence_Surv=df_WPlat_all_Surv
)



Create a function to extract the probability values from the results

In [None]:
def getProbResults(results, target):
    """Collect entry `target` of every item in `results` into a one-column DataFrame.

    Parameters:
        results: sequence of indexable items (one per row), e.g. posteriors.
        target: index/key looked up in each item.

    Returns:
        DataFrame with one row per item of `results`, in the same order.
    """
    picked = [entry[target] for entry in results]
    return pd.DataFrame(picked)

Get the predicted probabilities for the targets: LNM "yes" and survival "yes"

In [None]:
# Pull entry 1 of every posterior into a one-column DataFrame per model/target.
# NOTE(review): entry 1 is presumably the "yes" state of each target node - confirm
# against the state order of the networks.
org_LNM = getProbResults(results_origineel_LNM, target=1)
org_Surv = getProbResults(results_origineel_Surv, target=1)

WOPlat_LNM = getProbResults(results_WOPlat_LNM, target=1)
WOPlat_Surv = getProbResults(results_WOPlat_Surv, target=1)

WOPlat_MRI_LNM = getProbResults(results_WOPlat_MRI_LNM, target=1)
WOPlat_MRI_Surv = getProbResults(results_WOPlat_MRI_Surv, target=1)

WOPlat_TCGA_LNM = getProbResults(results_WOPlat_TCGA_LNM, target=1)
WOPlat_TCGA_Surv = getProbResults(results_WOPlat_TCGA_Surv, target=1)

WOPlat_TCGA_MRI_LNM = getProbResults(results_WOPlat_TCGA_MRI_LNM, target=1)
WOPlat_TCGA_MRI_Surv = getProbResults(results_WOPlat_TCGA_MRI_Surv, target=1)

WPlat_LNM = getProbResults(results_WPlat_LNM, target=1)
WPlat_Surv = getProbResults(results_WPlat_Surv, target=1)

WPlat_all_LNM = getProbResults(results_WPlat_all_LNM, target=1)
WPlat_all_Surv = getProbResults(results_WPlat_all_Surv, target=1)


Define functions to retrieve the metrics

Pseudocode:
- Define getMetrics:
    - Get the confusion matrix
    - Use it to generate TP, TN, FP, FN
    - Calculate the accuracy, ROC AUC, Precision, TPR, TNR, F1, Brier, Log Loss
    - Return the results in a dataframe
- Define getSlimMetrics:
    - Check if the results and targets are the same length
    - Get the ROC AUC, Log Loss, Brier, and N Predicted/N Observed
    - Return the results in a dataframe

In [None]:
from sklearn.metrics import roc_curve, f1_score
# Find the accuracy, roc auc, precision and recall for the results and the targets data
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, confusion_matrix, log_loss, brier_score_loss

def getMetrics(results, targets, threshold = 0.5):
    """Compute classification metrics for 0/1 predictions against 0/1 targets.

    Parameters:
        results: predicted labels, one per case.
        targets: observed labels, one per case.
        threshold: unused; `results` is scored exactly as passed in. Kept so
            existing callers keep working.

    Returns:
        One-column DataFrame indexed by metric name: "Accuracy", "ROC AUC",
        "Precision (PPV)", "TPR (Recall/Sens)", "TNR (Spec)", "F1", "Brier",
        "Log Loss". ROC AUC, Brier and Log Loss are computed on the same
        `results` values as the other metrics.
    """
    # Build the confusion matrix once. `labels=[0, 1]` pins the 2x2 layout
    # [[TN, FP], [FN, TP]] even when one class never occurs in the inputs.
    TN, FP, _FN, _TP = confusion_matrix(targets, results, labels=[0, 1]).ravel()

    accuracy = accuracy_score(targets, results)
    roc_auc = roc_auc_score(targets, results)
    precision = precision_score(targets, results)
    TPR = recall_score(targets, results)
    # Specificity; evaluates to NaN (with a NumPy warning) when there are no negatives.
    TNR = TN / (TN + FP)

    f1 = f1_score(targets, results)
    brier = brier_score_loss(targets, results)
    loglike = log_loss(targets, results)

    x = pd.DataFrame(
        [accuracy, roc_auc, precision, TPR, TNR, f1, brier, loglike],
        index=["Accuracy", "ROC AUC", "Precision (PPV)", "TPR (Recall/Sens)", "TNR (Spec)", "F1", "Brier", "Log Loss"],
    )
    return x

def getSlimMetrics(results, targets):
    """Compute threshold-free metrics for predicted probabilities.

    Parameters:
        results: predicted probabilities of the positive class, one per case
            (the notebook passes a one-column DataFrame).
        targets: observed 0/1 outcomes, one per case.

    Returns:
        One-column DataFrame indexed by "ROC AUC", "Log Loss", "Brier",
        "N Predicted/N Observed" (a "predicted/observed" string) and "Ratio"
        (sum of predicted probabilities divided by the number of observed
        positives).

    Raises:
        ValueError: if `results` and `targets` differ in length.
    """
    if len(results) != len(targets):
        raise ValueError(
            f"Results and targets are not the same length. "
            f"Results: {len(results)} Targets: {len(targets)}"
        )

    # Get ROC AUC, Log Loss, Brier, and N Predicted/N Observed
    ROC = round(roc_auc_score(targets, results), 4)
    LL = round(log_loss(targets, results), 4)
    Brier = round(brier_score_loss(targets, results), 4)
    # Expected number of positives = sum of the predicted probabilities.
    # Summing the raw values avoids depending on a column labelled 0.
    N_pred = np.asarray(results, dtype=float).sum()
    N_obs = int(targets.sum())
    N_br = f"{int(N_pred)}/{int(N_obs)}"
    Ratio = round(N_pred / N_obs, 4)

    x = pd.DataFrame(
        [ROC, LL, Brier, N_br, Ratio],
        index=["ROC AUC", "Log Loss", "Brier", "N Predicted/N Observed", "Ratio"],
    )
    return x


Get the slim (non thresholded) metrics for the different models

In [None]:
# Get the Slim metrics for the different models
print("Started 1")
org_LNM_metrics = getSlimMetrics(org_LNM, target_LNM)
org_Surv_metrics = getSlimMetrics(org_Surv, target_Surv)

print("Started 2")
WOPlat_LNM_metrics = getSlimMetrics(WOPlat_LNM, target_LNM)
WOPlat_Surv_metrics = getSlimMetrics(WOPlat_Surv, target_Surv)

print("Started 3")
WOPlat_MRI_LNM_metrics = getSlimMetrics(WOPlat_MRI_LNM, target_LNM)
WOPlat_MRI_Surv_metrics = getSlimMetrics(WOPlat_MRI_Surv, target_Surv)

print("Started 4")
WOPlat_TCGA_LNM_metrics = getSlimMetrics(WOPlat_TCGA_LNM, target_LNM)
WOPlat_TCGA_Surv_metrics = getSlimMetrics(WOPlat_TCGA_Surv, target_Surv)

print("Started 5")
WOPlat_TCGA_MRI_LNM_metrics = getSlimMetrics(WOPlat_TCGA_MRI_LNM, target_LNM)
WOPlat_TCGA_MRI_Surv_metrics = getSlimMetrics(WOPlat_TCGA_MRI_Surv, target_Surv)

print("Started 6")
WPlat_LNM_metrics = getSlimMetrics(WPlat_LNM, target_LNM)
WPlat_Surv_metrics = getSlimMetrics(WPlat_Surv, target_Surv)

print("Started 7")
WPlat_all_LNM_metrics = getSlimMetrics(WPlat_all_LNM, target_LNM)
WPlat_all_Surv_metrics = getSlimMetrics(WPlat_all_Surv, target_Surv)


Concatenate the metrics to display a table comparing the different models for LNM

In [None]:
# Concatenate the metrics to between the models LNM
LNM_metrics = pd.concat([org_LNM_metrics,  
                         WOPlat_LNM_metrics, WOPlat_MRI_LNM_metrics, WOPlat_TCGA_LNM_metrics, WOPlat_TCGA_MRI_LNM_metrics, WPlat_LNM_metrics, WPlat_all_LNM_metrics], axis=1)

LNM_metrics.columns = ["+Plat -TCGA - MRI", "-Plat -TCGA -MRI", "-Plat -TCGA +MRI", "-Plat +TCGA -MRI", "-Plat +TCGA +MRI", "+Plat -TCGA -MRI", "+Plat +TCGA +MRI"]
LNM_metrics


Concatenate the metrics to display a table comparing the different models for Survival

In [None]:
# Concatenate the metrics to between the models Survival
Surv_metrics = pd.concat([org_Surv_metrics, 
                          WOPlat_Surv_metrics, WOPlat_MRI_Surv_metrics, WOPlat_TCGA_Surv_metrics, WOPlat_TCGA_MRI_Surv_metrics, WPlat_Surv_metrics, WPlat_all_Surv_metrics], axis=1)
Surv_metrics = Surv_metrics.round(3)
Surv_metrics.columns = ["+Plat -TCGA - MRI", "-Plat -TCGA -MRI", "-Plat -TCGA +MRI", "-Plat +TCGA -MRI", "-Plat +TCGA +MRI", "+Plat -TCGA -MRI", "+Plat +TCGA +MRI"]
Surv_metrics

Plot the ROC curves for the different models

In [None]:
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
import seaborn as sns

# Set the style
sns.set_style("whitegrid")
# Render inline figures as both png and pdf
set_matplotlib_formats('png', 'pdf')
plt.rcParams['figure.dpi'] = 150

# One panel per target: (panel title, observed outcomes, [(legend label, predicted probabilities), ...]).
# Driving the plot from this table keeps every curve and its AUC tied to the
# same predictions; the previous copy-pasted version drew the
# "-Plat -TCGA -MRI" LNM curve from the original network's predictions.
roc_panels = [
    ("LNM", target_LNM, [
        ("Origineel Netwerk", org_LNM),
        ("-Plat -TCGA -MRI", WOPlat_LNM),
        ("-Plat -TCGA +MRI", WOPlat_MRI_LNM),
        ("-Plat +TCGA -MRI", WOPlat_TCGA_LNM),
        ("-Plat +TCGA +MRI", WOPlat_TCGA_MRI_LNM),
        ("+Plat -TCGA -MRI", WPlat_LNM),
        ("+Plat +TCGA +MRI", WPlat_all_LNM),
    ]),
    ("Survival", target_Surv, [
        ("Origineel Netwerk", org_Surv),
        ("-Plat -TCGA -MRI", WOPlat_Surv),
        ("-Plat -TCGA +MRI", WOPlat_MRI_Surv),
        ("-Plat +TCGA -MRI", WOPlat_TCGA_Surv),
        ("-Plat +TCGA +MRI", WOPlat_TCGA_MRI_Surv),
        ("+Plat -TCGA -MRI", WPlat_Surv),
        ("+Plat +TCGA +MRI", WPlat_all_Surv),
    ]),
]

# plot all the ROC curves in one figure (LNM and Survival in separate panels)
fig, ax = plt.subplots(1,2, figsize=(15,5))

fig.suptitle("ROC Curves", fontsize=16)

for panel, (panel_title, observed, model_scores) in zip(ax, roc_panels):
    # Diagonal reference line
    panel.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)

    for model_label, scores in model_scores:
        fpr, tpr, _roc_thresholds = roc_curve(observed, scores, pos_label=1)
        panel.plot(fpr, tpr, label='%s (area = %0.2f)' % (model_label, roc_auc_score(observed, scores)))

    panel.title.set_text(panel_title)
    panel.legend(loc="lower right")

Define a function to get the diagnoses on a range of thresholds

Pseudocode:
- Define a function to get the results based on a range of thresholds for the probability
    - Define thresholds
    - Create empty list
    - Loop through the probabilities
        - Loop through the thresholds
            - If the probability is higher than the threshold, append 1 to the list
            - Else append 0
    - Return the list as a dataframe


In [None]:
# Define a function to get the results based on a threshold for the probability
def getRangeResults(probResults, thresholds=None):
    """Binarise predicted probabilities at a range of thresholds.

    Parameters:
        probResults: DataFrame of predicted probabilities. The last column
            whose string name contains "ESGO" is used; if there is none, the
            column labelled 0 is used.
        thresholds: optional iterable of cut-offs. Defaults to
            0.05, 0.10, 0.20, ..., 0.90.

    Returns:
        DataFrame with one row per input row (by position) and one column per
        threshold, holding 1 where probability >= threshold and 0 otherwise.
    """
    if thresholds is None:
        thresholds = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    thresholds = list(thresholds)

    # Nothing to binarise: return an empty frame that still carries the threshold columns.
    if len(probResults) == 0:
        return pd.DataFrame([], columns=thresholds)

    # Resolve the probability column once instead of once per row.
    targetcol = 0
    for col in probResults.columns:
        if isinstance(col, str) and "ESGO" in col:
            targetcol = col

    # Work on the raw values so the result does not depend on the frame having
    # a default 0..n-1 index.
    probabilities = probResults[targetcol].to_numpy(dtype=float)
    # Broadcast (rows, 1) against (thresholds,) to compare every pair at once.
    flags = probabilities[:, None] >= np.asarray(thresholds, dtype=float)

    return pd.DataFrame(flags.astype("int64"), columns=thresholds)



Get the ranges for the different models



In [None]:
# Binarise every model's predicted probabilities at the full range of thresholds.
org_LNM_range = getRangeResults(probResults=org_LNM)
org_Surv_range = getRangeResults(probResults=org_Surv)

WOPlat_LNM_range = getRangeResults(probResults=WOPlat_LNM)
WOPlat_Surv_range = getRangeResults(probResults=WOPlat_Surv)

WOPlat_MRI_LNM_range = getRangeResults(probResults=WOPlat_MRI_LNM)
WOPlat_MRI_Surv_range = getRangeResults(probResults=WOPlat_MRI_Surv)

WOPlat_TCGA_LNM_range = getRangeResults(probResults=WOPlat_TCGA_LNM)
WOPlat_TCGA_Surv_range = getRangeResults(probResults=WOPlat_TCGA_Surv)

WOPlat_TCGA_MRI_LNM_range = getRangeResults(probResults=WOPlat_TCGA_MRI_LNM)
WOPlat_TCGA_MRI_Surv_range = getRangeResults(probResults=WOPlat_TCGA_MRI_Surv)

WPlat_LNM_range = getRangeResults(probResults=WPlat_LNM)
WPlat_Surv_range = getRangeResults(probResults=WPlat_Surv)

WPlat_all_LNM_range = getRangeResults(probResults=WPlat_all_LNM)
WPlat_all_Surv_range = getRangeResults(probResults=WPlat_all_Surv)


Create a function to make a 3d matrix of metrics for each threshold

Pseudocode:
- Define a function to make a 3d matrix of metrics for each threshold
    - Define thresholds
    - Create empty list
    - Define metricnames
    - Loop through the thresholds
        - Get the metrics for the results of each threshold
        - Append the metrics to the list
    - Concatenate the list
    - Return the list

In [None]:
# Create a function to make a 3d matrix of metrics for each threshold
def getMetricsRange(results, targets):
    """Tabulate thresholded metrics: one row per metric, one column per threshold.

    Parameters:
        results: DataFrame of 0/1 predictions with one column per threshold
            (0.05, 0.10, 0.20, ..., 0.90).
        targets: observed 0/1 outcomes, one per row of `results`.

    Returns:
        DataFrame of metrics rounded to 3 decimals, indexed by metric name and
        with the thresholds as column labels.
    """
    cutoffs = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    # Subset of the getMetrics rows that is reported per threshold.
    wanted_metrics = ["Accuracy", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"]

    per_cutoff = [
        round(getMetrics(results[cutoff].values, targets).loc[wanted_metrics], 3)
        for cutoff in cutoffs
    ]

    table = pd.concat(per_cutoff, axis=1)
    table.columns = cutoffs
    return table

Get the metric ranges for the different models.
Because the predictions are evaluated over a range of thresholds, you may get an 'is ill-defined' warning: beyond a certain threshold it is possible that no cases are predicted as positive.

In [None]:
# Thresholded metrics for each model, once per target.
print("Started 1")
org_LNM_metrics_range = getMetricsRange(results=org_LNM_range, targets=target_LNM)
org_Surv_metrics_range = getMetricsRange(results=org_Surv_range, targets=target_Surv)

print("Started 2")
WOPlat_LNM_metrics_range = getMetricsRange(results=WOPlat_LNM_range, targets=target_LNM)
WOPlat_Surv_metrics_range = getMetricsRange(results=WOPlat_Surv_range, targets=target_Surv)

print("Started 3")
WOPlat_MRI_LNM_metrics_range = getMetricsRange(results=WOPlat_MRI_LNM_range, targets=target_LNM)
WOPlat_MRI_Surv_metrics_range = getMetricsRange(results=WOPlat_MRI_Surv_range, targets=target_Surv)

print("Started 4")
WOPlat_TCGA_LNM_metrics_range = getMetricsRange(results=WOPlat_TCGA_LNM_range, targets=target_LNM)
WOPlat_TCGA_Surv_metrics_range = getMetricsRange(results=WOPlat_TCGA_Surv_range, targets=target_Surv)

print("Started 5")
WOPlat_TCGA_MRI_LNM_metrics_range = getMetricsRange(results=WOPlat_TCGA_MRI_LNM_range, targets=target_LNM)
WOPlat_TCGA_MRI_Surv_metrics_range = getMetricsRange(results=WOPlat_TCGA_MRI_Surv_range, targets=target_Surv)

print("Started 6")
WPlat_LNM_metrics_range = getMetricsRange(results=WPlat_LNM_range, targets=target_LNM)
WPlat_Surv_metrics_range = getMetricsRange(results=WPlat_Surv_range, targets=target_Surv)

print("Started 7")
WPlat_all_LNM_metrics_range = getMetricsRange(results=WPlat_all_LNM_range, targets=target_LNM)
WPlat_all_Surv_metrics_range = getMetricsRange(results=WPlat_all_Surv_range, targets=target_Surv)

Plot the recall vs precision for the different models

In [None]:
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
import seaborn as sns

# Set the style
sns.set_style("whitegrid")
# Render inline figures as png and pdf
set_matplotlib_formats('png', 'pdf')
plt.rcParams['figure.dpi'] = 150

# Row labels of the per-threshold metric tables used on the two axes.
recall_row = "TPR (Recall/Sens)"
precision_row = "Precision (PPV)"

# Recall vs Precision
# LNM: (metric table, legend label) per model, in plotting order
lnm_pr_curves = [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (WOPlat_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
]

fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("LNM Recall vs Precision per Model", fontsize=16)

for metric_table, model_label in lnm_pr_curves:
    ax.plot(metric_table.loc[recall_row], metric_table.loc[precision_row], label=model_label)

ax.legend(loc="upper right")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")

# Survival: same layout as the LNM figure
surv_pr_curves = [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (WOPlat_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]

fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("Survival Recall vs Precision per Model", fontsize=16)

for metric_table, model_label in surv_pr_curves:
    ax.plot(metric_table.loc[recall_row], metric_table.loc[precision_row], label=model_label)

ax.legend(loc="upper right")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")



Plot the recall vs threshold for the different models

In [None]:
# Recall vs threshold
# Thresholds at which the metrics were evaluated; used as x ticks and tick labels.
threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
recall_row = "TPR (Recall/Sens)"

# LNM: (metric table, legend label) per model, in plotting order
lnm_recall_curves = [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (WOPlat_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
]

fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("LNM Recall vs Threshold per Model", fontsize=16)

# Each series is indexed by threshold, so the threshold lands on the x-axis
# and the recall on the y-axis.
for metric_table, model_label in lnm_recall_curves:
    ax.plot(metric_table.loc[recall_row], label=model_label)

ax.legend(loc="upper right")
ax.set_xlabel("Threshold")
ax.set_ylabel("Recall")
ax.set_xticks(threshold_ticks)
ax.set_xticklabels(threshold_ticks)

# Survival: same layout as the LNM figure
surv_recall_curves = [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (WOPlat_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]

fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("Survival Recall per Threshold per Model", fontsize=16)

for metric_table, model_label in surv_recall_curves:
    ax.plot(metric_table.loc[recall_row], label=model_label)

ax.legend(loc="lower left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Recall")
ax.set_xticks(threshold_ticks)
ax.set_xticklabels(threshold_ticks)



Plot the precision vs threshold for the different models

In [None]:
# Precision vs threshold
# Thresholds at which the metrics were evaluated; used as x ticks and tick labels.
threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
precision_row = "Precision (PPV)"

# LNM: (metric table, legend label) per model, in plotting order
lnm_precision_curves = [
    (org_LNM_metrics_range, "Origineel Netwerk"),
    (WOPlat_LNM_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_LNM_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_LNM_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_LNM_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_LNM_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_LNM_metrics_range, "+Plat +TCGA +MRI"),
]

fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("LNM Precision vs Threshold per Model", fontsize=16)

# Each series is indexed by threshold, so the threshold lands on the x-axis.
for metric_table, model_label in lnm_precision_curves:
    ax.plot(metric_table.loc[precision_row], label=model_label)

ax.legend(loc="upper left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Precision")
ax.set_xticks(threshold_ticks)
ax.set_xticklabels(threshold_ticks)

# Survival: same layout as the LNM figure
surv_precision_curves = [
    (org_Surv_metrics_range, "Origineel Netwerk"),
    (WOPlat_Surv_metrics_range, "-Plat -TCGA -MRI"),
    (WOPlat_MRI_Surv_metrics_range, "-Plat -TCGA +MRI"),
    (WOPlat_TCGA_Surv_metrics_range, "-Plat +TCGA -MRI"),
    (WOPlat_TCGA_MRI_Surv_metrics_range, "-Plat +TCGA +MRI"),
    (WPlat_Surv_metrics_range, "+Plat -TCGA -MRI"),
    (WPlat_all_Surv_metrics_range, "+Plat +TCGA +MRI"),
]

fig, ax = plt.subplots(1,1, figsize=(15,5))
fig.suptitle("Survival Precision vs Threshold per Model", fontsize=16)

for metric_table, model_label in surv_precision_curves:
    ax.plot(metric_table.loc[precision_row], label=model_label)

ax.legend(loc="upper left")
ax.set_xlabel("Threshold")
ax.set_ylabel("Precision")
ax.set_xticks(threshold_ticks)
ax.set_xticklabels(threshold_ticks)
