### Author: Ally Sprik
### Last-updated: 25-02-2024

The goal of this notebook is to validate the fitted networks on the Tübingen dataset.



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb

# Validation cohort, read from the cleaned-data folder of the shared source files.
df = pd.read_csv('../../0. Source_files/0.2. Cleaned_data/Tubingen_Validation_wMSI.csv')

pd.options.mode.copy_on_write = True  # This will allow the code to run faster and keep Pandas happy. Technical detail: https://pandas.pydata.org/pandas-docs/stable/user_guide/copy_on_write.html#

# Fitted Bayesian networks that are validated below, loaded from .net files.
# NOTE(review): going by the file names, "WOP"/"WP" presumably mean without/with
# platelets and "AddMRIMI"/"AddTCGA" name the extra markers in the network — confirm.
origineel_net = gum.loadBN("../../0. Source_files/0.4. Original_Casper_files/Results/Casper_fitted_952.net")
woPlat_woRest = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_original_952.net")
WOPlat_MRI = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddMRIMI_fitted_952.net")
WOPlat_TCGA = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddTCGA_1_fitted_952.net")
# NOTE(review): WOPlat_TCGA_MRI is loaded here but never passed to getProbabilities
# in the visible part of this notebook — confirm whether it should be evaluated too.
WOPlat_TCGA_MRI = gum.loadBN("../1.3. Model/Fitted_Networks/R_WOP_AddMRIMITCGA_fitted_952.net")
wPlat_woRest = gum.loadBN("../1.3. Model/Fitted_Networks/R_WP_952.net")
WPlat_all = gum.loadBN("../1.3. Model/Fitted_Networks/R_WP_all_952.net")


# Change the target labels, and select an evidence set

In [None]:
# Drop the rows that do not have target information. The index is reset so that it
# runs 0..n-1, which is what the row-by-row predictions built later are aligned on.
df = df.dropna(subset=['LNM', 'Survival5yr']).reset_index(drop=True)

# Encode both binary targets as integers (1 = positive class).
# Values that are not in the mapping are passed through unchanged, exactly like
# `replace` did. The explicit integer cast replaces the implicit object -> int
# downcasting of `Series.replace`, which pandas has deprecated; without it the
# targets can stay object-typed and be rejected by the scikit-learn metrics.
# An unexpected label now fails loudly here (ValueError) instead of further down.
target_encoding = {"yes":1, "no":0, "negative":0, "positive":1}
for target_col in ['LNM', 'Survival5yr']:
    df[target_col] = df[target_col].map(lambda value: target_encoding.get(value, value)).astype(int)

evidence_columns = ["ER", "PR", "p53", "L1CAM", "CA125", "CTMRI", "Platelets", "Cytology", "MRI_MI", "MSI", "POLE", "PreoperativeGrade"]
# Filter evidence based on the available columns in the dataset
evidence_columns = [col for col in evidence_columns if col in df.columns]

# Evidence table: one row per patient, only the columns the networks can be given
evidence = df[evidence_columns]

Create specific datasets for each network

In [None]:
# Evidence subsets, one per network variant. DataFrame.drop returns a new frame,
# so `evidence` itself is never modified.
# NOTE(review): the "noMRI" subsets do not remove the MRI_MI column, and
# data_noMRI_NoPlat / data_NoPlat are built identically — confirm this is intended.
data_noMRI_NoTCGA_noPlat = evidence.drop(columns=['POLE', 'MSI', 'Platelets'])
data_noMRI_NoPlat = evidence.drop(columns=['Platelets'])
data_NoPlat = evidence.drop(columns=['Platelets'])
data_noMRI_NoTCGA = evidence.drop(columns=['POLE', 'MSI'])
data_noTCGA_NoPlat = evidence.drop(columns=['POLE', 'MSI', 'Platelets'])

Define a function to get the results for the networks

Pseudocode:
- Create an empty list for the results
- Loop over the evidence
    - Get the evidence row
    - Get the probabilities for the LNM and the Survival
    - Append the results to the list
- Return the list

In [None]:
# Function to loop through the evidence and get the results
def getProbabilities(model,evidence, Surv = "Survival5yr"):
    """Query a network once per evidence row for the LNM and survival posteriors.

    Parameters
    ----------
    model :
        Network handed to ``gum.getPosterior``.
    evidence : pandas.DataFrame
        One row per case; missing values in a row are left out of that row's evidence.
    Surv : str
        Name of the survival target node (default ``"Survival5yr"``).

    Returns
    -------
    tuple(list, list)
        The posteriors for ``"LNM"`` and for ``Surv``, in row order.
    """
    lnm_posteriors, survival_posteriors = [], []

    for row_position in range(len(evidence)):
        # Only the observed (non-missing) values of this row are used as evidence
        observed = evidence.iloc[row_position].dropna().to_dict()

        lnm_posteriors.append(gum.getPosterior(model, evs = observed, target = "LNM"))
        survival_posteriors.append(gum.getPosterior(model, evs = observed, target = Surv))

    return lnm_posteriors, survival_posteriors

Get the results for the networks

In [None]:
# Get the results for the original network
print("Started 1")
org_net_LNM, org_net_Surv = getProbabilities(origineel_net, data_noMRI_NoTCGA_noPlat)

# NOTE(review): these results are named "WOPlat_TCGA_MRI_*", but the model queried
# is woPlat_woRest on the evidence set without POLE/MSI/Platelets — confirm which
# network was meant.
print("Started 2")
WOPlat_TCGA_MRI_LNM, WOPlat_TCGA_MRI_Surv = getProbabilities(woPlat_woRest, data_noMRI_NoTCGA_noPlat)

# wPlat_woRest, evidence keeps Platelets but not POLE/MSI
print("Started 3")
Plat_Rest_No_LNM, Plat_Rest_No_Surv = getProbabilities(wPlat_woRest, data_noMRI_NoTCGA)

# WOPlat_MRI, evidence without POLE/MSI/Platelets
print("Started 4")
noTCGA_NoPlat_LNM, noTCGA_NoPlat_Surv = getProbabilities(WOPlat_MRI, data_noTCGA_NoPlat)

# WOPlat_TCGA, evidence without Platelets (POLE/MSI are kept)
print("Started 5")
noMRI_NoPlat_LNM, noMRI_NoPlat_Surv = getProbabilities(WOPlat_TCGA, data_noMRI_NoPlat)

# WPlat_all, full evidence set
print("Started 6")
Plat_Rest_yes_LNM, Plat_Rest_yes_Surv = getProbabilities(WPlat_all, evidence)


Define functions to unpack the results

In [None]:
def getProbResults(results, target):
    """Pick one entry out of every posterior and return them as a one-column frame.

    Parameters
    ----------
    results : sequence
        Posteriors (anything indexable with ``target``), one per case.
    target :
        Index of the entry to extract from each posterior.

    Returns
    -------
    pandas.DataFrame
        A single column (labelled ``0``) with one row per posterior, in input order.
    """
    return pd.DataFrame([posterior[target] for posterior in results])

Unpack the results

In [None]:
# Extract entry 1 of every posterior, for LNM and survival, per network.
# NOTE(review): entry 1 is assumed to be the positive state of each target node — confirm.
org_LNM_res, org_Surv_res = (getProbResults(posteriors, 1) for posteriors in (org_net_LNM, org_net_Surv))

woPlat_LNM_res, woPlat_Surv_res = (getProbResults(posteriors, 1) for posteriors in (WOPlat_TCGA_MRI_LNM, WOPlat_TCGA_MRI_Surv))

wPlat_LNM_res, wPlat_Surv_res = (getProbResults(posteriors, 1) for posteriors in (Plat_Rest_No_LNM, Plat_Rest_No_Surv))

noTCGA_LNM_res, noTCGA_Surv_res = (getProbResults(posteriors, 1) for posteriors in (noTCGA_NoPlat_LNM, noTCGA_NoPlat_Surv))

noMRI_LNM_res, noMRI_Surv_res = (getProbResults(posteriors, 1) for posteriors in (noMRI_NoPlat_LNM, noMRI_NoPlat_Surv))

wPlat_all_LNM_res, wPlat_all_Surv_res = (getProbResults(posteriors, 1) for posteriors in (Plat_Rest_yes_LNM, Plat_Rest_yes_Surv))

Define a function to get the metrics for the results

Pseudocode:
- Define getMetrics function:
    - Get the confusion matrix
    - Get the accuracy
    - Get the ROC AUC
    - Get the precision
    - Get the recall
    - Get the specificity
    - Get the F1 score
    - Get the Brier score
    - Get the log loss
    - Return the metrics
- Define getSlimMetrics function:
    - Get the ROC AUC
    - Get the Log Loss
    - Get the Brier score
    - Get the N Predicted/N Observed
    - Get the Ratio
    - Return the metrics

In [None]:
from sklearn.metrics import roc_curve
# Find the accuracy, roc auc, precision and recall for the results and the targets data
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, confusion_matrix, log_loss, brier_score_loss

def getMetrics(results, targets):
    """Compute classification metrics for hard (0/1) predictions.

    Parameters
    ----------
    results : array-like
        Predicted class labels.
    targets : array-like
        Observed class labels, same length as ``results``.

    Returns
    -------
    pandas.DataFrame
        One column (labelled ``0``) indexed by metric name: Accuracy, ROC AUC,
        Precision (PPV), TPR (Recall/Sens), TNR (Spec), F1, Brier, Log Loss.
    """
    # Build the confusion matrix once; for binary labels ravel() yields tn, fp, fn, tp
    TN, FP, _FN, _TP = confusion_matrix(targets, results).ravel()

    accuracy = accuracy_score(targets, results)
    roc_auc = roc_auc_score(targets, results)
    precision = precision_score(targets, results)
    TPR = recall_score(targets, results)
    TNR = TN / (TN + FP)

    # When nothing is predicted positive, precision and recall are both 0 and the
    # harmonic mean is 0/0. Report 0.0 (scikit-learn's own convention for F1)
    # instead of NaN or a ZeroDivisionError, depending on the scalar type returned.
    pr_sum = precision + TPR
    f1 = 2 * (precision * TPR) / pr_sum if pr_sum > 0 else 0.0
    brier = brier_score_loss(targets, results)
    loglike = log_loss(targets, results)

    x = pd.DataFrame([accuracy, roc_auc, precision, TPR,TNR, f1, brier, loglike], index=["Accuracy", "ROC AUC", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"])
    return x

def getSlimMetrics(results, targets):
    """Summarise predicted probabilities against observed outcomes.

    Parameters
    ----------
    results : pandas.DataFrame, pandas.Series or array-like
        Predicted probabilities of the positive class, one per case
        (the notebook passes a one-column DataFrame).
    targets : pandas.Series
        Observed 0/1 outcomes, same length as ``results``.

    Returns
    -------
    pandas.DataFrame
        One column (labelled ``0``) indexed by: ROC AUC, Log Loss, Brier,
        N Predicted/N Observed, Ratio.

    Raises
    ------
    ValueError
        If ``results`` and ``targets`` differ in length.
    """
    if len(results) != len(targets):
        # A formatted message instead of a tuple of arguments, so the lengths are readable
        raise ValueError(
            f"Results and targets are not the same length. "
            f"Results: {len(results)} Targets: {len(targets)}"
        )

    # Get ROC AUC, Log Loss, Brier, and N Predicted/N Observed
    ROC = round(roc_auc_score(targets, results), 4)
    LL = round(log_loss(targets, results), 4)
    Brier = round(brier_score_loss(targets, results), 4)
    # Expected number of events = sum of the predicted probabilities. Summing the raw
    # values works for a one-column frame as well as for a Series or array.
    N_pred = np.nansum(np.asarray(results, dtype=float))
    N_obs = int(targets.sum())
    N_br = f"{int(N_pred)}/{int(N_obs)}"
    Ratio = round(N_pred/N_obs, 4)

    x = pd.DataFrame([ROC, LL, Brier, N_br, Ratio], index=["ROC AUC", "Log Loss", "Brier", "N Predicted/N Observed", "Ratio"])
    return x


Define targets

In [None]:
# Observed outcomes that the predictions are scored against
targetSurv = df['Survival5yr']
targetLNM = df['LNM']


Drop the rows that do not have a target

Pseudocode:
- Create an empty list
- Loop over the targetSurv
    - If the targetSurv is nan, append the index to the list
- Drop the rows from the targets
- Drop the rows from the results


In [None]:
# Index labels of the rows whose survival target is missing. They are taken from the
# index itself rather than from a positional counter, so the cell can be re-run after
# rows were dropped without raising a KeyError on the gaps.
# NOTE(review): rows without 'Survival5yr' were already removed when df was cleaned,
# so this list is expected to be empty — confirm the cell is still needed.
rowindices = targetSurv.index[targetSurv.isna()].tolist()

# Drop the rows from the targets that do not have a target and drop it from the results as well
targetSurv = targetSurv.drop(rowindices)

org_Surv_res = org_Surv_res.drop(rowindices)
woPlat_Surv_res = woPlat_Surv_res.drop(rowindices)
wPlat_Surv_res = wPlat_Surv_res.drop(rowindices)
noTCGA_Surv_res = noTCGA_Surv_res.drop(rowindices)
noMRI_Surv_res = noMRI_Surv_res.drop(rowindices)
wPlat_all_Surv_res = wPlat_all_Surv_res.drop(rowindices)

Get the metrics for the results

In [None]:
# Summary metrics per network: LNM first, then survival
org_LNM_metrics, org_Surv_metrics = (
    getSlimMetrics(org_LNM_res, targetLNM),
    getSlimMetrics(org_Surv_res, targetSurv),
)

noTCGA_LNM_metrics, noTCGA_Surv_metrics = (
    getSlimMetrics(noTCGA_LNM_res, targetLNM),
    getSlimMetrics(noTCGA_Surv_res, targetSurv),
)

noMRI_LNM_metrics, noMRI_Surv_metrics = (
    getSlimMetrics(noMRI_LNM_res, targetLNM),
    getSlimMetrics(noMRI_Surv_res, targetSurv),
)

woPlat_LNM_metrics, woPlat_Surv_metrics = (
    getSlimMetrics(woPlat_LNM_res, targetLNM),
    getSlimMetrics(woPlat_Surv_res, targetSurv),
)

wPlat_LNM_metrics, wPlat_Surv_metrics = (
    getSlimMetrics(wPlat_LNM_res, targetLNM),
    getSlimMetrics(wPlat_Surv_res, targetSurv),
)

wPlat_all_LNM_metrics, wPlat_all_Surv_metrics = (
    getSlimMetrics(wPlat_all_LNM_res, targetLNM),
    getSlimMetrics(wPlat_all_Surv_res, targetSurv),
)

Concatenate the metrics to compare LNM

In [None]:
# Concatenate the metrics LNM: one column per network
LNM_metrics = pd.concat([org_LNM_metrics, noMRI_LNM_metrics, wPlat_LNM_metrics, wPlat_all_LNM_metrics], axis=1)

# The noMRI_* results come from the network without platelets but with the TCGA
# markers (WOPlat_TCGA), so that column is "-Plat +TCGA".
# NOTE(review): label derived from the network variable/file names — confirm.
LNM_metrics.columns = ["Original", "-Plat +TCGA", "+Plat -TCGA", "+Plat +TCGA"]

# Every column also holds the "N Predicted/N Observed" string and is therefore
# object-typed; DataFrame.round skips such columns, so round the float cells explicitly.
LNM_metrics = LNM_metrics.apply(
    lambda column: column.map(lambda value: round(value, 3) if isinstance(value, float) else value)
)

LNM_metrics

Concatenate the metrics to compare Survival

In [None]:
# Concatenate the metrics Survival: one column per network
Surv_metrics = pd.concat([org_Surv_metrics, noMRI_Surv_metrics, wPlat_Surv_metrics, wPlat_all_Surv_metrics], axis=1)

# The noMRI_* results come from the network without platelets but with the TCGA
# markers (WOPlat_TCGA), so that column is "-Plat +TCGA".
# NOTE(review): label derived from the network variable/file names — confirm.
Surv_metrics.columns = ["Original", "-Plat +TCGA", "+Plat -TCGA", "+Plat +TCGA"]

# Every column also holds the "N Predicted/N Observed" string and is therefore
# object-typed; DataFrame.round skips such columns, so round the float cells explicitly.
Surv_metrics = Surv_metrics.apply(
    lambda column: column.map(lambda value: round(value, 3) if isinstance(value, float) else value)
)

Surv_metrics

Plot the ROC curves

In [None]:
from sklearn.metrics import auc
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
import seaborn as sns


# Set the style
sns.set_style("whitegrid")
# Set the output as png
set_matplotlib_formats('png', 'pdf')
plt.rcParams['figure.dpi'] = 150


# Plot all the ROC curves in one figure (LNM and Survival in separate panels)
fig, ax = plt.subplots(1,2, figsize=(15,5))

fig.suptitle("ROC Curves", fontsize=16)

# Per panel: title, observed outcome, and (predicted probabilities, colour, legend label).
# The noMRI_* results come from the network without platelets but with the TCGA
# markers (WOPlat_TCGA), hence "-Plat +TCGA".
# NOTE(review): label derived from the network variable/file names — confirm.
roc_panels = [
    ("LNM", targetLNM, [
        (org_LNM_res, 'b', 'Original'),
        (noMRI_LNM_res, 'g', '-Plat +TCGA'),
        (wPlat_LNM_res, 'y', '+Plat -TCGA'),
        (wPlat_all_LNM_res, 'm', '+Plat +TCGA'),
    ]),
    ("Survival", targetSurv, [
        (org_Surv_res, 'b', 'Original'),
        (noMRI_Surv_res, 'g', '-Plat +TCGA'),
        (wPlat_Surv_res, 'y', '+Plat -TCGA'),
        (wPlat_all_Surv_res, 'm', '+Plat +TCGA'),
    ]),
]

for panel, (panel_title, observed, curves) in zip(ax, roc_panels):
    # Diagonal = performance of a random classifier
    panel.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)

    for scores, colour, label in curves:
        fpr, tpr, _ = roc_curve(observed, scores)
        roc_auc = round(auc(fpr, tpr), 3)
        panel.plot(fpr, tpr, color=colour, lw=2, label=f'{label} (AUC = {roc_auc})')

    panel.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], title=panel_title,
              xlabel="False positive rate", ylabel="True positive rate")
    panel.legend(loc="lower right")




Define a function to get the results based on a range of thresholds for the probability

Pseudocode:
- Define getRangeResults function:
    - Define thresholds
    - Create empty list
    - Loop through the probabilities
        - Loop through the thresholds
            - If the probability is higher than the threshold, append 1 to the list
            - Else, append 0 to the list
    - Return the list

In [None]:
def getRangeResults(probResults, thresholds=None):
    """Turn predicted probabilities into 0/1 predictions at several thresholds.

    Parameters
    ----------
    probResults : pandas.DataFrame
        Predicted probabilities in the column labelled ``0``. Rows are read by
        position, so the frame may have gaps in its index (e.g. after rows
        were dropped).
    thresholds : sequence of float, optional
        Decision thresholds. Defaults to
        ``[0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]``.

    Returns
    -------
    pandas.DataFrame
        One row per input row (fresh 0..n-1 index) and one column per threshold;
        a cell is 1 when the probability is strictly greater than the threshold,
        otherwise 0.
    """
    if thresholds is None:
        thresholds = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    thresholds = list(thresholds)

    # Nothing to classify: return an empty table that still carries the threshold columns
    if len(probResults) == 0:
        return pd.DataFrame([], columns=thresholds)

    # Positional values of column 0, so the index labels of probResults do not matter
    probabilities = probResults[0].to_numpy()

    # Compare every probability with every threshold in one broadcasted operation
    above = probabilities[:, None] > np.asarray(thresholds)

    return pd.DataFrame(above.astype(np.int64), columns=thresholds)


Get the range results

In [None]:
# Binarise the predicted probabilities of every network at each threshold
org_LNM_range, org_Surv_range = map(getRangeResults, (org_LNM_res, org_Surv_res))

noTCGA_LNM_range, noTCGA_Surv_range = map(getRangeResults, (noTCGA_LNM_res, noTCGA_Surv_res))

noMRI_LNM_range, noMRI_Surv_range = map(getRangeResults, (noMRI_LNM_res, noMRI_Surv_res))

woPlat_LNM_range, woPlat_Surv_range = map(getRangeResults, (woPlat_LNM_res, woPlat_Surv_res))

wPlat_LNM_range, wPlat_Surv_range = map(getRangeResults, (wPlat_LNM_res, wPlat_Surv_res))


Create a function that builds a table of metrics (rows) for each threshold (columns)

Pseudocode:
- Define getMetricsRange function:
    - Define thresholds
    - Create empty list
    - Metricnames
    - Loop through the thresholds
        - Get the metrics for the results
        - Append the metrics to the list
    - Concatenate the list
    - Return the list

In [None]:
def getMetricsRange(results, targets):
    """Collect the classification metrics at every threshold into one table.

    Parameters
    ----------
    results : pandas.DataFrame
        0/1 predictions with one column per threshold (column labels are the
        threshold values).
    targets : array-like
        Observed outcomes, passed on to ``getMetrics``.

    Returns
    -------
    pandas.DataFrame
        Metrics as rows and thresholds as columns, rounded to 3 decimals.
    """
    thresholds = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]
    # Metrics kept from getMetrics (ROC AUC is left out)
    metricnames = ["Accuracy", "Precision (PPV)", "TPR (Recall/Sens)","TNR (Spec)", "F1", "Brier", "Log Loss"]

    per_threshold = [
        round(getMetrics(results[cutoff].values, targets).loc[metricnames], 3)
        for cutoff in thresholds
    ]

    table = pd.concat(per_threshold, axis=1)
    table.columns = thresholds
    return table

Get the metrics for the range results

In [None]:
# Metrics per threshold for every network: LNM first, then survival
org_LNM_metrics_range, org_Surv_metrics_range = (
    getMetricsRange(org_LNM_range, targetLNM),
    getMetricsRange(org_Surv_range, targetSurv),
)

noTCGA_LNM_metrics_range, noTCGA_Surv_metrics_range = (
    getMetricsRange(noTCGA_LNM_range, targetLNM),
    getMetricsRange(noTCGA_Surv_range, targetSurv),
)

noMRI_LNM_metrics_range, noMRI_Surv_metrics_range = (
    getMetricsRange(noMRI_LNM_range, targetLNM),
    getMetricsRange(noMRI_Surv_range, targetSurv),
)

woPlat_LNM_metrics_range, woPlat_Surv_metrics_range = (
    getMetricsRange(woPlat_LNM_range, targetLNM),
    getMetricsRange(woPlat_Surv_range, targetSurv),
)

wPlat_LNM_metrics_range, wPlat_Surv_metrics_range = (
    getMetricsRange(wPlat_LNM_range, targetLNM),
    getMetricsRange(wPlat_Surv_range, targetSurv),
)


Plot the precision–recall curve

In [None]:
# Precision (y axis) against recall (x axis), one panel per target
fig, ax = plt.subplots(1,2, figsize=(15,5))

fig.suptitle("Precision against Recall", fontsize=16) # Recall on x

# Per panel: title and (metrics per threshold, colour, legend label).
# Every curve has its own colour, and the legend names the network each result was
# computed with: noTCGA_* = WOPlat_MRI, noMRI_* = WOPlat_TCGA,
# woPlat_* = woPlat_woRest, wPlat_* = wPlat_woRest.
# NOTE(review): labels derived from the network variable/file names — confirm.
pr_panels = [
    ("LNM", [
        (org_LNM_metrics_range, 'b', 'Original'),
        (noTCGA_LNM_metrics_range, 'g', '-Plat -TCGA +MRI'),
        (noMRI_LNM_metrics_range, 'y', '-Plat +TCGA'),
        (woPlat_LNM_metrics_range, 'm', '-Plat -TCGA'),
        (wPlat_LNM_metrics_range, 'c', '+Plat -TCGA'),
    ]),
    ("Survival", [
        (org_Surv_metrics_range, 'b', 'Original'),
        (noTCGA_Surv_metrics_range, 'g', '-Plat -TCGA +MRI'),
        (noMRI_Surv_metrics_range, 'y', '-Plat +TCGA'),
        (woPlat_Surv_metrics_range, 'm', '-Plat -TCGA'),
        (wPlat_Surv_metrics_range, 'c', '+Plat -TCGA'),
    ]),
]

for panel, (panel_title, curves) in zip(ax, pr_panels):
    for metrics_range, colour, label in curves:
        panel.plot(metrics_range.loc["TPR (Recall/Sens)"], metrics_range.loc["Precision (PPV)"],
                   color=colour, lw=2, label=label)

    # Axis labels match what is plotted: recall on x, precision on y
    panel.set(title=panel_title, xlabel="Recall", ylabel="Precision")
    panel.legend(loc="upper right")

Plot recall vs threshold

In [None]:
# Recall vs threshold (threshold on x axis), one panel per target
fig, ax = plt.subplots(1,2, figsize=(15,5))

fig.suptitle("Recall", fontsize=16)

threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]

# Per panel: title and (metrics per threshold, colour, legend label).
# Every curve has its own colour, and the legend names the network each result was
# computed with: noTCGA_* = WOPlat_MRI, noMRI_* = WOPlat_TCGA,
# woPlat_* = woPlat_woRest, wPlat_* = wPlat_woRest.
# NOTE(review): labels derived from the network variable/file names — confirm.
recall_panels = [
    ("LNM", [
        (org_LNM_metrics_range, 'b', 'Original'),
        (noTCGA_LNM_metrics_range, 'g', '-Plat -TCGA +MRI'),
        (noMRI_LNM_metrics_range, 'y', '-Plat +TCGA'),
        (woPlat_LNM_metrics_range, 'm', '-Plat -TCGA'),
        (wPlat_LNM_metrics_range, 'c', '+Plat -TCGA'),
    ]),
    ("Survival", [
        (org_Surv_metrics_range, 'b', 'Original'),
        (noTCGA_Surv_metrics_range, 'g', '-Plat -TCGA +MRI'),
        (noMRI_Surv_metrics_range, 'y', '-Plat +TCGA'),
        (woPlat_Surv_metrics_range, 'm', '-Plat -TCGA'),
        (wPlat_Surv_metrics_range, 'c', '+Plat -TCGA'),
    ]),
]

for panel, (panel_title, curves) in zip(ax, recall_panels):
    for metrics_range, colour, label in curves:
        panel.plot(metrics_range.columns, metrics_range.loc["TPR (Recall/Sens)"],
                   color=colour, lw=2, label=label)

    # One tick per evaluated threshold
    panel.set_xticks(threshold_ticks)
    panel.set_xticklabels(threshold_ticks)
    panel.set(title=panel_title, xlabel="Threshold", ylabel="Recall")
    panel.legend(loc="lower left")


Plot precision vs threshold

In [None]:
# Precision vs threshold (threshold on x axis), one panel per target
fig, ax = plt.subplots(1,2, figsize=(15,5))

fig.suptitle("Precision", fontsize=16)

threshold_ticks = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60 ,0.70, 0.80, 0.90]

# Per panel: title and (metrics per threshold, colour, legend label).
# Every curve has its own colour, and the legend names the network each result was
# computed with: noTCGA_* = WOPlat_MRI, noMRI_* = WOPlat_TCGA,
# woPlat_* = woPlat_woRest, wPlat_* = wPlat_woRest.
# NOTE(review): labels derived from the network variable/file names — confirm.
precision_panels = [
    ("LNM", [
        (org_LNM_metrics_range, 'b', 'Original'),
        (noTCGA_LNM_metrics_range, 'g', '-Plat -TCGA +MRI'),
        (noMRI_LNM_metrics_range, 'y', '-Plat +TCGA'),
        (woPlat_LNM_metrics_range, 'm', '-Plat -TCGA'),
        (wPlat_LNM_metrics_range, 'c', '+Plat -TCGA'),
    ]),
    ("Survival", [
        (org_Surv_metrics_range, 'b', 'Original'),
        (noTCGA_Surv_metrics_range, 'g', '-Plat -TCGA +MRI'),
        (noMRI_Surv_metrics_range, 'y', '-Plat +TCGA'),
        (woPlat_Surv_metrics_range, 'm', '-Plat -TCGA'),
        (wPlat_Surv_metrics_range, 'c', '+Plat -TCGA'),
    ]),
]

for panel, (panel_title, curves) in zip(ax, precision_panels):
    for metrics_range, colour, label in curves:
        panel.plot(metrics_range.columns, metrics_range.loc["Precision (PPV)"],
                   color=colour, lw=2, label=label)

    # One tick per evaluated threshold
    panel.set_xticks(threshold_ticks)
    panel.set_xticklabels(threshold_ticks)
    panel.set(title=panel_title, xlabel="Threshold", ylabel="Precision")
    panel.legend(loc="upper left")

Plot the decision curves

In [None]:
# Decision curve analysis
from dcurves import dca, plot_graphs
import pandas as pd
import numpy as np
import statsmodels.api as sm
import lifelines

In [None]:
import glasbey

# Names under which the five models appear in the decision-curve analysis
dca_model_names = ["-Plat -TCGA -MRI", "-Plat -TCGA +MRI", "-Plat +TCGA -MRI", "+Plat -TCGA -MRI", "+Plat +TCGA +MRI"]

# One table with the observed LNM outcome followed by each model's predicted probability
LNM_DCA = pd.concat(
    [targetLNM, org_LNM_res, noTCGA_LNM_res, noMRI_LNM_res, wPlat_LNM_res, wPlat_all_LNM_res],
    axis=1,
)
LNM_DCA.columns = ["Target"] + dca_model_names

# Net benefit of every model for threshold probabilities 0.00, 0.02, ..., 0.28
dca_multi_LNM = dca(
    data = LNM_DCA,
    outcome = "Target",
    modelnames = list(dca_model_names),
    thresholds = np.arange(0, 0.30,0.02),
)

# Figure and legend settings for the net-benefit plot
figure = plt.figure(figsize=(10, 7))
plt.rcParams.update({'font.size': 10})
plt.rcParams.update({'legend.loc': 'upper right', 'legend.borderaxespad': 0})

# Colour-blind-safe palette with seven colours
palette = glasbey.create_palette(palette_size=7, colorblind_safe=True)

plot_graphs(
    plot_df=pd.concat([dca_multi_LNM]),
    y_limits=[-0.005, 0.1],
    graph_type="net_benefit",
    color_names=palette,
)