In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from random import randint, sample
from scipy.interpolate import interp1d
from pickle import dump, load

# Label of the window of interest (not referenced by the cells visible in this notebook).
woi = "preseizure1"

# Folder that holds the "surgical_outcome_data_{sigma}sigma.xlsx" inputs and receives
# the result workbooks. Later cells build paths with `main_folder + filename`, so the
# value MUST end with a path separator.
# Every assignment used to be commented out, which made the notebook fail with a
# NameError on a fresh kernel; the current directory is used as a runnable default.
main_folder = "./"

# Google Colab: mount Drive and point main_folder at the project folder.
# from google.colab import drive
# drive.mount('/content/gdrive')
# main_folder = "/content/gdrive/My Drive/epigame-folder/game_vKarla/"

# Local disk example (note the trailing slash).
# main_folder = "/media/kivi/ADATA HV100/epigame-folder/game_vKarla/"

In [None]:
# Names of the connectivity measures analysed below: 'PAC' first, then, for each
# (low, high) pair, the five measures labelled "<measure>-(<low>,<high>)".
# The pairs are presumably frequency-band limits in Hz -- TODO confirm.
conn_measures = ['PAC'] + [
    f"{measure}-({low},{high})"
    for low, high in ((0, 4), (4, 8), (8, 13), (13, 30), (30, 70), (70, 150))
    for measure in ('SCR', 'SCI', 'PLV', 'PLI', 'CC')
]

In [None]:
def to_labels(pos_probs, threshold):
    """Binarise scores against a threshold.

    Parameters
    ----------
    pos_probs : array-like of numbers
        Scores to binarise. Any sequence is accepted; it is converted with
        ``np.asarray`` so that plain Python lists work as well as arrays.
    threshold : number
        Cut-off value.

    Returns
    -------
    list of int
        1 for every value >= threshold and 0 for every value < threshold,
        in input order.
    """
    return list((np.asarray(pos_probs) >= threshold).astype('int'))


def moving_thresh_auc(predictive_measure=[], outcome=[], moving_step=0.00001):
    """Scan thresholds over a score and keep the one that best separates outcomes.

    For every threshold ``t`` in ``np.arange(0, max(predictive_measure), moving_step)``
    a "good" case counts as a true positive when its score is >= ``t`` and a "bad"
    case counts as a true negative when its score is not >= ``t``. The figure of
    merit reported as "AUC" is the mean of the true-positive rate and the
    true-negative rate at that threshold.

    Parameters
    ----------
    predictive_measure : sequence of numbers
        One score per case.
    outcome : sequence of str
        One label per case; only the labels "good" and "bad" are used, cases
        with any other label are ignored.
    moving_step : float
        Spacing between consecutive thresholds.

    Returns
    -------
    tuple
        ``(best_value, best_threshold, tn_rate, tp_rate)`` at the first threshold
        that reaches the highest value. ``(0, 0, 0, 0)`` is returned when there
        are no scores or when no threshold yields a value above zero.
    """
    # np.max raises on an empty sequence; report "nothing found" instead.
    if len(predictive_measure) == 0:
        return (0, 0, 0, 0)

    thresholds = np.arange(0, np.max(predictive_measure), moving_step)

    g = np.array([pm for i,pm in enumerate(predictive_measure) if outcome[i]=="good"])
    b = np.array([pm for i,pm in enumerate(predictive_measure) if outcome[i]=="bad"])

    # Rates are normalised by the actual group sizes (previously hard-coded as 14
    # "good" and 7 "bad" cases, which is only right for that one cohort).
    n_good, n_bad = len(g), len(b)

    A_top, T = 0, 0
    tp_top, tn_top = 0, 0
    for t in thresholds:
        # An empty group contributes a rate of 0 rather than dividing by zero.
        tp = np.count_nonzero(g >= t) / n_good if n_good else 0.0
        tn = (n_bad - np.count_nonzero(b >= t)) / n_bad if n_bad else 0.0
        A = (tp + tn)/2
        # Strict '>' keeps the first (lowest) threshold among ties.
        if A > A_top:
            A_top = A
            T = t
            tn_top, tp_top = tn, tp

    return (A_top, T, tn_top, tp_top)

In [None]:
import seaborn as sns
# !pip install statannot
from statannot import add_stat_annotation

from scipy.stats import shapiro, mannwhitneyu, ttest_ind

# Do not print out pyplot outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "last_expr"

# Outcome prediction from single connectivity measures.
# For every (sigma, connectivity measure, strategy) combination this cell:
#   1. computes the moving-threshold score with moving_thresh_auc,
#   2. compares the score between good-outcome and bad-outcome cases,
#   3. shows an annotated box plot when the comparison is significant (p <= 0.05),
#   4. stores one result row; all rows are written to an Excel workbook at the end.

# Result columns: one entry is appended to each list per combination.
Sigma, CM, Summary_stat, Gauss, Pvalue_Shapiro, Test, Pvalue, MAUC, T, TN, TP = [],[],[],[],[],[],[],[],[],[],[]

summary_stat_label = ["average", "mm", "coefvar", "max", "random"]

for sigma in [4]:

    load_data = pd.read_excel(main_folder+f"surgical_outcome_data_{sigma}sigma.xlsx", engine='openpyxl')

    # if 0 winners, nan is saved, so replace it with 0.
    # The result is assigned back: fillna(inplace=True) on a column selection is a
    # chained assignment and does not reliably update the parent frame.
    load_data["Mean_overlap_ratio"] = load_data["Mean_overlap_ratio"].fillna(0)

    # Group once per file instead of re-grouping inside the loops.
    grouped = load_data.groupby(["CM", "Strategy"])

    for cm in conn_measures:

        for summary_stat in summary_stat_label:
            data = grouped.get_group((cm, summary_stat))

            x_plot = list(data.Mean_overlap_ratio)
            y_plot = ["good" if val==1 else "bad" for val in data["Outcome"]]

            # calculate moving threshold-based AUC
            mauc = moving_thresh_auc(x_plot, y_plot, moving_step=0.00001)

            group1 = [x for i,x in enumerate(x_plot) if y_plot[i]=="good"]
            group0 = [x for i,x in enumerate(x_plot) if y_plot[i]=="bad"]

            _, p1 = shapiro(group1)
            _, p0 = shapiro(group0)

            # Shapiro-Wilk tests the null hypothesis of normality, so p > 0.05 means
            # normality is NOT rejected. Welch's t-test is used only when both groups
            # look Gaussian; otherwise the rank-based Mann-Whitney U test is used.
            # (The two tests were previously swapped.)
            gaussian = bool(p1 > 0.05 and p0 > 0.05)
            if gaussian:
                _, pvalue = ttest_ind(group1, group0, equal_var=False)
                stest = 't-test_welch'
            else:
                _, pvalue = mannwhitneyu(group1, group0, alternative='two-sided')
                stest = 'Mann-Whitney'

            # The figure is only ever displayed for significant results, so it is
            # only built for them.
            if pvalue<=0.05:
                g = sns.catplot(data=data, x='Outcome', y='Mean_overlap_ratio', order=[1,0], kind="box", hue="Outcome",
                                palette=["#FFA7A0", "#ABEAC9"],
                                height=4, aspect=.7)
                g.map_dataframe(sns.stripplot, x='Outcome', y='Mean_overlap_ratio', order=[1,0], hue="Outcome",
                                palette=["#404040","#404040"],
                                alpha=0.6, dodge=True)

                p_annot = add_stat_annotation(g.axes[0][0], data=data, x=x_plot, y=y_plot, order=["good","bad"],
                                box_pairs=[("good","bad")],
                                test=stest, text_format='star', loc='outside', verbose=0)

                pvalue_statannot = p_annot[1][0].pval
                print("P-value computed by statannot =", pvalue_statannot)
                print(stest)
                plt.title(f"Sigma = {sigma}; Measure = {cm}; summary_stat = {summary_stat}; p-value = {pvalue}; AUC = {mauc[0]}", pad=50)
                plt.show()

                plt.close('all')

            Sigma.append(sigma)
            CM.append(cm)
            Summary_stat.append(summary_stat)
            Gauss.append(gaussian)
            Pvalue_Shapiro.append((p0,p1))
            Test.append(stest)
            Pvalue.append(pvalue)
            MAUC.append(mauc[0])
            T.append(mauc[1])
            TN.append(mauc[2])
            TP.append(mauc[3])

# "Summary_stat" must come from the accumulated list; the loop variable
# `summary_stat` would stamp every row with the last strategy.
outcome_pred = pd.DataFrame({"CM":CM, "Summary_stat":Summary_stat, "Sigma":Sigma,
                             "Gauss":Gauss, "Pvalue-Shapiro":Pvalue_Shapiro,
                             "Test":Test, "Pvalue":Pvalue,
                             "MAUC":MAUC, "T":T, "TN":TN, "TP":TP})
outcome_pred.to_excel(main_folder + "surgical_outcome_prediction_game_vKarla.xlsx")

The code below checks whether any `Mean_overlap_ratio` value in the data is zero. If so, the value in each such row (`zero_row_index`) of the current sigma file is replaced with the value at the same row index in the previous sigma file (sigma − 1), and the analysis continues with the patched data.


In [None]:
# !pip install statannot

import pandas as pd
import seaborn as sns
from statannot import add_stat_annotation
from scipy.stats import shapiro, mannwhitneyu, ttest_ind
from itertools import combinations

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "last_expr"
import matplotlib.pyplot as plt

In [None]:
# Build `load_data` from the sigma=4 table and fill its zero scores from the
# lower-sigma tables, one sigma at a time, until no zero is left or sigma=1 has
# been consulted.
#
# Previously the patched table was discarded: each loop iteration re-read the
# lower-sigma file into `load_data`, overwriting the values just copied in.
#
# After this cell, `load_data` is the patched table and `sigma` is the lowest
# sigma whose file was read.
# Rows are matched by index label, i.e. this assumes every sigma file lists the
# same rows in the same order -- TODO confirm.
sigma = 4
load_data = pd.read_excel(main_folder + f"surgical_outcome_data_{sigma}sigma.xlsx", engine='openpyxl')

# A missing score (NaN) is treated like a zero score so that it is patched too;
# a plain `== 0` comparison never matches NaN.
load_data["Mean_overlap_ratio"] = load_data["Mean_overlap_ratio"].fillna(0)

while True:
    # Check if "Mean_overlap_ratio" still contains any zeros
    zero_row_indices = load_data.index[load_data["Mean_overlap_ratio"] == 0].tolist()

    if not zero_row_indices:
        break  # No zeros left, exit the loop

    print(f"Zeros found in rows {zero_row_indices} for sigma={sigma}")
    if sigma == 1:
        # No lower sigma to fall back on; the remaining zeros are kept.
        print("Skipping iteration for sigma=1 with zeros")
        break

    sigma -= 1
    prev_load_data = pd.read_excel(main_folder + f"surgical_outcome_data_{sigma}sigma.xlsx", engine='openpyxl')

    # Copy the lower-sigma value into every row that is still zero.
    load_data.loc[zero_row_indices, "Mean_overlap_ratio"] = (
        prev_load_data.loc[zero_row_indices, "Mean_overlap_ratio"].fillna(0)
    )



In [None]:
# Outcome prediction from single connectivity measures, run on the `load_data`
# table prepared by the sigma fallback cell.
# For every (connectivity measure, strategy) combination this cell computes the
# moving-threshold score, compares good-outcome and bad-outcome cases, shows an
# annotated box plot for significant results (p <= 0.05) and stores one result row.

# Result columns. `Sigma` is reset here as well: it is appended to below, and
# without the reset this cell depended on a list left over from an earlier cell.
Sigma, CM, Summary_stat, Gauss, Pvalue_Shapiro, Test, Pvalue, MAUC, T, TN, TP = [], [], [], [], [], [], [], [], [], [], []

summary_stat_label = ["average", "mm", "coefvar", "max", "random"]

# Group once instead of re-grouping inside the loops.
grouped = load_data.groupby(["CM", "Strategy"])

for cm in conn_measures:
    for summary_stat in summary_stat_label:
        data = grouped.get_group((cm, summary_stat))

        x_plot = list(data.Mean_overlap_ratio)
        y_plot = ["good" if val == 1 else "bad" for val in data["Outcome"]]

        mauc = moving_thresh_auc(x_plot, y_plot, moving_step=0.0001)

        group1 = [x for i, x in enumerate(x_plot) if y_plot[i] == "good"]
        group0 = [x for i, x in enumerate(x_plot) if y_plot[i] == "bad"]

        _, p1 = shapiro(group1)
        _, p0 = shapiro(group0)

        # Shapiro-Wilk tests the null hypothesis of normality, so p > 0.05 means
        # normality is NOT rejected. Welch's t-test is used only when both groups
        # look Gaussian; otherwise the rank-based Mann-Whitney U test is used.
        # (The two tests were previously swapped.)
        gaussian = bool(p1 > 0.05 and p0 > 0.05)
        if gaussian:
            _, pvalue = ttest_ind(group1, group0, equal_var=False)
            stest = 't-test_welch'
        else:
            _, pvalue = mannwhitneyu(group1, group0, alternative='two-sided')
            stest = 'Mann-Whitney'

        # The figure is only ever displayed for significant results, so it is
        # only built for them.
        if pvalue <= 0.05:
            g = sns.catplot(data=data, x='Outcome', y='Mean_overlap_ratio', order=[1, 0], kind="box", hue="Outcome",
                            palette=["#FFA7A0", "#ABEAC9"],
                            height=4, aspect=.7)
            g.map_dataframe(sns.stripplot, x='Outcome', y='Mean_overlap_ratio', order=[1, 0], hue="Outcome",
                            palette=["#404040", "#404040"],
                            alpha=0.6, dodge=True)

            p_annot = add_stat_annotation(g.axes[0][0], data=data, x=x_plot, y=y_plot, order=["good", "bad"],
                                          box_pairs=[("good", "bad")],
                                          test=stest, text_format='star', loc='outside', verbose=0)

            pvalue_statannot = p_annot[1][0].pval
            print("P-value computed by statannot =", pvalue_statannot)
            print(stest)
            plt.title(f"Measure = {cm}; summary_stat = {summary_stat}; p-value = {pvalue}; AUC = {mauc[0]}", pad=50)
            plt.show()

            plt.close('all')

        # `sigma` is the value left behind by the sigma fallback cell.
        Sigma.append(sigma)
        CM.append(cm)
        Summary_stat.append(summary_stat)
        Gauss.append(gaussian)
        Pvalue_Shapiro.append((p0, p1))
        Test.append(stest)
        Pvalue.append(pvalue)
        MAUC.append(mauc[0])
        T.append(mauc[1])
        TN.append(mauc[2])
        TP.append(mauc[3])

# "Summary_stat" must come from the accumulated list; the loop variable
# `summary_stat` would stamp every row with the last strategy.
# "Sigma" is written out too, since it is recorded for every row.
outcome_pred = pd.DataFrame({"CM": CM, "Summary_stat": Summary_stat, "Sigma": Sigma,
                             "Gauss": Gauss, "Pvalue-Shapiro": Pvalue_Shapiro,
                             "Test": Test, "Pvalue": Pvalue,
                             "MAUC": MAUC, "T": T, "TN": TN, "TP": TP})
outcome_pred.to_excel(main_folder + "surgical_outcome_prediction_game_vKarla_sigma_recursive.xlsx")

Combinations of 2 connectivity measures.

In [None]:
# Outcome prediction from pairs of connectivity measures.
# For every pair of measures and every strategy, the per-case scores of the two
# measures are averaged, then analysed exactly like a single-measure score:
# moving-threshold score, good-vs-bad comparison, annotated box plot for
# significant results (p <= 0.05), one stored result row.
#
# NOTE(review): this cell reads the "Mean_overlap" column, whereas the
# single-measure cells read "Mean_overlap_ratio" -- confirm this is the intended
# column.

CM, Summary_stat, Gauss, Pvalue_Shapiro, Test, Pvalue, MAUC, T, TN, TP = [], [], [], [], [], [], [], [], [], []

summary_stat_label = ["average", "mm", "coefvar", "max", "random"]

# Group once instead of re-grouping for every combination.
grouped = load_data.groupby(["CM", "Strategy"])

for cm in combinations(conn_measures, 2): # combinations of scores based on different connectivity measures
    cm1, cm2 = cm

    for summary_stat in summary_stat_label:

        data1 = grouped.get_group((cm1, summary_stat))
        data2 = grouped.get_group((cm2, summary_stat))

        # Scores are paired by position and outcomes are taken from the first
        # measure, so both tables must list the same cases in the same order
        # (the order itself is assumed, only the length can be checked here).
        if len(data1) != len(data2):
            raise ValueError(f"Row count mismatch between {cm1} and {cm2} for strategy {summary_stat}")

        # the combination is made as a mean value between two scores
        x_plot = [np.mean(scores) for scores in zip(data1["Mean_overlap"], data2["Mean_overlap"])]
        y_plot = ["good" if val==1 else "bad" for val in data1["Outcome"]]

        data = pd.DataFrame({"Mean_overlap":x_plot,
                             "Outcome":y_plot})

        # calculate moving threshold-based AUC
        mauc = moving_thresh_auc(x_plot, y_plot, moving_step=0.0001)

        group1 = [x for i,x in enumerate(x_plot) if y_plot[i]=="good"]
        group0 = [x for i,x in enumerate(x_plot) if y_plot[i]=="bad"]

        _, p1 = shapiro(group1)
        _, p0 = shapiro(group0)

        # Shapiro-Wilk tests the null hypothesis of normality, so p > 0.05 means
        # normality is NOT rejected. Welch's t-test is used only when both groups
        # look Gaussian; otherwise the rank-based Mann-Whitney U test is used.
        # (The two tests were previously swapped.)
        gaussian = bool(p1 > 0.05 and p0 > 0.05)
        if gaussian:
            _, pvalue = ttest_ind(group1, group0, equal_var=False)
            stest = 't-test_welch'
        else:
            _, pvalue = mannwhitneyu(group1, group0, alternative='two-sided')
            stest = 'Mann-Whitney'

        # The figure is only ever displayed for significant results, so it is
        # only built for them.
        if pvalue<=0.05:
            g = sns.catplot(data=data, x='Outcome', y='Mean_overlap', order=["good","bad"], kind="box", hue="Outcome",
                            palette=["#ABEAC9", "#FFA7A0"],
                            height=4, aspect=.7)
            g.map_dataframe(sns.stripplot, x='Outcome', y='Mean_overlap', order=["good","bad"], hue="Outcome",
                            palette=["#404040","#404040"],
                            alpha=0.6, dodge=True)

            p_annot = add_stat_annotation(g.axes[0][0], data=data, x=x_plot, y=y_plot, order=["good","bad"],
                            box_pairs=[("good","bad")],
                            test=stest, text_format='star', loc='outside', verbose=0)

            pvalue_statannot = p_annot[1][0].pval
            print("P-value computed by statannot =", pvalue_statannot)
            print(stest)
            plt.title(f"Measures = {(cm1, cm2)}; p-value = {pvalue:.4f}; AUC = {mauc[0]:.2f}", pad=50)
            plt.show()

            plt.close('all')

        CM.append((cm1,cm2))
        Summary_stat.append(summary_stat)
        Gauss.append(gaussian)
        Pvalue_Shapiro.append((p0, p1))
        Test.append(stest)
        Pvalue.append(pvalue)
        MAUC.append(mauc[0])
        T.append(mauc[1])
        TN.append(mauc[2])
        TP.append(mauc[3])

# "Summary_stat" must come from the accumulated list; the loop variable
# `summary_stat` would stamp every row with the last strategy.
outcome_pred = pd.DataFrame({"CM": CM, "Summary_stat": Summary_stat,
                             "Gauss": Gauss, "Pvalue-Shapiro": Pvalue_Shapiro,
                             "Test": Test, "Pvalue": Pvalue,
                             "MAUC": MAUC, "T": T, "TN": TN, "TP": TP})

# Written next to the input data, like the single-measure result workbooks
# (it previously went to the current working directory).
outcome_pred.to_excel(main_folder + "surgical_outcome_prediction_game_vKarla_sigma_recursive_comb2.xlsx")

Combinations of 3 connectivity measures.

In [None]:
# Outcome prediction from triples of connectivity measures.
# For every triple of measures and every strategy, the per-case scores of the
# three measures are averaged, then analysed exactly like a single-measure score:
# moving-threshold score, good-vs-bad comparison, annotated box plot for
# significant results (p <= 0.05), one stored result row.
#
# NOTE(review): this cell reads the "Mean_overlap" column, whereas the
# single-measure cells read "Mean_overlap_ratio" -- confirm this is the intended
# column.

CM, Summary_stat, Gauss, Pvalue_Shapiro, Test, Pvalue, MAUC, T, TN, TP = [], [], [], [], [], [], [], [], [], []

summary_stat_label = ["average", "mm", "coefvar", "max", "random"]

# Group once instead of re-grouping for every combination.
grouped = load_data.groupby(["CM", "Strategy"])

for cm in combinations(conn_measures, 3): # combinations of scores based on different connectivity measures
    cm1, cm2, cm3 = cm

    for summary_stat in summary_stat_label:

        data1 = grouped.get_group((cm1, summary_stat))
        data2 = grouped.get_group((cm2, summary_stat))
        data3 = grouped.get_group((cm3, summary_stat))

        # Scores are paired by position and outcomes are taken from the first
        # measure, so all tables must list the same cases in the same order
        # (the order itself is assumed, only the length can be checked here).
        if not (len(data1) == len(data2) == len(data3)):
            raise ValueError(f"Row count mismatch between {cm1}, {cm2} and {cm3} for strategy {summary_stat}")

        # the combination is made as a mean value between three scores
        x_plot = [np.mean(scores) for scores in zip(data1["Mean_overlap"], data2["Mean_overlap"], data3["Mean_overlap"])]
        y_plot = ["good" if val==1 else "bad" for val in data1["Outcome"]]

        data = pd.DataFrame({"Mean_overlap":x_plot,
                             "Outcome":y_plot})

        # calculate moving threshold-based AUC
        mauc = moving_thresh_auc(x_plot, y_plot, moving_step=0.0001)

        group1 = [x for i,x in enumerate(x_plot) if y_plot[i]=="good"]
        group0 = [x for i,x in enumerate(x_plot) if y_plot[i]=="bad"]

        _, p1 = shapiro(group1)
        _, p0 = shapiro(group0)

        # Shapiro-Wilk tests the null hypothesis of normality, so p > 0.05 means
        # normality is NOT rejected. Welch's t-test is used only when both groups
        # look Gaussian; otherwise the rank-based Mann-Whitney U test is used.
        # (The two tests were previously swapped.)
        gaussian = bool(p1 > 0.05 and p0 > 0.05)
        if gaussian:
            _, pvalue = ttest_ind(group1, group0, equal_var=False)
            stest = 't-test_welch'
        else:
            _, pvalue = mannwhitneyu(group1, group0, alternative='two-sided')
            stest = 'Mann-Whitney'

        # The figure is only ever displayed for significant results, so it is
        # only built for them.
        if pvalue<=0.05:
            g = sns.catplot(data=data, x='Outcome', y='Mean_overlap', order=["good","bad"], kind="box", hue="Outcome",
                            palette=["#ABEAC9", "#FFA7A0"],
                            height=4, aspect=.7)
            g.map_dataframe(sns.stripplot, x='Outcome', y='Mean_overlap', order=["good","bad"], hue="Outcome",
                            palette=["#404040","#404040"],
                            alpha=0.6, dodge=True)

            p_annot = add_stat_annotation(g.axes[0][0], data=data, x=x_plot, y=y_plot, order=["good","bad"],
                            box_pairs=[("good","bad")],
                            test=stest, text_format='star', loc='outside', verbose=0)

            pvalue_statannot = p_annot[1][0].pval
            print("P-value computed by statannot =", pvalue_statannot)
            print(stest)
            # The title names all three measures (the third one used to be missing).
            plt.title(f"Measures = {(cm1, cm2, cm3)}; p-value = {pvalue:.4f}; AUC = {mauc[0]:.2f}", pad=50)
            plt.show()

            plt.close('all')

        CM.append((cm1,cm2,cm3))
        Summary_stat.append(summary_stat)
        Gauss.append(gaussian)
        Pvalue_Shapiro.append((p0, p1))
        Test.append(stest)
        Pvalue.append(pvalue)
        MAUC.append(mauc[0])
        T.append(mauc[1])
        TN.append(mauc[2])
        TP.append(mauc[3])

# "Summary_stat" must come from the accumulated list; the loop variable
# `summary_stat` would stamp every row with the last strategy.
outcome_pred = pd.DataFrame({"CM": CM, "Summary_stat": Summary_stat,
                             "Gauss": Gauss, "Pvalue-Shapiro": Pvalue_Shapiro,
                             "Test": Test, "Pvalue": Pvalue,
                             "MAUC": MAUC, "T": T, "TN": TN, "TP": TP})

# Written next to the input data, like the single-measure result workbooks
# (it previously went to the current working directory).
outcome_pred.to_excel(main_folder + "surgical_outcome_prediction_game_vKarla_sigma_recursive_comb3.xlsx")