In [1]:
from tensorflow.python.summary.summary_iterator import summary_iterator
import matplotlib.pylab as plt
import glob
import os
import numpy as np
import pandas as pd 
from scipy import stats
import json
from sklearn.metrics import roc_auc_score

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def test_auc(tf_path):
    """Take a tf output path and create a list of values for the validtion auc of an experement"""
    auc = []
    for event in summary_iterator(tf_path):
        for value in event.summary.value:
            if value.tag == "final/test_auc":
                auc.append(value.simple_value)
    return auc

def get_auc_scores_from_file(root_dir):
    try:
        if isinstance(root_dir, str):
            all_aucs = []
            file_extension = "*.gpu1"

            joined_dir = "/mnt/ncshare/ozkilim/BRCA/results/TCGA_train_HGSOC_TEST_early_fusion/" + root_dir
            # Recursively traverse the directory and its subdirectories
            for subdir, _, files in os.walk(joined_dir):
                # Use glob to find all files with the specified extension
                for file in glob.glob(os.path.join(subdir, file_extension)):
                    auc = test_auc(file)
                    all_aucs.append(auc[0])
        else:
            all_aucs = root_dir.copy()
        
    except:
        all_aucs =  np.array([0,0,0,0,0,0,0,0,0])

    
    return all_aucs


def get_ensemble_auc_scores_from_files(root_dirs):
    '''Load up lost of root dirs actualy preds and make ensemble before returning ensemble AUC'''
    # loop over each split pred set. 
    all_aucs = []
    for i in range(5):
        accumulated_prob = None
        labels = None
        for root_dir in root_dirs: 
            file = "/mnt/ncshare/ozkilim/BRCA/results/results_TCGA_HGSOC_multimodal/" + root_dir + "/split_"+ str(i) +"_results.pkl"
            #read the pkl 
            df = pd.read_pickle(file)
            data = []
            for key, value in df.items():
                slide_id = value['slide_id']
                prob = value['prob'][0][1]
                label = value['label']
                data.append({'slide_id': slide_id, 'prob': prob, 'label': label})
            # Creating DataFrame
            df = pd.DataFrame(data)

            # Check and store the 'label' values
            if labels is None:
                labels = df['label']
            elif not df['label'].equals(labels):
                raise ValueError("Inconsistent 'label' values across dataframes")
            # Accumulate 'prob' values
            if accumulated_prob is None:
                accumulated_prob = df['prob']
            else:
                accumulated_prob += df['prob']
                
        # Average the accumulated probabilities
        ensemble_prob = accumulated_prob / len(root_dirs)
        # Calculate AUC
        auc = roc_auc_score(labels, ensemble_prob)
        all_aucs.append(auc)
    
    return all_aucs


def get_eval_results(eval_path):

    joined_dir = "/mnt/ncshare/ozkilim/BRCA/eval_results/" + eval_path
    try:
        all_aucs = pd.read_csv(joined_dir)
        all_aucs = all_aucs["test_auc"]
    except:
        return np.array([0,0,0,0,0,0,0,0,0])
    
    return all_aucs


def process_auc_files(results_dict):
    # Initialize an empty DataFrame to store all AUC scores
    # Loop through each file and extract AUC scores
    all_auc_scores = []
    # loop overdict...  

    items_list = list(results_dict)
    # Loop over the dictionary by index
    for i in range(len(items_list)):

        if "ENSEMBLE" in list(results_dict[i].keys())[0]:

            get_ensemble_auc_scores_from_files(list(results_dict[i].values())[0][0])
        
            category = list(results_dict[i].values())[0][1]
            embedder = list(results_dict[i].values())[0][2]

            mean_auc = round(np.mean(aucs),3)
            std = round(np.std(aucs),3)
        else:
                    
            aucs = get_auc_scores_from_file(list(results_dict[i].values())[0][0]) #always spulls same key
            category = list(results_dict[i].values())[0][1]
            embedder = list(results_dict[i].values())[0][2]

            # get stats here... 
            mean_auc = round(np.mean(aucs),3)
            std = round(np.std(aucs),3)

        row = {"model":list(results_dict[i].keys())[0], "TCGA":mean_auc.astype(str)+"±"+std.astype(str), "category":category,"embedder":embedder}

        all_auc_scores.append(row)
    
    all_auc_scores = pd.DataFrame(all_auc_scores)


    # Sort the DataFrame based on 'Mean AUC' in descending order
    sorted_auc_summary = all_auc_scores.sort_values(by='TCGA', ascending=True)
    
    return sorted_auc_summary


In [44]:
tissue_type = "primary"

# load ensemble results...

# need baseline omics scroes.
with open('TCGA_classical_omics_results_metastatic.json', 'r') as json_file:
    prots_60_ensemble = json.load(json_file)


#import results data for all runs.
results_dict = [{"64 protein ensemble \cite{chowdhury2023proteogenomic}":[prots_60_ensemble["metastatic_aucs"],"Omics","Lunit-Dino"]},

                {"clam\_sb \cite{lu2021data}":["TCGA_HGSOC_clam_sb_ViT_"+tissue_type+"_s1","WSI","Lunit-Dino"]},
                {"clam\_sb \cite{lu2021data}" :["TCGA_HGSOC_clam_sb_OV_ViT_"+tissue_type+"_s1","WSI","OV-Dino"]},
                {"clam\_sb \cite{lu2021data}" :["TCGA_HGSOC_clam_sb_CTransPath_"+tissue_type+"_s1","WSI","CTransPath"]},


                {"PorpoiseMMF \cite{lu2021data}":["TCGA_HGSOC_PorpoiseMMF_ViT_"+tissue_type+"_s1","Multimodal","Lunit-Dino"]},
                {"PorpoiseMMF \cite{lu2021data}" :["TCGA_HGSOC_PorpoiseMMF_OV_ViT_"+tissue_type+"_s1","Multimodal","OV-Dino"]},
                {"PorpoiseMMF \cite{lu2021data}" :["TCGA_HGSOC_PorpoiseMMF_CTransPath_"+tissue_type+"_s1","Multimodal","CTransPath"]},


                {"MCAT \cite{lu2021data}":["TCGA_HGSOC_MCAT_Surv_ViT_"+tissue_type+"_s1","Multimodal","Lunit-Dino"]},
                {"MCAT \cite{lu2021data}" :["TCGA_HGSOC_MCAT_Surv_OV_ViT_"+tissue_type+"_s1","Multimodal","OV-Dino"]},
                {"MCAT \cite{lu2021data}" :["TCGA_HGSOC_MCAT_Surv_CTransPath_"+tissue_type+"_s1","Multimodal","CTransPath"]},


                {"SurvPath \cite{lu2021data}":["TCGA_HGSOC_SurvPath_ViT_"+tissue_type+"_s1","Multimodal","Lunit-Dino"]},
                {"SurvPath \cite{lu2021data}" :["TCGA_HGSOC_SurvPath_OV_ViT_"+tissue_type+"_s1","Multimodal","OV-Dino"]},
                {"SurvPath \cite{lu2021data}" :["TCGA_HGSOC_SurvPath_CTransPath_"+tissue_type+"_s1","Multimodal","CTransPath"]},
                ]

# one column ... splits by tissue type?...after with EVAl... 

df = process_auc_files(results_dict)
df.head(30)

# maybe only primary needed as TCGA set is primary tumor sample?

Unnamed: 0,model,TCGA,category,embedder
2,clam\_sb \cite{lu2021data},0.484±0.018,WSI,OV-Dino
1,clam\_sb \cite{lu2021data},0.505±0.02,WSI,Lunit-Dino
3,clam\_sb \cite{lu2021data},0.526±0.016,WSI,CTransPath
0,64 protein ensemble \cite{chowdhury2023proteog...,0.565±0.023,Omics,Lunit-Dino
11,SurvPath \cite{lu2021data},0.622±0.038,Multimodal,OV-Dino
8,MCAT \cite{lu2021data},0.624±0.028,Multimodal,OV-Dino
9,MCAT \cite{lu2021data},0.62±0.032,Multimodal,CTransPath
7,MCAT \cite{lu2021data},0.633±0.033,Multimodal,Lunit-Dino
5,PorpoiseMMF \cite{lu2021data},0.686±0.03,Multimodal,OV-Dino
4,PorpoiseMMF \cite{lu2021data},0.691±0.035,Multimodal,Lunit-Dino


In [47]:

# Pivot the DataFrame
df_pivot = df.pivot_table(index=['model', 'category'], columns='embedder', values=['TCGA'], aggfunc='first').reset_index()
df_pivot.columns = [' '.join(col).strip() for col in df_pivot.columns.values]

def format_highest_values(df):
    for col in df.columns[2:]:
        highest_value = df[col].max()
        df[col] = df[col].apply(lambda x: f'\\textbf{{{x}}}' if x == highest_value else x)
    return df


# Function to generate LaTeX table
def generate_latex_table(df_pivot):
    # Start the table and add the header
    latex_str = "\\begin{table}[ht]\n\\centering\n\\begin{tabular}{cc|ccc}\n\\toprule\n"
    latex_str += " & \\multicolumn{1}{c}{Model} & \\multicolumn{3}{c}{TCGA} \\\\\n"
    latex_str += "\\midrule\n"
    latex_str += " &  & CTransPath \cite{wang2022transformer} & Lunit-Dino \cite{kang2023benchmarking} & OV-Dino (ours) \\\\\n"
    latex_str += "\\midrule\n"

    # Add rows from the DataFrame
    for category, group_df in df_pivot.groupby('category'):
        group_len = len(group_df)
        latex_str += f"\\multirow{{{group_len}}}{{*}}{{\\rotatebox[origin=c]{{90}}{{{category}}}}} \n"
        for _, row in group_df.iterrows():
            model = row['model']
            values = ' & '.join(str(x) for x in row[2:])
            latex_str += f" & {model} & {values} \\\\\n"
        latex_str += "\\midrule\n"

    latex_str += "\\bottomrule\n\\end{tabular}\n\\caption{Your caption here}\n\\end{table}"

    return latex_str

# Generate LaTeX table
latex_table = generate_latex_table(df_pivot)
print(latex_table)

\begin{table}[ht]
\centering
\begin{tabular}{cc|ccc}
\toprule
 & \multicolumn{1}{c}{Model} & \multicolumn{3}{c}{TCGA} \\
\midrule
 &  & CTransPath \cite{wang2022transformer} & Lunit-Dino \cite{kang2023benchmarking} & OV-Dino (ours) \\
\midrule
\multirow{4}{*}{\rotatebox[origin=c]{90}{Multimodal}} 
 & ENSEMBLE & 0.62±0.032 & 0.62±0.032 & 0.62±0.032 & nan \\
 & MCAT \cite{lu2021data} & 0.62±0.032 & 0.633±0.033 & 0.624±0.028 & 0.62±0.032 \\
 & PorpoiseMMF \cite{lu2021data} & 0.754±0.04 & 0.691±0.035 & 0.686±0.03 & 0.754±0.04 \\
 & SurvPath \cite{lu2021data} & 0.705±0.024 & 0.707±0.019 & 0.622±0.038 & 0.705±0.024 \\
\midrule
\multirow{1}{*}{\rotatebox[origin=c]{90}{Omics}} 
 & 64 protein ensemble \cite{chowdhury2023proteogenomic} & nan & 0.683±0.019 & nan & nan \\
\midrule
\multirow{1}{*}{\rotatebox[origin=c]{90}{WSI}} 
 & clam\_sb \cite{lu2021data} & 0.526±0.016 & 0.505±0.02 & 0.484±0.018 & 0.526±0.016 \\
\midrule
\bottomrule
\end{tabular}
\caption{Your caption here}
\end{table}


### Train on TCGA test on all primary smaples from HGSOC

In [48]:
tissue_type = "metastatic"


# load ensemble results..

# need baseline omics scroes.
with open('TCGA_train_HGSOC_Test_classical_omics_results_primary.json', 'r') as json_file:
    prots_60_ensemble = json.load(json_file)


# need to base at different DIR. 


#import results data for all runs.
results_dict = [
                {"64 protein ensemble \cite{chowdhury2023proteogenomic}":[prots_60_ensemble[tissue_type+"_aucs"],"Omics","Lunit-Dino"]},


                {"clam\_sb \cite{lu2021data}":["TCGA_HGSOC_clam_sb_ViT_"+tissue_type+"_s1","WSI","Lunit-Dino"]},
                {"clam\_sb \cite{lu2021data}" :["TCGA_HGSOC_clam_sb_OV_ViT_"+tissue_type+"_s1","WSI","OV-Dino"]},
                {"clam\_sb \cite{lu2021data}" :["TCGA_HGSOC_clam_sb_CTransPath_"+tissue_type+"_s1","WSI","CTransPath"]},
                {"clam\_sb \cite{lu2021data} ENSEMBLE" :[["TCGA_HGSOC_clam_sb_CTransPath_"+tissue_type+"_s1","TCGA_HGSOC_clam_sb_OV_ViT_"+tissue_type+"_s1","TCGA_HGSOC_clam_sb_ViT_"+tissue_type+"_s1"],"WSI","ensemble"]},


                {"PorpoiseMMF \cite{lu2021data}":["TCGA_HGSOC_PorpoiseMMF_ViT_"+tissue_type+"_s1","Multimodal","Lunit-Dino"]},
                {"PorpoiseMMF \cite{lu2021data}" :["TCGA_HGSOC_PorpoiseMMF_OV_ViT_"+tissue_type+"_s1","Multimodal","OV-Dino"]},
                {"PorpoiseMMF \cite{lu2021data}" :["TCGA_HGSOC_PorpoiseMMF_CTransPath_"+tissue_type+"_s1","Multimodal","CTransPath"]},
                {"PorpoiseMMF \cite{lu2021data} ENSEMBLE" :[["TCGA_HGSOC_PorpoiseMMF_ViT_"+tissue_type+"_s1","TCGA_HGSOC_PorpoiseMMF_OV_ViT_"+tissue_type+"_s1","TCGA_HGSOC_PorpoiseMMF_CTransPath_"+tissue_type+"_s1"],"Multimodal","ensemble"]},


                {"MCAT \cite{lu2021data}":["TCGA_HGSOC_MCAT_Surv_ViT_"+tissue_type+"_s1","Multimodal","Lunit-Dino"]},
                {"MCAT \cite{lu2021data}" :["TCGA_HGSOC_MCAT_Surv_OV_ViT_"+tissue_type+"_s1","Multimodal","OV-Dino"]},
                {"MCAT \cite{lu2021data}" :["TCGA_HGSOC_MCAT_Surv_CTransPath_"+tissue_type+"_s1","Multimodal","CTransPath"]},
                {"MCAT \cite{lu2021data} ENSEMBLE" :[["TCGA_HGSOC_MCAT_Surv_ViT_"+tissue_type+"_s1","TCGA_HGSOC_MCAT_Surv_OV_ViT_"+tissue_type+"_s1","TCGA_HGSOC_MCAT_Surv_CTransPath_"+tissue_type+"_s1"],"Multimodal","ensemble"]},


                #model ensemble set
                {"ENSEMBLE ENSEMBLE" :[["TCGA_HGSOC_PorpoiseMMF_ViT_"+tissue_type+"_s1","TCGA_HGSOC_MCAT_Surv_ViT_"+tissue_type+"_s1","TCGA_HGSOC_SurvPath_ViT_"+tissue_type+"_s1"],"Multimodal","Lunit-Dino"]},
                {"ENSEMBLE ENSEMBLE" :[["TCGA_HGSOC_PorpoiseMMF_OV_ViT_"+tissue_type+"_s1","TCGA_HGSOC_MCAT_Surv_OV_ViT_"+tissue_type+"_s1","TCGA_HGSOC_SurvPath_OV_ViT_"+tissue_type+"_s1"],"Multimodal","OV-Dino"]},
                {"ENSEMBLE ENSEMBLE" :[["TCGA_HGSOC_PorpoiseMMF_CTransPath_"+tissue_type+"_s1","TCGA_HGSOC_MCAT_Surv_CTransPath_"+tissue_type+"_s1","TCGA_HGSOC_SurvPath_CTransPath_"+tissue_type+"_s1"],"Multimodal","CTransPath"]},


                # {"MCAT 500PPI \cite{lu2021data}":["TCGA_HGSOC_MCAT_Surv_ViT_"+tissue_type+"_allprots_s1","Multimodal","Lunit-Dino"]},
                # {"MCAT 500PPI \cite{lu2021data}" :["TCGA_HGSOC_MCAT_Surv_OV_ViT_"+tissue_type+"_allprots_s1","Multimodal","OV-Dino"]},
                # {"MCAT 500PPI \cite{lu2021data}" :["TCGA_HGSOC_MCAT_Surv_CTransPath_"+tissue_type+"_allprots_s1","Multimodal","CTransPath"]},

                # {"SurvPath 500PPI \cite{lu2021data}":["TCGA_HGSOC_SurvPath_ViT_"+tissue_type+"_allprots_s1","Multimodal","Lunit-Dino"]},
                # {"SurvPath 500PPI \cite{lu2021data}" :["TCGA_HGSOC_SurvPath_OV_ViT_"+tissue_type+"_allprots_s1","Multimodal","OV-Dino"]},
                # {"SurvPath 500PPI \cite{lu2021data}" :["TCGA_HGSOC_SurvPath_CTransPath_"+tissue_type+"_allprots_s1","Multimodal","CTransPath"]},


                {"SurvPath \cite{lu2021data}":["TCGA_HGSOC_SurvPath_ViT_"+tissue_type+"_s1","Multimodal","Lunit-Dino"]},
                {"SurvPath \cite{lu2021data}" :["TCGA_HGSOC_SurvPath_OV_ViT_"+tissue_type+"_s1","Multimodal","OV-Dino"]},
                {"SurvPath \cite{lu2021data}" :["TCGA_HGSOC_SurvPath_CTransPath_"+tissue_type+"_s1","Multimodal","CTransPath"]},
                {"SurvPath \cite{lu2021data} ENSEMBLE" :[["TCGA_HGSOC_SurvPath_ViT_"+tissue_type+"_s1","TCGA_HGSOC_SurvPath_OV_ViT_"+tissue_type+"_s1","TCGA_HGSOC_SurvPath_CTransPath_"+tissue_type+"_s1"],"Multimodal","ensemble"]},

                ]


df = process_auc_files(results_dict)

substring_to_remove = ' ENSEMBLE'

# Remove the substring from every row in the column
df['model'] = df['model'].str.replace(substring_to_remove, '', regex=False)
df.head(30)

# make for each 

Unnamed: 0,model,TCGA,category,embedder
15,ENSEMBLE,0.408±0.044,Multimodal,CTransPath
14,ENSEMBLE,0.408±0.044,Multimodal,OV-Dino
13,ENSEMBLE,0.408±0.044,Multimodal,Lunit-Dino
11,MCAT \cite{lu2021data},0.408±0.044,Multimodal,CTransPath
12,MCAT \cite{lu2021data},0.408±0.044,Multimodal,ensemble
3,clam\_sb \cite{lu2021data},0.413±0.025,WSI,CTransPath
4,clam\_sb \cite{lu2021data},0.413±0.025,WSI,ensemble
1,clam\_sb \cite{lu2021data},0.443±0.01,WSI,Lunit-Dino
2,clam\_sb \cite{lu2021data},0.457±0.027,WSI,OV-Dino
17,SurvPath \cite{lu2021data},0.461±0.02,Multimodal,OV-Dino


In [42]:

# Pivot the DataFrame
df_pivot = df.pivot_table(index=['model', 'category'], columns='embedder', values=['TCGA'], aggfunc='first').reset_index()
df_pivot.columns = [' '.join(col).strip() for col in df_pivot.columns.values]

def format_highest_values(df):
    for col in df.columns[2:]:
        highest_value = df[col].max()
        df[col] = df[col].apply(lambda x: f'\\textbf{{{x}}}' if x == highest_value else x)
    return df


# Function to generate LaTeX table
def generate_latex_table(df_pivot):
    # Start the table and add the header
    latex_str = "\\begin{table}[ht]\n\\centering\n\\begin{tabular}{cc|cccc}\n\\toprule\n"
    latex_str += " & \\multicolumn{1}{c}{Model} & \\multicolumn{4}{c}{TCGA} \\\\\n"
    latex_str += "\\midrule\n"
    latex_str += " &  & CTransPath \cite{wang2022transformer} & Lunit-Dino \cite{kang2023benchmarking} & OV-Dino (ours) & ensemble \\\\\n"
    latex_str += "\\midrule\n"

    # Add rows from the DataFrame
    for category, group_df in df_pivot.groupby('category'):
        group_len = len(group_df)
        latex_str += f"\\multirow{{{group_len}}}{{*}}{{\\rotatebox[origin=c]{{90}}{{{category}}}}} \n"
        for _, row in group_df.iterrows():
            model = row['model']
            values = ' & '.join(str(x) for x in row[2:])
            latex_str += f" & {model} & {values} \\\\\n"
        latex_str += "\\midrule\n"

    latex_str += "\\bottomrule\n\\end{tabular}\n\\caption{Your caption here}\n\\end{table}"

    return latex_str

# Generate LaTeX table
latex_table = generate_latex_table(df_pivot)
print(latex_table)

\begin{table}[ht]
\centering
\begin{tabular}{cc|cccc}
\toprule
 & \multicolumn{1}{c}{Model} & \multicolumn{4}{c}{TCGA} \\
\midrule
 &  & CTransPath \cite{wang2022transformer} & Lunit-Dino \cite{kang2023benchmarking} & OV-Dino (ours) & ensemble \\
\midrule
\multirow{4}{*}{\rotatebox[origin=c]{90}{Multimodal}} 
 & ENSEMBLE & 0.62±0.032 & 0.62±0.032 & 0.62±0.032 & nan \\
 & MCAT \cite{lu2021data} & 0.62±0.032 & 0.633±0.033 & 0.624±0.028 & 0.62±0.032 \\
 & PorpoiseMMF \cite{lu2021data} & 0.754±0.04 & 0.691±0.035 & 0.686±0.03 & 0.754±0.04 \\
 & SurvPath \cite{lu2021data} & 0.705±0.024 & 0.707±0.019 & 0.622±0.038 & 0.705±0.024 \\
\midrule
\multirow{1}{*}{\rotatebox[origin=c]{90}{Omics}} 
 & 64 protein ensemble \cite{chowdhury2023proteogenomic} & nan & 0.683±0.019 & nan & nan \\
\midrule
\multirow{1}{*}{\rotatebox[origin=c]{90}{WSI}} 
 & clam\_sb \cite{lu2021data} & 0.526±0.016 & 0.505±0.02 & 0.484±0.018 & 0.526±0.016 \\
\midrule
\bottomrule
\end{tabular}
\caption{Your caption here}
\end{tabl

In [34]:
df_pivot.head()

Unnamed: 0,model,category,TCGA CTransPath,TCGA Lunit-Dino,TCGA OV-Dino,TCGA ensemble
0,64 protein ensemble \cite{chowdhury2023proteog...,Omics,,0.683±0.019,,
1,CTransPath ENSEMBLE,Multimodal,0.62±0.032,,,
2,Lunit-Dino ENSEMBLE,Multimodal,,0.62±0.032,,
3,MCAT ENSEMBLE,Multimodal,,,,0.62±0.032
4,MCAT \cite{lu2021data},Multimodal,0.62±0.032,0.633±0.033,0.624±0.028,


In [4]:
tissue_type = "primary"


# need to base at different DIR. 


#import results data for all runs.
results_dict = [

                {"clam\_sb \cite{lu2021data}":["NERO_HGSOC_50_clam_sb_ViT_"+tissue_type+"_s1","WSI","Lunit-Dino"]},
                {"clam\_sb \cite{lu2021data}" :["NERO_HGSOCC_50_clam_sb_OV_ViT_"+tissue_type+"_s1","WSI","OV-Dino"]},
                {"clam\_sb \cite{lu2021data}" :["NERO_HGSOCC_50_clam_sb_CTransPath_"+tissue_type+"_s1","WSI","CTransPath"]},
                
                ]

df = process_auc_files(results_dict)
df.head(30)

Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Unnamed: 0,model,TCGA,category,embedder
1,clam\_sb \cite{lu2021data},0.453±0.047,WSI,OV-Dino
0,clam\_sb \cite{lu2021data},0.49±0.012,WSI,Lunit-Dino
2,clam\_sb \cite{lu2021data},0.523±0.018,WSI,CTransPath


In [26]:
tissue_type = "primary"

#import results data for all runs.
results_dict = [

                {"MCAT_Surv":["TCGA_TRAIN_HGSOC_50_MCAT_Surv_ViT_"+tissue_type+"_s1","WSI","Lunit-Dino"]},
                {"MCAT_Surv" :["TCGA_TRAIN_HGSOCC_50_MCAT_Surv_OV_ViT_"+tissue_type+"_s1","WSI","OV-Dino"]},
                {"MCAT_Surv" :["TCGA_TRAIN_HGSOCC_50_MCAT_Surv_CTransPath_"+tissue_type+"_s1","WSI","CTransPath"]},

                {"SurvPath":["TCGA_TRAIN_HGSOC_50_SurvPath_ViT_"+tissue_type+"_s1","WSI","Lunit-Dino"]},
                {"SurvPath" :["TCGA_TRAIN_HGSOCC_50_SurvPath_OV_ViT_"+tissue_type+"_s1","WSI","OV-Dino"]},
                {"SurvPath" :["TCGA_TRAIN_HGSOCC_50_SurvPath_CTransPath_"+tissue_type+"_s1","WSI","CTransPath"]}, 
                ]


df = process_auc_files(results_dict)
df.head(30)

Unnamed: 0,model,TCGA,category,embedder
4,SurvPath,0.547±0.058,WSI,OV-Dino
1,MCAT_Surv,0.563±0.056,WSI,OV-Dino
3,SurvPath,0.619±0.014,WSI,Lunit-Dino
0,MCAT_Surv,0.656±0.024,WSI,Lunit-Dino
5,SurvPath,0.691±0.023,WSI,CTransPath
2,MCAT_Surv,0.709±0.032,WSI,CTransPath
