In [1]:
from tensorflow.python.summary.summary_iterator import summary_iterator
import matplotlib.pylab as plt
import glob
import os
import numpy as np
import pandas as pd 
from scipy import stats
import json

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def test_auc(tf_path):
    """Take a tf output path and create a list of values for the validtion auc of an experement"""
    auc = []
    for event in summary_iterator(tf_path):
        for value in event.summary.value:
            if value.tag == "final/test_auc":
                auc.append(value.simple_value)
    return auc

def get_auc_scores_from_file(root_dir):
    try:
        if isinstance(root_dir, str):
            all_aucs = []
            file_extension = "*.gpu1"

            joined_dir = "/mnt/ncshare/ozkilim/BRCA/results_HGSOC_multimodal/" + root_dir
            # Recursively traverse the directory and its subdirectories
            for subdir, _, files in os.walk(joined_dir):
                # Use glob to find all files with the specified extension
                for file in glob.glob(os.path.join(subdir, file_extension)):
                    auc = test_auc(file)
                    all_aucs.append(auc[0])
        else:
            all_aucs = root_dir.copy()
        
    except:
        all_aucs =  np.array([0,0,0,0,0,0,0,0,0])

    
    return all_aucs

def get_eval_results(eval_path):

    joined_dir = "/mnt/ncshare/ozkilim/BRCA/eval_results/" + eval_path
    try:
        all_aucs = pd.read_csv(joined_dir)
        all_aucs = all_aucs["test_auc"]
    except:
        return np.array([0,0,0,0,0,0,0,0,0])
    
    return all_aucs


def process_auc_files(results_dict):
    # Initialize an empty DataFrame to store all AUC scores
    # Loop through each file and extract AUC scores
    all_auc_scores = []
    # loop overdict...  

    items_list = list(results_dict)
    # Loop over the dictionary by index
    for i in range(len(items_list)):
                
        auc_scores_AUB = get_auc_scores_from_file(list(results_dict[i].values())[0][0]) #always spulls same key
        auc_scores_Mayo = get_auc_scores_from_file(list(results_dict[i].values())[0][1])
        category = list(results_dict[i].values())[0][2]
        embedder = list(results_dict[i].values())[0][3]

        # get stats here... 
        auc_AUB = round(np.mean(auc_scores_AUB),3)
        std_AUB = round(np.std(auc_scores_AUB),3)

        auc_Mayo = round(np.mean(auc_scores_Mayo),3)
        std_Mayo = round(np.std(auc_scores_Mayo),3)

        row = {"model":list(results_dict[i].keys())[0], "UAB":auc_AUB.astype(str)+"±"+std_AUB.astype(str), "Mayo":auc_Mayo.astype(str)+"±"+std_Mayo.astype(str),"category":category,"embedder":embedder}

        all_auc_scores.append(row)
    
    all_auc_scores = pd.DataFrame(all_auc_scores)


    # Sort the DataFrame based on 'Mean AUC' in descending order
    sorted_auc_summary = all_auc_scores.sort_values(by='Mayo', ascending=True)
    
    return sorted_auc_summary



In [3]:
tissue_type = "metastatic"


#load dynamically.

rf_primary_UAB = np.array([0.703125,   0.78645833, 0.75  ,  0.64583333, 0.66145833])
rf_primary_FHCRC = np.array([0.73090909, 0.67676768, 0.61868687, 0.47727273, 0.55681818])
rf_primary_Mayo = np.array([0.71428571, 0.5974026,  0.58443058 ,0.5974026 , 0.64285714])


rf_metastatic_UAB = np.array([0.73529412, 0.80252101, 0.72689076 ,0.7605042 , 0.79831933])
rf_metastatic_FHCRC = np.array([0.69537830, 0.79201681, 0.73319328 ,0.75210084, 0.7710084 ])
rf_metastatic_Mayo = np.array([0.71428571, 0.78968254, 0.71031746, 0.83730309, 0.8234127 ])



# Load the dictionary from the JSON file
with open('classical_omics_results_metastatic.json', 'r') as json_file:
    metastatic_omics = json.load(json_file)

with open('classical_omics_results_primary.json', 'r') as json_file:
    primary_omics = json.load(json_file)


#import results data for all runs.
results_dict = [
                # "Random forest":[rf_primary_UAB,rf_primary_FHCRC, rf_primary_Mayo,"Omics"],
                {"Ensemble \cite{chowdhury2023proteogenomic}":[metastatic_omics["UAB_aucs_metastatic"],metastatic_omics["Mayo_aucs_metastatic"],"Omics","CTransPath"]},

                {"clam\_sb \cite{lu2021data}":["HGSOC_clam_sb_ViT_UAB_"+tissue_type+"_15_epocs_s1","HGSOC_clam_sb_ViT_Mayo_"+tissue_type+"_15_epocs_s1","WSI","Lunit-Dino"]},
                {"clam\_sb \cite{lu2021data}" :["HGSOC_clam_sb_OV_ViT_UAB_"+tissue_type+"_15_epocs_s1","HGSOC_clam_sb_OV_ViT_Mayo_"+tissue_type+"_15_epocs_s1","WSI","OV-Dino"]},
                {"clam\_sb \cite{lu2021data}":["HGSOC_clam_sb_CTransPath_UAB_"+tissue_type+"_15_epocs_s1","HGSOC_clam_sb_CTransPath_Mayo_"+tissue_type+"_15_epocs_s1","WSI", "CTransPath"]},
                
                
                {"SurvPath \cite{jaume2023modeling}":["HGSOC_SurvPath_ViT_UAB_"+tissue_type+"_15_epocs_s1", "HGSOC_SurvPath_ViT_Mayo_"+tissue_type+"_15_epocs_s1","Multimodal","Lunit-Dino"]},
                {"SurvPath \cite{jaume2023modeling}":["HGSOC_SurvPath_OV_ViT_UAB_"+tissue_type+"_15_epocs_s1","HGSOC_SurvPath_OV_ViT_Mayo_"+tissue_type+"_15_epocs_s1","Multimodal","OV-Dino"]},
                {"SurvPath \cite{jaume2023modeling}":["HGSOC_SurvPath_CTransPath_UAB_"+tissue_type+"_15_epocs_s1", "HGSOC_SurvPath_CTransPath_Mayo_"+tissue_type+"_15_epocs_s1","Multimodal","CTransPath"]},
                

                {"MCAT PPI \cite{chen2021multimodal}":["HGSOC_MCAT_Surv_ViT_UAB_"+tissue_type+"_15_epocs_PPI_60_s1","HGSOC_MCAT_Surv_ViT_Mayo_"+tissue_type+"_15_epocs_PPI_60_s1","Multimodal", "Lunit-Dino"]},
                {"MCAT PPI \cite{chen2021multimodal}":["HGSOC_MCAT_Surv_OV_ViT_UAB_"+tissue_type+"_15_epocs_PPI_60_s1","HGSOC_MCAT_Surv_OV_ViT_Mayo_"+tissue_type+"_15_epocs_PPI_60_s1","Multimodal","OV-Dino"]},
                {"MCAT PPI \cite{chen2021multimodal}":["HGSOC_MCAT_Surv_CTransPath_UAB_"+tissue_type+"_15_epocs_PPI_60_s1","HGSOC_MCAT_Surv_CTransPath_Mayo_"+tissue_type+"_15_epocs_PPI_60_s1","Multimodal","CTransPath"]},


                {"MCAT \cite{chen2021multimodal}":["HGSOC_MCAT_Surv_ViT_UAB_"+tissue_type+"_15_epocs_s1", "HGSOC_MCAT_Surv_ViT_Mayo_"+tissue_type+"_15_epocs_s1","Multimodal","Lunit-Dino"]},
                {"MCAT \cite{chen2021multimodal}":["HGSOC_MCAT_Surv_OV_ViT_UAB_"+tissue_type+"_15_epocs_s1","HGSOC_MCAT_Surv_OV_ViT_Mayo_"+tissue_type+"_15_epocs_s1","Multimodal","OV-Dino"]},
                {"MCAT \cite{chen2021multimodal}":["HGSOC_MCAT_Surv_CTransPath_UAB_"+tissue_type+"_15_epocs_s1","HGSOC_MCAT_Surv_CTransPath_Mayo_"+tissue_type+"_15_epocs_s1","Multimodal","CTransPath"]},


                {"PorpoiseMMF \cite{chen2022pan}":["HGSOC_PorpoiseMMF_ViT_UAB_"+tissue_type+"_15_epocs_s1", "HGSOC_PorpoiseMMF_ViT_Mayo_"+tissue_type+"_15_epocs_s1","Multimodal","Lunit-Dino"]},
                {"PorpoiseMMF \cite{chen2022pan}":["HGSOC_PorpoiseMMF_OV_ViT_UAB_"+tissue_type+"_15_epocs_s1", "HGSOC_PorpoiseMMF_OV_ViT_Mayo_"+tissue_type+"_15_epocs_s1","Multimodal","OV-Dino"]},
                {"PorpoiseMMF \cite{chen2022pan}":["HGSOC_PorpoiseMMF_CTransPath_UAB_"+tissue_type+"_15_epocs_s1","HGSOC_PorpoiseMMF_CTransPath_Mayo_"+tissue_type+"_15_epocs_s1","Multimodal","CTransPath"]},

                {"SurvPath 60 PPI \cite{jaume2023modeling}":["HGSOC_SurvPath_ViT_UAB_"+tissue_type+"_15_epocs_PPI_60_s1","HGSOC_SurvPath_ViT_Mayo_"+tissue_type+"_15_epocs_PPI_60_s1","Multimodal","Lunit-Dino"]},
                {"SurvPath 60 PPI \cite{jaume2023modeling}":["HGSOC_SurvPath_OV_ViT_UAB_"+tissue_type+"_15_epocs_PPI_60_s1","HGSOC_SurvPath_OV_ViT_Mayo_"+tissue_type+"_15_epocs_PPI_60_s1","Multimodal","OV-Dino"]},
                {"SurvPath 60 PPI \cite{jaume2023modeling}":["HGSOC_SurvPath_CTransPath_UAB_"+tissue_type+"_15_epocs_PPI_60_s1","HGSOC_SurvPath_CTransPath_Mayo_"+tissue_type+"_15_epocs_PPI_60_s1","Multimodal","CTransPath"]},
                
                ]

df = process_auc_files(results_dict)
df.head(30)

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,model,UAB,Mayo,category,embedder
0,Ensemble \cite{chowdhury2023proteogenomic},0.783±0.054,0.768±0.033,Omics,CTransPath
16,SurvPath 60 PPI \cite{jaume2023modeling},nan±nan,nan±nan,Multimodal,Lunit-Dino
15,PorpoiseMMF \cite{chen2022pan},nan±nan,nan±nan,Multimodal,CTransPath
14,PorpoiseMMF \cite{chen2022pan},nan±nan,nan±nan,Multimodal,OV-Dino
13,PorpoiseMMF \cite{chen2022pan},nan±nan,nan±nan,Multimodal,Lunit-Dino
12,MCAT \cite{chen2021multimodal},nan±nan,nan±nan,Multimodal,CTransPath
11,MCAT \cite{chen2021multimodal},nan±nan,nan±nan,Multimodal,OV-Dino
10,MCAT \cite{chen2021multimodal},nan±nan,nan±nan,Multimodal,Lunit-Dino
17,SurvPath 60 PPI \cite{jaume2023modeling},nan±nan,nan±nan,Multimodal,OV-Dino
9,MCAT PPI \cite{chen2021multimodal},nan±nan,nan±nan,Multimodal,CTransPath


In [4]:

# Pivot the DataFrame
df_pivot = df.pivot_table(index=['model', 'category'], columns='embedder', values=['Mayo', 'UAB'], aggfunc='first').reset_index()
df_pivot.columns = [' '.join(col).strip() for col in df_pivot.columns.values]

def format_highest_values(df):
    for col in df.columns[2:]:
        highest_value = df[col].max()
        df[col] = df[col].apply(lambda x: f'\\textbf{{{x}}}' if x == highest_value else x)
    return df


# Function to generate LaTeX table
def generate_latex_table(df_pivot):
    # Start the table and add the header
    latex_str = "\\begin{table}[ht]\n\\centering\n\\begin{tabular}{cc|ccc|ccc}\n\\toprule\n"
    latex_str += " & \\multicolumn{1}{c}{Model} & \\multicolumn{3}{c}{Mayo} & \\multicolumn{3}{c}{UAB} \\\\\n"
    latex_str += "\\midrule\n"
    latex_str += " &  & CTransPath \cite{wang2022transformer} & Lunit-Dino \cite{kang2023benchmarking} & OV-Dino (ours) & CTransPath & Lunit-Dino & OV-Dino \\\\\n"
    latex_str += "\\midrule\n"

    # Add rows from the DataFrame
    for category, group_df in df_pivot.groupby('category'):
        group_len = len(group_df)
        latex_str += f"\\multirow{{{group_len}}}{{*}}{{\\rotatebox[origin=c]{{90}}{{{category}}}}} \n"
        for _, row in group_df.iterrows():
            model = row['model']
            values = ' & '.join(str(x) for x in row[2:])
            latex_str += f" & {model} & {values} \\\\\n"
        latex_str += "\\midrule\n"

    latex_str += "\\bottomrule\n\\end{tabular}\n\\caption{Your caption here}\n\\end{table}"

    return latex_str

# Generate LaTeX table
latex_table = generate_latex_table(df_pivot)
print(latex_table)

\begin{table}[ht]
\centering
\begin{tabular}{cc|ccc|ccc}
\toprule
 & \multicolumn{1}{c}{Model} & \multicolumn{3}{c}{Mayo} & \multicolumn{3}{c}{UAB} \\
\midrule
 &  & CTransPath \cite{wang2022transformer} & Lunit-Dino \cite{kang2023benchmarking} & OV-Dino (ours) & CTransPath & Lunit-Dino & OV-Dino \\
\midrule
\multirow{5}{*}{\rotatebox[origin=c]{90}{Multimodal}} 
 & MCAT PPI \cite{chen2021multimodal} & nan±nan & nan±nan & nan±nan & nan±nan & nan±nan & nan±nan \\
 & MCAT \cite{chen2021multimodal} & nan±nan & nan±nan & nan±nan & nan±nan & nan±nan & nan±nan \\
 & PorpoiseMMF \cite{chen2022pan} & nan±nan & nan±nan & nan±nan & nan±nan & nan±nan & nan±nan \\
 & SurvPath 60 PPI \cite{jaume2023modeling} & nan±nan & nan±nan & nan±nan & nan±nan & nan±nan & nan±nan \\
 & SurvPath \cite{jaume2023modeling} & nan±nan & nan±nan & nan±nan & nan±nan & nan±nan & nan±nan \\
\midrule
\multirow{1}{*}{\rotatebox[origin=c]{90}{Omics}} 
 & Ensemble \cite{chowdhury2023proteogenomic} & 0.768±0.033 & nan & nan & 

In [5]:
df_pivot.head()

Unnamed: 0,model,category,Mayo CTransPath,Mayo Lunit-Dino,Mayo OV-Dino,UAB CTransPath,UAB Lunit-Dino,UAB OV-Dino
0,Ensemble \cite{chowdhury2023proteogenomic},Omics,0.768±0.033,,,0.783±0.054,,
1,MCAT PPI \cite{chen2021multimodal},Multimodal,nan±nan,nan±nan,nan±nan,nan±nan,nan±nan,nan±nan
2,MCAT \cite{chen2021multimodal},Multimodal,nan±nan,nan±nan,nan±nan,nan±nan,nan±nan,nan±nan
3,PorpoiseMMF \cite{chen2022pan},Multimodal,nan±nan,nan±nan,nan±nan,nan±nan,nan±nan,nan±nan
4,SurvPath 60 PPI \cite{jaume2023modeling},Multimodal,nan±nan,nan±nan,nan±nan,nan±nan,nan±nan,nan±nan
