# Model Selection
This notebook selects the best hyperparameter configurations for the DGN models.

# Imports

In [1]:
import pandas as pd
import pickle
from pathlib import Path
import wandb
import numpy as np
import os

# Load data from wandb

In [2]:
# Client for the wandb public API; get_project_df uses it to list the runs of a project.
api = wandb.Api()

def get_project_df(project: str, out_path: Path = None) -> pd.DataFrame:
    """Collect every run of a wandb project into one flat DataFrame.

    Each run contributes one row holding its name, its config entries and its
    summary entries as separate columns.

    Args:
        project: project path handed to ``api.runs``.
        out_path: when truthy, the resulting table is also written there as
            CSV (without the index).

    Returns:
        The table of runs, with the "parameters/..." and "gradients/..."
        columns removed.
    """
    summaries, configs, names = [], [], []
    for run in api.runs(project):
        # Summary = logged output values; _json_dict leaves out large files.
        summaries.append(run.summary._json_dict)

        # Config = hyperparameters; entries whose key starts with "_" are skipped.
        public_config = {}
        for key, value in run.config.items():
            if key.startswith('_'):
                continue
            public_config[key] = value
        configs.append(public_config)

        # Human-readable run name.
        names.append(run.name)

    runs_df = pd.DataFrame({"summary": summaries, "config": configs, "name": names})

    # Replace each dict-valued column by one column per key (config first,
    # then summary).
    for nested in ("config", "summary"):
        expanded = runs_df[nested].apply(pd.Series)
        runs_df = pd.concat([runs_df.drop([nested], axis=1), expanded], axis=1)

    # Remove the per-parameter and per-gradient columns.
    for marker in ("parameters/", "gradients/"):
        unwanted = [col for col in runs_df.columns if marker in col]
        runs_df = runs_df.drop(columns=unwanted)

    # runs_df['hold_out_by'] = runs_df['hold_out_by'].replace({'model':'UC3','protein':'UC2','random':'UC1'})

    if out_path:
        runs_df.to_csv(out_path, index=False)
    return runs_df

In [3]:
# Make sure the "results" directory exists (no error if it is already there).
os.makedirs("results", exist_ok=True)

In [5]:
# results_df = get_project_df("tesi-gnn/peppina-final", out_path=Path("results/results_final.csv"))
# other_conv_df = get_project_df("tesi-gnn/gnn-ppi-sens", out_path=Path("results/results_other_conv.csv"))
# rev_df = get_project_df("hetgesn/peppina-rev", out_path=Path("results/results_rev.csv"))

# Load runs data

In [6]:
# Runs table of the DGN models, read from results/gcn.csv.
# low_memory=False makes pandas parse the file in one pass instead of in chunks.
gnn_df = pd.read_csv("results/gcn.csv", low_memory=False)

In [7]:
# Runs used for the conv-operator comparison; only the 'sensitivity' property is kept.
# NOTE(review): this reads results_rev.csv, which the commented-out export cell
# writes from "hetgesn/peppina-rev" (as rev_df), not results_other_conv.csv —
# confirm this is the intended file.
other_conv_df = pd.read_csv("results/results_rev.csv", low_memory=False)
# .copy() detaches the filtered rows, so the column assignments made on this
# frame later do not target a slice of the loaded table (SettingWithCopyWarning).
other_conv_df = other_conv_df[other_conv_df['property'] == 'sensitivity'].copy()

In [8]:
# Baseline runs; only the "deepsets" model is kept.
baselines_df = pd.read_csv("results/baselines.csv")
# .copy() detaches the filtered rows, so the column assignments made on this
# frame later do not target a slice of the loaded table (SettingWithCopyWarning).
baselines_df = baselines_df[baselines_df["model"] == "deepsets"].copy()

In [9]:
# Store the feature-set identifier as text in both run tables.
baselines_df["embeddings_len"] = baselines_df["embeddings_len"].apply(str)
gnn_df["embeddings_len"] = gnn_df["embeddings_len"].apply(str)

In [10]:
# Metric names; each one exists once per data split.
metrics = ["acc", "auroc", "f1", "mcc"]
# One column name per (split, metric) pair: "train_acc", ..., "val_mcc".
metrics_cols = [
    split + "_" + name
    for split in ("train", "test", "val")
    for name in metrics
]

# Columns that together identify one hyperparameter configuration.
config_cols = [
    'model', 'conv', 'layers', 'hidden_dim', 'batch_size', 'lr', 'pool_from',
    'weight_decay', 'warmup_steps', 'undirected', 'dirgnn_alpha',
    'dirgnn_conv', 'es_eps', 'patience', 'weight_initializer', 'aggr',
    'weighted_sampler', 'uniform_bound', 'dropout', 'gin_layers',
    'gin_hidden_dim',
]

In [11]:
def select_bests(df, selection_metric='val_f1', features=("0", "128", "onehot"),
                 *, metric_columns=None, config_columns=None):
    """Pick the best hyperparameter configuration for every test fold.

    For each use case ("UC1", "UC2", "UC3"), each entry of ``features`` and
    each test fold 0..3, the matching runs are grouped by configuration and
    the group with the highest ``selection_metric`` is kept.

    Args:
        df: runs table. Needs the metric and configuration columns plus
            ``embeddings_len``, ``hold_out_by``, ``name``, ``test_fold``,
            ``_runtime`` and ``epoch``.
        selection_metric: column used to rank the configurations.
        features: ``embeddings_len`` values to consider.
        metric_columns: metric column names; defaults to the notebook-level
            ``metrics_cols``.
        config_columns: columns identifying a configuration; defaults to the
            notebook-level ``config_cols``.

    Returns:
        A DataFrame with one row per (use case, feature, fold) combination
        that had at least one run. Rows are built by transposing Series, so
        the columns presumably come back with object dtype.

    Raises:
        KeyError: if ``df`` lacks one of the required columns.
        ValueError: if no combination has any run.
    """
    # Resolve the column lists lazily so explicit arguments never touch the globals.
    metric_columns = list(metrics_cols if metric_columns is None else metric_columns)
    config_columns = list(config_cols if config_columns is None else config_columns)

    group_keys = config_columns + ['embeddings_len', 'hold_out_by']
    kept_columns = metric_columns + group_keys + ['name', 'test_fold', '_runtime', 'epoch']

    all_the_bests = []
    for hold_out_by in ["UC1", "UC2", "UC3"]:
        for embeddings_len in features:
            best_per_fold = []
            for fold in range(4):
                mask = (
                    (df.hold_out_by == hold_out_by)
                    & (df.embeddings_len == embeddings_len)
                    & (df.test_fold == fold)
                )
                runs = df.loc[mask, kept_columns]
                # Only a missing combination is skipped; any other problem
                # (e.g. a missing column) is allowed to surface.
                if runs.empty:
                    print(f"no results for {hold_out_by}, {embeddings_len}, {fold}")
                    continue

                # NOTE(review): agg('max') takes the maximum of every column
                # independently, so if several runs share one configuration the
                # metrics and the name may come from different runs — confirm
                # each configuration has a single run per fold.
                best = (
                    runs.groupby(group_keys, dropna=False)
                    .agg('max')
                    .sort_values(selection_metric, ascending=False)
                    .reset_index()
                    .loc[0]
                )
                best_per_fold.append(best)

            if best_per_fold:
                all_the_bests.append(pd.concat(best_per_fold, axis=1).T)

    if not all_the_bests:
        raise ValueError(
            "select_bests: no runs matched any (hold_out_by, embeddings_len, test_fold) combination"
        )
    return pd.concat(all_the_bests, axis=0)

In [12]:
def format_value(s):
    """Format a number with 3 decimals, without the zero before the decimal point.

    Only a lone integer-part zero is removed (``0.5`` -> ``".500"``,
    ``-0.25`` -> ``"-.250"``); every other value keeps all its digits
    (``10.5`` -> ``"10.500"``).
    """
    text = f"{s:.3f}"
    # A blanket replace("0.", ".") would also eat the last integer digit of
    # values such as 10.5 or 20.25, so only the leading zero is stripped.
    if text.startswith("0."):
        return text[1:]
    if text.startswith("-0."):
        return "-" + text[2:]
    return text

table_metrics=["mcc","acc","f1","auroc"]

def format_metrics(df, metrics=table_metrics, sets=("test",)):
    r"""Turn aggregated metric columns into LaTeX ``$mean \scriptstyle \pm std$`` cells.

    Args:
        df: frame with two-level columns that holds ``(f"{set}_{metric}", "mean")``
            and ``(f"{set}_{metric}", "std")`` for every requested pair, plus
            ``model``, ``hold_out_by`` and ``embeddings_len``. It needs at least
            one row. The frame passed in is left unchanged.
        metrics: metric names to format, in output column order.
        sets: data splits to format. With more than one split the column
            titles are prefixed with the capitalised split name so that they
            do not overwrite each other.

    Returns:
        A new frame with flat columns ``Model``, ``Hold-out by``,
        ``Embeddings length`` and one ``\textbf{METRIC}`` column per formatted
        pair. In each of those columns the row with the highest mean is
        wrapped in ``\mathbf{}``.
    """
    def bold(cell):
        # "$...$" -> "$\mathbf{...}$"
        return cell.replace("$", "$\\mathbf{", 1)[:-1] + "}$"

    # Work on a copy so the caller's frame does not gain the formatted columns.
    df = df.copy()
    formatted_metrics = []
    for m in metrics:
        for s in sets:
            means = df[(f"{s}_{m}", "mean")]
            stds = df[(f"{s}_{m}", "std")]

            col = "\\textbf{" + f"{m.upper()}" + "}"
            if len(sets) > 1:
                col = f"{s.capitalize()} " + col

            cells = "$" + means.apply(format_value) + "\\scriptstyle \\pm " + stds.apply(format_value) + "$"

            # put the best in bold
            best = means.sort_values(ascending=False).index[0]
            is_best = cells.index == best
            cells[is_best] = cells[is_best].map(bold)
            df[col] = cells

            # Sorting the column index first lets the whole (mean, std, ...)
            # group of this metric be dropped by its top-level name.
            df = df.sort_index(axis=1).drop(columns=f"{s}_{m}")

            formatted_metrics.append(col)

    df = df[["model","hold_out_by","embeddings_len"]+formatted_metrics].copy()

    # rename columns to be less pythonish
    df.columns = ['Model','Hold-out by','Embeddings length'] + formatted_metrics

    df['Model'] = df['Model'].replace({"gcn":"DGN","deepsets":"DeepSets"})
    return df

  df[col] = "$" + (df[(f"{s}_{m}","mean")]).apply(format_value) + "\scriptstyle \pm " + (df[(f"{s}_{m}","std")]).apply(format_value) +"$"


In [13]:
# Best baseline configuration per (use case, feature set, test fold), ranked by
# 'val_f1'; only the identifying, configuration and metric columns are kept.
baselines_perf = select_bests(baselines_df, 'val_f1')[['hold_out_by','embeddings_len','name','test_fold']+config_cols+metrics_cols]

In [14]:
# Aggregate the selected runs over the test folds: mean, std and count of every
# metric per (hold_out_by, embeddings_len, model).
baselines_perf_disp = baselines_perf[metrics_cols+['hold_out_by','embeddings_len','test_fold','model']].groupby(['hold_out_by','embeddings_len','model']).agg(['mean','std','count']).reset_index()

# Keep the group keys and the four test metrics only.
baselines_perf_disp = baselines_perf_disp[['hold_out_by','embeddings_len','model']+['test_acc','test_f1','test_auroc','test_mcc']]

# Render the metrics as LaTeX cells and display the table.
baselines_perf_disp = format_metrics(baselines_perf_disp)
baselines_perf_disp['Model'] = "DeepSets"
baselines_perf_disp

Unnamed: 0,Model,Hold-out by,Embeddings length,\textbf{MCC},\textbf{ACC},\textbf{F1},\textbf{AUROC}
0,DeepSets,UC1,0,$.062\scriptstyle \pm .043$,$.574\scriptstyle \pm .167$,$.284\scriptstyle \pm .140$,$.650\scriptstyle \pm .004$
1,DeepSets,UC1,128,$.472\scriptstyle \pm .006$,$.764\scriptstyle \pm .004$,$.647\scriptstyle \pm .004$,$.817\scriptstyle \pm .002$
2,DeepSets,UC1,onehot,$\mathbf{.498\scriptstyle \pm .009}$,$\mathbf{.778\scriptstyle \pm .007}$,$\mathbf{.661\scriptstyle \pm .008}$,$\mathbf{.834\scriptstyle \pm .003}$
3,DeepSets,UC2,0,$.021\scriptstyle \pm .036$,$.572\scriptstyle \pm .169$,$.218\scriptstyle \pm .200$,$.560\scriptstyle \pm .142$
4,DeepSets,UC2,128,$.415\scriptstyle \pm .031$,$.741\scriptstyle \pm .013$,$.605\scriptstyle \pm .038$,$.783\scriptstyle \pm .033$
5,DeepSets,UC2,onehot,$.427\scriptstyle \pm .032$,$.759\scriptstyle \pm .015$,$.595\scriptstyle \pm .047$,$.794\scriptstyle \pm .026$
6,DeepSets,UC3,0,$.048\scriptstyle \pm .037$,$.565\scriptstyle \pm .174$,$.259\scriptstyle \pm .169$,$.634\scriptstyle \pm .037$
7,DeepSets,UC3,128,$.104\scriptstyle \pm .094$,$.637\scriptstyle \pm .047$,$.320\scriptstyle \pm .186$,$.554\scriptstyle \pm .087$
8,DeepSets,UC3,onehot,$.138\scriptstyle \pm .063$,$.673\scriptstyle \pm .028$,$.281\scriptstyle \pm .102$,$.620\scriptstyle \pm .046$


In [15]:
# Column passed to select_bests as the ranking metric in the cells that follow.
metric = 'val_f1'

In [16]:
# Cast first: select_bests compares hold_out_by with the string labels
# "UC1"/"UC2"/"UC3", so the column has to be text before the selection runs.
gnn_df["hold_out_by"] = gnn_df["hold_out_by"].apply(str)
gnn_perf = select_bests(gnn_df, metric)

In [17]:
# _runtime / 60 -> minutes (assumes _runtime is logged in seconds — TODO confirm).
gnn_perf['runtime_min'] = gnn_perf['_runtime']/60

### Training times per UC/embedding

In [18]:
# Show every column when a frame is displayed.
pd.set_option("display.max_columns", None)
# Mean train/val/test F1, runtime (minutes) and epoch of the selected runs,
# per (hold_out_by, embeddings_len).
gnn_perf[metrics_cols+['hold_out_by','embeddings_len','test_fold','runtime_min','epoch']].groupby(['hold_out_by','embeddings_len']).agg(['mean']).reset_index()[['hold_out_by','embeddings_len']+['train_f1','val_f1','test_f1','runtime_min','epoch']]


Unnamed: 0_level_0,hold_out_by,embeddings_len,train_f1,val_f1,test_f1,runtime_min,epoch
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,mean,mean,mean,mean
0,UC1,0,0.819709,0.747035,0.750113,195.350359,1744.5
1,UC1,128,0.89638,0.840223,0.843225,49.078832,561.0
2,UC1,onehot,0.899549,0.839345,0.841645,19.443406,180.5
3,UC2,0,0.751345,0.645942,0.604067,248.278705,2608.75
4,UC2,128,0.831149,0.698766,0.666694,20.482214,331.25
5,UC2,onehot,0.860387,0.717582,0.690009,17.173141,165.25
6,UC3,0,0.754066,0.606494,0.527726,26.106764,230.25
7,UC3,128,0.789202,0.632069,0.515358,12.138751,137.5
8,UC3,onehot,0.836639,0.62763,0.48568,19.252191,148.25


In [19]:
# Same per-group means as the previous table, with the raw _runtime added.
disp = gnn_perf[metrics_cols+['hold_out_by','embeddings_len','test_fold','runtime_min','epoch','_runtime']].groupby(['hold_out_by','embeddings_len']).agg(['mean']).reset_index()[['hold_out_by','embeddings_len']+['train_f1','val_f1','test_f1','runtime_min','_runtime','epoch']]
# Ratio of mean _runtime to mean epoch, averaged over the groups
# (presumably seconds per epoch — confirm the unit of _runtime).
(disp['_runtime']/disp['epoch']).mean()

mean    5.997607
dtype: object

In [20]:
# Metrics of every selected run, one row per (hold_out_by, embeddings_len, test_fold).
gnn_perf[metrics_cols+['hold_out_by','embeddings_len','test_fold']]

Unnamed: 0,train_acc,train_auroc,train_f1,train_mcc,test_acc,test_auroc,test_f1,test_mcc,val_acc,val_auroc,val_f1,val_mcc,hold_out_by,embeddings_len,test_fold
0,0.874023,0.946074,0.80245,0.710485,0.832285,0.907119,0.746607,0.621166,0.840062,0.908387,0.752106,0.634066,UC1,0,0
0,0.849854,0.931418,0.852765,0.70084,0.819198,0.902227,0.746751,0.613799,0.813276,0.895923,0.74374,0.608996,UC1,0,1
0,0.862429,0.929304,0.776235,0.677211,0.840867,0.907519,0.748672,0.632695,0.836568,0.905222,0.743761,0.624013,UC1,0,2
0,0.843994,0.926291,0.847385,0.688117,0.826148,0.912407,0.758421,0.631464,0.817158,0.90577,0.748532,0.616407,UC1,0,3
0,0.93042,0.981686,0.893058,0.84151,0.894479,0.954717,0.842584,0.763803,0.897516,0.955005,0.845433,0.769313,UC1,128,0
0,0.928368,0.981176,0.890817,0.837669,0.898416,0.957806,0.842052,0.767323,0.890916,0.951582,0.832439,0.751603,UC1,128,1
0,0.939209,0.987025,0.906846,0.862129,0.900513,0.959449,0.846363,0.772893,0.899068,0.958582,0.844869,0.770082,UC1,128,2
0,0.932373,0.981723,0.894797,0.845432,0.894663,0.957216,0.841903,0.763382,0.891304,0.956359,0.83815,0.757507,UC1,128,3
0,0.935791,0.987526,0.897864,0.851046,0.899138,0.951327,0.846006,0.770973,0.893245,0.949571,0.835821,0.756746,UC1,onehot,0
0,0.933112,0.98534,0.898342,0.848514,0.893523,0.952755,0.840858,0.761334,0.887034,0.945434,0.833238,0.749744,UC1,onehot,1


In [21]:
# Map every (use case, feature set, test fold) to the name of its selected run.
# r['name'] is used because r.name is the row label of the Series, not the column.
best_trials_gcn = {
    (r.hold_out_by, r.embeddings_len, r.test_fold): r['name']
    for _, r in gnn_perf.iterrows()
}

# The context manager closes the file even if pickling fails.
with open("gcn_best_trial_names.pkl", 'wb') as fh:
    pickle.dump(best_trials_gcn, fh)

# Comparison of different conv operators

In [22]:
other_conv_df["embeddings_len"] = other_conv_df["embeddings_len"].apply(str)
# Bracket assignment is needed here: when the column does not exist yet,
# attribute assignment (other_conv_df.hold_out_by = ...) only sets a Python
# attribute on the frame and pandas warns that no column was created.
other_conv_df["hold_out_by"] = other_conv_df["use_case"].apply(str)

  other_conv_df.hold_out_by = other_conv_df.use_case.apply(str)


In [23]:
# select_bests filters on 'hold_out_by'; in this table the use case is stored in 'use_case'.
other_conv_df['hold_out_by'] = other_conv_df['use_case']
# Best configuration per fold among the model=='gcn' runs that use SAGEConv or GCNConv.
gcn_perf = select_bests(other_conv_df[(other_conv_df.conv.isin(['SAGEConv','GCNConv'])) & (other_conv_df.model=='gcn') ], metric)
# Mean train/val/test F1 of the selected runs per (hold_out_by, embeddings_len).
gcn_perf[metrics_cols+['hold_out_by','embeddings_len','test_fold']].groupby(['hold_out_by','embeddings_len']).agg(['mean']).reset_index()[['hold_out_by','embeddings_len']+['train_f1','val_f1','test_f1']]

Unnamed: 0_level_0,hold_out_by,embeddings_len,train_f1,val_f1,test_f1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,mean,mean
0,UC1,0,0.372252,0.392615,0.391564
1,UC1,128,0.625299,0.591145,0.582408
2,UC1,onehot,0.872721,0.687617,0.677718
3,UC2,0,0.053233,0.03619,0.05762
4,UC2,128,0.516455,0.448403,0.424783
5,UC2,onehot,0.779843,0.511922,0.484257
6,UC3,0,0.041433,0.019314,0.015757
7,UC3,128,0.283153,0.018731,0.046052
8,UC3,onehot,0.84265,0.284894,0.355524


In [24]:
# Best configuration per fold among the model=='gcn' runs that use GINConv.
gin_perf = select_bests(other_conv_df[(other_conv_df.conv=='GINConv') & (other_conv_df.model=='gcn') ], metric)
# Mean train/val/test F1 of the selected runs per (hold_out_by, embeddings_len).
gin_perf[metrics_cols+['hold_out_by','embeddings_len','test_fold']].groupby(['hold_out_by','embeddings_len']).agg(['mean']).reset_index()[['hold_out_by','embeddings_len']+['train_f1','val_f1','test_f1']]

Unnamed: 0_level_0,hold_out_by,embeddings_len,train_f1,val_f1,test_f1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,mean,mean
0,UC1,0,0.660841,0.580352,0.572189
1,UC1,128,0.83942,0.763753,0.771276
2,UC1,onehot,0.796302,0.659992,0.64177
3,UC2,0,0.483032,0.337581,0.3293
4,UC2,128,0.720917,0.580568,0.550652
5,UC2,onehot,0.796951,0.51069,0.483354
6,UC3,0,0.203623,0.114399,0.109245
7,UC3,128,0.636421,0.279199,0.277772
8,UC3,onehot,0.712753,0.255245,0.267982


In [25]:

for trials, name in zip([gcn_perf, gin_perf],["gcnconv","ginconv"]):
    # Map every (use case, feature set, test fold) to the name of its selected run.
    # r['name'] is used because r.name is the row label of the Series, not the column.
    best_trials = {
        (r.hold_out_by, r.embeddings_len, r.test_fold): r['name']
        for _, r in trials.iterrows()
    }

    # Write the file once per operator, after the mapping is complete, instead
    # of re-opening and rewriting it for every row; the context manager also
    # closes the handle.
    with open(f"{name}_best_trial_names.pkl", 'wb') as fh:
        pickle.dump(best_trials, fh)

In [26]:
# Shorter list of configuration columns used only to display the selected
# configurations (it omits several entries of config_cols, e.g. the dirgnn_* ones).
config_cols_old = ['model','conv', 'layers','hidden_dim', 'batch_size','lr', 'pool_from', 'weight_decay','warmup_steps','undirected','es_eps','patience','dropout','gin_hidden_dim','gin_layers']
# Configuration selected for every (hold_out_by, embeddings_len, test_fold) in gcn_perf.
gcn_perf[config_cols_old+['hold_out_by','embeddings_len','test_fold']]

Unnamed: 0,model,conv,layers,hidden_dim,batch_size,lr,pool_from,weight_decay,warmup_steps,undirected,es_eps,patience,dropout,gin_hidden_dim,gin_layers,hold_out_by,embeddings_len,test_fold
0,gcn,SAGEConv,4,512,4096,0.0005,last,1e-05,15,True,1e-06,100,0.5,,,UC1,0,0
0,gcn,SAGEConv,4,512,4096,0.0005,last,0.0001,15,True,1e-06,100,0.5,,,UC1,0,1
0,gcn,SAGEConv,4,512,4096,0.0005,last,0.0001,15,True,1e-06,100,0.5,,,UC1,0,2
0,gcn,SAGEConv,4,512,4096,0.0005,last,0.0001,15,True,1e-06,100,0.5,,,UC1,0,3
0,gcn,SAGEConv,4,256,512,0.0005,last,0.001,0,True,0.0001,100,0.5,,,UC1,128,0
0,gcn,SAGEConv,4,512,4096,0.0005,last,0.0001,0,True,0.0001,100,0.5,,,UC1,128,1
0,gcn,SAGEConv,6,512,4096,0.0005,last,0.001,15,True,1e-06,100,0.5,,,UC1,128,2
0,gcn,SAGEConv,4,256,512,0.0005,last,0.001,0,True,0.0001,100,0.5,,,UC1,128,3
0,gcn,SAGEConv,3,256,128,0.001,last,0.01,0,True,0.0001,100,0.5,,,UC1,onehot,0
0,gcn,SAGEConv,3,256,128,0.001,all,0.01,0,True,0.0001,100,0.5,,,UC1,onehot,1


In [27]:
# Configuration selected for every (hold_out_by, embeddings_len, test_fold) in gin_perf.
gin_perf[config_cols_old+['hold_out_by','embeddings_len','test_fold']]

Unnamed: 0,model,conv,layers,hidden_dim,batch_size,lr,pool_from,weight_decay,warmup_steps,undirected,es_eps,patience,dropout,gin_hidden_dim,gin_layers,hold_out_by,embeddings_len,test_fold
0,gcn,GINConv,4,256,4096,0.0005,last,0.0001,0,True,0.0001,100,0.5,256.0,3.0,UC1,0,0
0,gcn,GINConv,4,512,4096,0.0005,all,0.0001,0,True,0.0001,100,0.5,512.0,3.0,UC1,0,1
0,gcn,GINConv,4,512,4096,0.0005,all,0.0001,0,True,0.0001,100,0.5,512.0,3.0,UC1,0,2
0,gcn,GINConv,4,256,4096,0.0005,last,0.0001,0,True,0.0001,100,0.5,256.0,3.0,UC1,0,3
0,gcn,GINConv,4,256,4096,0.0005,all,0.001,0,True,0.0001,100,0.5,256.0,2.0,UC1,128,0
0,gcn,GINConv,4,256,4096,0.0005,all,0.0001,0,True,0.0001,100,0.5,256.0,3.0,UC1,128,1
0,gcn,GINConv,4,256,4096,0.0005,all,0.001,0,True,0.0001,100,0.5,256.0,3.0,UC1,128,2
0,gcn,GINConv,4,256,4096,0.0005,all,0.0001,0,True,0.0001,100,0.5,256.0,3.0,UC1,128,3
0,gcn,GINConv,4,512,128,0.001,all,0.01,0,True,0.0001,100,0.5,512.0,3.0,UC1,onehot,0
0,gcn,GINConv,4,512,128,0.0005,all,0.01,0,True,0.0001,100,0.5,512.0,3.0,UC1,onehot,1


# Tables export

In [29]:
# Output directory for the exported LaTeX tables. The trailing slash matters:
# file names are appended to this string directly.
tables_path = "tables/"
Path(tables_path).mkdir(exist_ok=True)

In [30]:
# Mean, std and count of every metric over the test folds, per (hold_out_by, embeddings_len).
gnn_perf_disp = gnn_perf[metrics_cols+['hold_out_by','embeddings_len','test_fold']].groupby(['hold_out_by','embeddings_len']).agg(['mean','std','count']).reset_index()
# Keep the group keys and the test metrics only.
gnn_perf_disp = gnn_perf_disp[['hold_out_by','embeddings_len']+[c for c in metrics_cols if 'test' in c]]
# format_metrics expects a 'model' column.
gnn_perf_disp['model'] = "DGN"
gnn_perf_disp = format_metrics(gnn_perf_disp)

In [31]:
# Pre-formatted table row of the "Null" model for each use case. The first
# entry is the model label; the four cells are prepended as-is to the
# per-use-case tables, so their order must match those tables' metric columns.
# Raw strings keep the LaTeX backslashes literal ("\s" and "\p" are invalid
# escape sequences in a normal string literal and trigger a SyntaxWarning).
null_models_rows = {
    "UC1": ["Null", r"$.000{\scriptstyle \pm .000}$", r"$.000{\scriptstyle \pm .000}$", r"$.676{\scriptstyle \pm .000}$", r"$.500{\scriptstyle \pm .000}$"],
    "UC2": ["Null", r"$.000{\scriptstyle \pm .000}$", r"$.000{\scriptstyle \pm .000}$", r"$.680{\scriptstyle \pm .024}$", r"$.500{\scriptstyle \pm .000}$"],
    "UC3": ["Null", r"$.000{\scriptstyle \pm .000}$", r"$.000{\scriptstyle \pm .000}$", r"$.678{\scriptstyle \pm .014}$", r"$.500{\scriptstyle \pm .000}$"],
}

  "UC1": ["Null","$.000{\scriptstyle \pm .000}$","$.000{\scriptstyle \pm .000}$","$.676{\scriptstyle \pm .000}$","$.500{\scriptstyle \pm .000}$"],
  "UC1": ["Null","$.000{\scriptstyle \pm .000}$","$.000{\scriptstyle \pm .000}$","$.676{\scriptstyle \pm .000}$","$.500{\scriptstyle \pm .000}$"],
  "UC1": ["Null","$.000{\scriptstyle \pm .000}$","$.000{\scriptstyle \pm .000}$","$.676{\scriptstyle \pm .000}$","$.500{\scriptstyle \pm .000}$"],
  "UC1": ["Null","$.000{\scriptstyle \pm .000}$","$.000{\scriptstyle \pm .000}$","$.676{\scriptstyle \pm .000}$","$.500{\scriptstyle \pm .000}$"],
  "UC2": ["Null","$.000{\scriptstyle \pm .000}$","$.000{\scriptstyle \pm .000}$","$.680{\scriptstyle \pm .024}$","$.500{\scriptstyle \pm .000}$"],
  "UC2": ["Null","$.000{\scriptstyle \pm .000}$","$.000{\scriptstyle \pm .000}$","$.680{\scriptstyle \pm .024}$","$.500{\scriptstyle \pm .000}$"],
  "UC2": ["Null","$.000{\scriptstyle \pm .000}$","$.000{\scriptstyle \pm .000}$","$.680{\scriptstyle \pm .024}$","$.50

In [32]:
# merge the tables by hold out
tables = []
for hold_out_by in ["UC1","UC2","UC3"]:
    table = pd.concat([
        baselines_perf[baselines_perf["hold_out_by"]==hold_out_by],
        gnn_perf[gnn_perf["hold_out_by"]==hold_out_by]
    ])[['model','embeddings_len','hold_out_by']+metrics_cols].groupby(['model','embeddings_len','hold_out_by']).agg(['mean','std']).reset_index()

    # # remove the onehot related rows
    table = table[table.embeddings_len!="onehot"]
    table.embeddings_len = table.embeddings_len.replace({"0":"", "128":"+emb","onehot":"+onehot"})

    table = format_metrics(table.reset_index(), metrics=["mcc","f1","acc","auroc"], sets=["val"]).drop(columns=['Hold-out by'])

    # concatenate embedding len and model name
    table['Model'] = table['Model'].replace({"deepsets":"DeepSets","gcn":"DGN"})
    table['Model'] = table['Model']+table['Embeddings length']

    
    table = table.drop(columns=['Embeddings length'])
    null_row = pd.DataFrame([null_models_rows[hold_out_by]], columns=table.columns)
    table = pd.concat([null_row, table], ignore_index=True)
    tables.append(table)
    table.to_latex(f"{tables_path}{hold_out_by}.tex", index=False)

In [33]:
# create unique table with onehot results
# Stack the selected baseline and DGN runs that use the "onehot" features, then
# take the mean and std of every metric per (hold_out_by, model, embeddings_len).
table = pd.concat([
    baselines_perf[baselines_perf["embeddings_len"]=="onehot"],
    gnn_perf[gnn_perf["embeddings_len"]=="onehot"]
])[['model','hold_out_by','embeddings_len']+metrics_cols].groupby(['hold_out_by','model','embeddings_len']).agg(['mean','std']).reset_index().sort_values(by=['hold_out_by','model'])

# Format the test metrics; the feature column is constant here, so it is dropped.
onehot_table = format_metrics(table, metrics=["mcc","f1","acc","auroc"], sets=["test"]).drop(columns=['Embeddings length'])

# Show the use case as a bare number under a "UC" header.
onehot_table['Hold-out by'] = onehot_table['Hold-out by'].replace({"UC1":"1", "UC2":"2", "UC3":"3"})
onehot_table.rename(columns={"Hold-out by":"UC"}, inplace=True)

onehot_table.to_latex(tables_path+f'onehot_performances.tex', index=False)

In [34]:
table_metrics=["mcc","acc","f1","auroc"]

def format_metrics_conv(df, metrics=table_metrics, sets=("test",)):
    r"""Turn aggregated metric columns into LaTeX cells, keyed by conv operator.

    Args:
        df: frame with two-level columns that holds ``(f"{set}_{metric}", "mean")``
            and ``(f"{set}_{metric}", "std")`` for every requested pair, plus
            ``conv``, ``hold_out_by`` and ``embeddings_len``. It needs at least
            one row. The frame passed in is left unchanged.
        metrics: metric names to format, in output column order.
        sets: data splits to format. With more than one split the column
            titles are prefixed with the capitalised split name.

    Returns:
        A new frame with flat columns ``Conv``, ``Hold-out by``,
        ``Embeddings length`` and one ``$mean \scriptstyle \pm std$`` column
        per formatted pair. In each of those columns the row with the highest
        mean is wrapped in ``\mathbf{}``.
    """
    def bold(cell):
        # "$...$" -> "$\mathbf{...}$"
        return cell.replace("$", "$\\mathbf{", 1)[:-1] + "}$"

    # Work on a copy so the caller's frame does not gain the formatted columns.
    df = df.copy()
    formatted_metrics = []
    for m in metrics:
        for s in sets:
            means = df[(f"{s}_{m}", "mean")]
            stds = df[(f"{s}_{m}", "std")]

            col = "\\textbf{" + f"{m.upper()}" + "}"
            if len(sets)>1:
                col = f"{s.capitalize()} " + col

            cells = "$" + means.apply(format_value) + "\\scriptstyle \\pm " + stds.apply(format_value) + "$"

            # put the best in bold
            best = means.sort_values(ascending=False).index[0]
            is_best = cells.index == best
            cells[is_best] = cells[is_best].map(bold)
            df[col] = cells

            # Sorting the column index first lets the whole (mean, std, ...)
            # group of this metric be dropped by its top-level name.
            df = df.sort_index(axis=1).drop(columns=f"{s}_{m}")

            formatted_metrics.append(col)

    df = df[["conv","hold_out_by","embeddings_len"]+formatted_metrics].copy()

    # rename columns to be less pythonish
    df.columns = ['Conv','Hold-out by','Embeddings length'] + formatted_metrics

    return df

  df[col] = "$" + (df[(f"{s}_{m}","mean")]).apply(format_value) + "\scriptstyle \pm " + (df[(f"{s}_{m}","std")]).apply(format_value) +"$"


In [36]:
# Output directory for the conv-operator comparison tables.
os.makedirs("tables_conv_compare", exist_ok=True)

# export gin and gcn results
# NOTE(review): gin_perf_disp and gcn_perf_disp are computed here but not used
# in the rest of this cell — confirm whether they are still needed.
gin_perf_disp = gin_perf[metrics_cols+['hold_out_by','embeddings_len','test_fold']].groupby(['hold_out_by','embeddings_len']).agg(['mean','std','count']).reset_index()
gin_perf_disp = gin_perf_disp[['hold_out_by','embeddings_len']+[c for c in metrics_cols if 'test' in c]]
gin_perf_disp['model'] = "GIN"
gin_perf_disp = format_metrics(gin_perf_disp)

gcn_perf_disp = gcn_perf[metrics_cols+['hold_out_by','embeddings_len','test_fold']].groupby(['hold_out_by','embeddings_len']).agg(['mean','std','count']).reset_index()
gcn_perf_disp = gcn_perf_disp[['hold_out_by','embeddings_len']+[c for c in metrics_cols if 'test' in c]]
gcn_perf_disp['model'] = "GCN"
gcn_perf_disp = format_metrics(gcn_perf_disp)

# One LaTeX table per use case comparing the conv operators on train/val F1.
for hold_out_by in ["UC1","UC2","UC3"]:
    # Stack the selected runs of the three selections for this use case, then
    # take the mean and std of every metric per (conv, embeddings_len, hold_out_by).
    table = pd.concat([
        gin_perf[gin_perf["hold_out_by"]==hold_out_by],
        gcn_perf[gcn_perf["hold_out_by"]==hold_out_by],
        gnn_perf[gnn_perf["hold_out_by"]==hold_out_by]
    ])[['conv','embeddings_len','hold_out_by']+metrics_cols].groupby(['conv','embeddings_len','hold_out_by']).agg(['mean','std']).reset_index()

    # Drop the onehot rows and turn the feature identifier into a name suffix.
    table = table[table.embeddings_len!="onehot"]
    table.embeddings_len = table.embeddings_len.replace({"0":"", "128":"+emb","onehot":"+onehot"})

    # reset_index() renumbers the rows after the filter above.
    table = format_metrics_conv(table.reset_index(), metrics=["f1"], sets=["train","val"]).drop(columns=['Hold-out by'])

    # Short operator labels; SAGEConv and GCNConv are both shown as "GCN".
    table['Conv'] = table['Conv'].replace({"GINConv":"GIN","SAGEConv":"GCN","GCNConv":"GCN","DirGNNConv":"DirGNN"})
    table['Conv'] = table['Conv']+table['Embeddings length']
    
    table = table.drop(columns=['Embeddings length'])
    table.to_latex(f"tables_conv_compare/{hold_out_by}_gnn.tex", index=False)
    