In [1]:
import pandas as pd
import json
import os
import time

In [2]:
'''
Change result dict when you run new experiments 

folder - Enter list of folders to check, each folder will be traversed to check if file exists in folder
seed
arch
size_reg
dataset

**IMPORTANT** : Update the path template in return_expt_base_name if you change the folder where your results are stored


results_dict = {
    
    'folder' : [38,39,40,41],
    'seed' : [2,3],
    'arch' : ['GINConv','GCNConv','GATConv'],
    'size_reg' : [0,0.001,0.1],
    'dataset'  : {'BBBP':'BBBP','MUTAG':'Mutagenicity','HERG':'hERG'}
}


results_dict = {
    
    'folder' : [27,29,30,31,32,33,34,35],
    'seed' : [0,1],
    'arch' : ['GINConv','GCNConv','GATConv'],
    'size_reg' : [0,0.001,0.1],
    'dataset'  : {'BBBP':'BBBP','MUTAG':'Mutagenicity','HERG':'hERG'}
}


results_dict = {
    
    'folder' : [42,43],
    'seed' : [2,3],
    'arch' : ['GINConv','GCNConv','GATConv'],
    'channel' : ['Vanilla'],
    'dataset'  : {'BBBP':'BBBP','MUTAG':'Mutagenicity'}
}

'''
# One entry per dataset. 'seed_info' maps a seed to the folder whose results
# are read for that seed (used as the folder component of the experiment path).
expt_dict = [
    {
        "dataset": "HERG",
        "seed_info": {1: 49, 3: 49, 0: 48, 2: 48},
    },
    {
        "dataset": "MUTAG",
        "seed_info": {1: 47, 3: 47, 0: 46, 2: 46},
    },
    {
        "dataset": "BBBP",
        "seed_info": {1: 47, 3: 47, 0: 46, 2: 46},
    },
]

# Short dataset key -> name used as the prefix of the result/config file names.
dataset_dict = {
    'BBBP': 'BBBP',
    'MUTAG': 'Mutagenicity',
    'HERG': 'hERG',
}

def return_expt_base_name(seed,layer_type,channel,folder,dataset):
    """Return the relative path of the directory holding one experiment run.

    The path has the form
    ``../Cluster_JOBS/LoG_plot/<folder>/EXPT-32A<dataset>-<seed>-<layer_type>-<channel>``.
    All arguments are interpolated as-is; nothing is checked against the
    file system.
    """
    return f'../Cluster_JOBS/LoG_plot/{folder}/EXPT-32A{dataset}-{seed}-{layer_type}-{channel}'


# CSV file that the aggregated per-experiment results are written to.
result_path = 'EXPERIMENT_RESULTS_1.csv'

In [3]:
#check if main csv exists
# Load the results csv left by a previous run, if there is one. When the file
# is missing, `all_results` is not defined at all.
# NOTE(review): `all_results` is not read by any of the cells visible here, and
# the aggregation cell rewrites result_path from scratch - confirm whether this
# load is still needed.
if os.path.exists(result_path):
    all_results = pd.read_csv(result_path)

In [4]:
# Collect one single-row DataFrame per experiment configuration whose result
# and config files can both be read, then write them all to result_path.
result_list = []

# Columns (and their order) kept for every experiment row in the results csv.
result_columns = ['creation_date','folder','dataset','seed','layer_type','channel','Trained_explainations_train_rocauc','Trained_explainations_validation_rocauc','Trained_explainations_test_rocauc']

# dataset -> {seed: folder}, built once so the folder is looked up directly
# instead of re-scanning expt_dict for every configuration.
folder_by_dataset = {entry['dataset']: entry['seed_info'] for entry in expt_dict}

print('Adding the Following Config to Final Results, copy them over to add to the plots pdf')

for dataset in dataset_dict.keys():
    for layer_type in ["GINConv", "GCNConv", "GATConv"]:
        for channel in ["DualParam","SingleChannel","Vanilla"]:
            for seed in [0,1,2,3]:

                # Skip configurations that have no folder mapping. The earlier
                # linear scan left `folder` untouched in that case, so the folder
                # of the preceding configuration was silently reused (or a
                # NameError / KeyError was raised).
                seed_to_folder = folder_by_dataset.get(dataset, {})
                if seed not in seed_to_folder:
                    print(f'no folder configured for {dataset} - seed {seed}, skipping')
                    continue
                folder = seed_to_folder[seed]

                EXPT_PATH = return_expt_base_name(seed,layer_type,channel,folder,dataset)
                if not os.path.exists(EXPT_PATH):
                    # Experiment directory is absent: nothing to collect.
                    continue

                # Name both input files once; the result file path is needed twice.
                result_file = EXPT_PATH + f'/{dataset_dict[dataset]}_classification_result.json'
                config_file = EXPT_PATH + f'/{dataset_dict[dataset]}config.json'

                found = True

                #TRY loading the classification result json
                try:
                    with open(result_file) as file:
                        raw_result = json.load(file)
                    # Wrap every value in a list so the json object becomes a one-row frame.
                    expt_result = pd.DataFrame({key: [value] for key, value in raw_result.items()})
                    # Modification time of the result file, formatted as a UTC date.
                    expt_result['creation_date'] = time.strftime('%m/%d/%Y', time.gmtime(os.path.getmtime(result_file)))
                except Exception as e:
                    print(f"error when trying to load Results CSV : {e}")
                    found = False

                #TRY loading the json file
                # `config` is not used further in this cell; a run whose config
                # cannot be read is excluded from the results.
                try:
                    with open(config_file) as file:
                        config = json.load(file)
                except Exception as e:
                    print(f"error when trying to load config Json : {e}")
                    found = False

                if not found:
                    continue

                print(f'{dataset} - {layer_type} - {channel} - {seed}')

                expt_result['folder']        = folder
                expt_result['seed']          = seed
                expt_result['dataset']       = dataset
                expt_result['layer_type']    = layer_type
                expt_result['channel']       = channel

                expt_result = expt_result[result_columns]
                result_list.append(expt_result)


if len(result_list) > 0:
    # Only announce the csv when one is actually written.
    print('Creating new results csv')
    expt_result_df = pd.concat(result_list, axis = 0).reset_index(drop = True)
    expt_result_df.to_csv(result_path, index = False)
else:
    print('No new experiments found to add to results csv')

Adding the Following Config to Final Results, copy them over to add to the plots pdf
BBBP - GINConv - DualParam - 0
BBBP - GINConv - DualParam - 1
BBBP - GINConv - DualParam - 2
BBBP - GINConv - DualParam - 3
BBBP - GINConv - SingleChannel - 0
BBBP - GINConv - SingleChannel - 1
BBBP - GINConv - SingleChannel - 2
BBBP - GINConv - SingleChannel - 3
BBBP - GINConv - Vanilla - 0
BBBP - GINConv - Vanilla - 1
BBBP - GINConv - Vanilla - 2
BBBP - GINConv - Vanilla - 3
BBBP - GCNConv - DualParam - 0
BBBP - GCNConv - DualParam - 1
BBBP - GCNConv - DualParam - 2
BBBP - GCNConv - DualParam - 3
BBBP - GCNConv - SingleChannel - 0
BBBP - GCNConv - SingleChannel - 1
BBBP - GCNConv - SingleChannel - 2
BBBP - GCNConv - SingleChannel - 3
BBBP - GCNConv - Vanilla - 0
BBBP - GCNConv - Vanilla - 1
BBBP - GCNConv - Vanilla - 2
BBBP - GCNConv - Vanilla - 3
BBBP - GATConv - DualParam - 0
BBBP - GATConv - DualParam - 1
BBBP - GATConv - DualParam - 2
BBBP - GATConv - DualParam - 3
BBBP - GATConv - SingleChannel 

In [5]:
# df = pd.read_csv(result_path,index_col = None)
# df['model_type']='DualParam'
# df.to_csv(result_path, index = False)

In [5]:
# Reload the aggregated results from disk. Read through result_path so the
# file name is defined in one place and cannot drift from the file written
# by the aggregation cell.
df = pd.read_csv(result_path)

In [6]:
# Show the loaded results table as the cell output.
df

Unnamed: 0,creation_date,folder,dataset,seed,layer_type,channel,Trained_explainations_train_rocauc,Trained_explainations_validation_rocauc,Trained_explainations_test_rocauc
0,10/18/2024,46,BBBP,0,GINConv,DualParam,0.933364,0.898709,0.853710
1,10/18/2024,47,BBBP,1,GINConv,DualParam,0.956179,0.939526,0.865436
2,10/19/2024,46,BBBP,2,GINConv,DualParam,0.938834,0.902161,0.826797
3,10/19/2024,47,BBBP,3,GINConv,DualParam,0.939095,0.902011,0.850250
4,10/18/2024,46,BBBP,0,GINConv,SingleChannel,0.930259,0.887155,0.849673
...,...,...,...,...,...,...,...,...,...
103,10/21/2024,49,HERG,3,GATConv,SingleChannel,0.781973,0.770928,0.760036
104,10/19/2024,48,HERG,0,GATConv,Vanilla,0.679690,0.690392,0.665537
105,10/19/2024,49,HERG,1,GATConv,Vanilla,0.737555,0.736938,0.707690
106,10/21/2024,48,HERG,2,GATConv,Vanilla,0.670511,0.678791,0.654809


In [7]:
# Per (dataset, layer_type, channel) group: mean and standard deviation
# (pandas' default `std`) of the train / validation / test ROC-AUC columns.
df_formatted = (
    df.groupby(['dataset','layer_type','channel'])
      .agg(
          mean_train_auroc=pd.NamedAgg(column='Trained_explainations_train_rocauc', aggfunc='mean'),
          std_train_auroc=pd.NamedAgg(column='Trained_explainations_train_rocauc', aggfunc='std'),
          mean_valid_auroc=pd.NamedAgg(column='Trained_explainations_validation_rocauc', aggfunc='mean'),
          std_valid_auroc=pd.NamedAgg(column='Trained_explainations_validation_rocauc', aggfunc='std'),
          mean_test_auroc=pd.NamedAgg(column='Trained_explainations_test_rocauc', aggfunc='mean'),
          std_test_auroc=pd.NamedAgg(column='Trained_explainations_test_rocauc', aggfunc='std'),
      )
      .reset_index()
)

In [9]:
# Add one "mean ± std" text column per split, built row by row from the
# aggregated columns, then save the table.
# NOTE(review): the mean is printed with 3 decimals but the std with only 2 -
# confirm this asymmetry is intended.
df_formatted['train_mean_std_format'] = df_formatted.apply(
    lambda row: f"{row['mean_train_auroc']:.3f} ± {row['std_train_auroc']:.2f}",
    axis=1,
)
df_formatted['valid_mean_std_format'] = df_formatted.apply(
    lambda row: f"{row['mean_valid_auroc']:.3f} ± {row['std_valid_auroc']:.2f}",
    axis=1,
)
df_formatted['test_mean_std_format'] = df_formatted.apply(
    lambda row: f"{row['mean_test_auroc']:.3f} ± {row['std_test_auroc']:.2f}",
    axis=1,
)

# Written without the index column.
df_formatted.to_csv('new_FORMATTED_EXPERIMENT_RESULTS.csv', index=False)