In [1]:
import pandas as pd
import json
import os
import numpy as np
import time

In [2]:
'''
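Update expt_dict (defined right after this note) when you run new experiments.
The results_dict blocks in this note sit inside the string, so the code does not read them.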

folder - Enter list of folders to check, each folder will be traversed to check if file exists in folder
seed
arch
size_reg
dataset

**IMPORTANT** : Update the path template in return_expt_base_name if you change the folder where your results are stored


results_dict = {
    
    'folder' : [38,39,40,41],
    'seed' : [2,3],
    'arch' : ['GINConv','GCNConv','GATConv'],
    'size_reg' : [0,0.001,0.1],
    'dataset'  : {'BBBP':'BBBP','MUTAG':'Mutagenicity','HERG':'hERG'}
}


results_dict = {
    
    'folder' : [27,29,30,31,32,33,34,35],
    'seed' : [0,1],
    'arch' : ['GINConv','GCNConv','GATConv'],
    'size_reg' : [0,0.001,0.1],
    'dataset'  : {'BBBP':'BBBP','MUTAG':'Mutagenicity','HERG':'hERG'}
}


results_dict = {
    
    'folder' : [42,43],
    'seed' : [2,3],
    'arch' : ['GINConv','GCNConv','GATConv'],
    'channel' : ['Vanilla'],
    'dataset'  : {'BBBP':'BBBP','MUTAG':'Mutagenicity'}
}

'''

# One entry per dataset; `seed_info` maps a seed to the folder number used when
# building that run's experiment path.
expt_dict = [
    {
        "dataset": "HERG",
        "seed_info": {1: 49, 3: 49, 0: 48, 2: 48},
    },
    {
        "dataset": "MUTAG",
        "seed_info": {1: 47, 3: 47, 0: 46, 2: 46},
    },
    {
        "dataset": "BBBP",
        "seed_info": {1: 47, 3: 47, 0: 46, 2: 46},
    },
]

# Short dataset key -> dataset name used as the prefix of the result CSV files.
dataset_dict = {
    'BBBP': 'BBBP',
    'MUTAG': 'Mutagenicity',
    'HERG': 'hERG',
}
# Uncomment to restrict the run to a single dataset:
#dataset_dict = {'BBBP':'BBBP'}

def return_expt_base_name(seed,layer_type,channel,folder,dataset):
    """Build the relative path of one experiment's result directory.

    Each argument is interpolated verbatim into the path template:
    `folder` becomes a directory level, while `dataset`, `seed`,
    `layer_type` and `channel` form the `EXPT-32A...` directory name.

    Returns:
        The path as a plain string (it is not checked for existence here).
    """
    return f'../Cluster_JOBS/LoG_plot/{folder}/EXPT-32A{dataset}-{seed}-{layer_type}-{channel}'


# Path of the main results CSV; when this file already exists it is loaded into `all_results`.
result_path = 'EXPERIMENT_RESULTS_QUADS.csv'

In [3]:
# Load previously saved results when the main CSV exists. Otherwise bind an
# empty frame, so `all_results` is always defined after this cell runs on a
# fresh kernel (previously the name was left unbound when the file was missing).
if os.path.exists(result_path):
    all_results = pd.read_csv(result_path)
else:
    all_results = pd.DataFrame()

In [4]:
def return_quadrant_of_motif(x,y):
    """Return the quadrant (1-4) of the point (x, y) relative to the centre (0.5, 0.5).

    Quadrants are numbered counter-clockwise starting from the upper right:

        1: x >= 0.5 and y >= 0.5
        2: x <  0.5 and y >= 0.5
        3: x <  0.5 and y <  0.5
        4: x >= 0.5 and y <  0.5

    Points lying exactly on a dividing line (x == 0.5 or y == 0.5) are assigned
    to the right / upper side. Previously such points matched no branch and the
    function returned None implicitly, so those rows were silently dropped by a
    later `groupby('quadrant')`.

    Returns:
        int in {1, 2, 3, 4}, or None when either coordinate is NaN.
    """
    # NaN is the only value that is not equal to itself; it has no quadrant.
    if x != x or y != y:
        return None

    is_right = x >= 0.5
    is_upper = y >= 0.5

    if is_upper:
        return 1 if is_right else 2
    return 4 if is_right else 3

In [5]:
result_list = []

print('Adding the Following Config to Final Results, copy them over to add to the plots pdf')

# dataset key -> {seed: folder}. Built once so the folder lookup cannot silently
# reuse a stale `folder` from an earlier iteration when a dataset has no entry.
seed_folders_by_dataset = {entry['dataset']: entry['seed_info'] for entry in expt_dict}

for dataset in dataset_dict.keys():
    for layer_type in ["GINConv", "GCNConv", "GATConv"]:
        for channel in ["DualParam"]:
            for seed in [0,1,2,3]:

                seed_folders = seed_folders_by_dataset.get(dataset, {})
                if seed not in seed_folders:
                    print(f"no folder configured in expt_dict for dataset={dataset}, seed={seed}; skipping")
                    continue
                folder = seed_folders[seed]

                EXPT_PATH = return_expt_base_name(seed,layer_type,channel,folder,dataset)

                # Experiments whose directory is absent are skipped silently.
                if not os.path.exists(EXPT_PATH):
                    continue

                # Only the validation CSV is used below, so it is the only file read.
                # A failed read skips this configuration; previously execution
                # continued and could re-append the previous iteration's frame
                # under the current configuration's labels.
                try:
                    logit_val = pd.read_csv(EXPT_PATH+f'/{dataset_dict[dataset]}_explanation_result_with_validation.csv')
                except Exception as e:
                    print(f"error when trying to read validation motif logit differences from {EXPT_PATH}: {e}")
                    continue

                # A row-wise apply on an empty frame does not yield a usable column.
                if logit_val.empty:
                    print(f"validation results in {EXPT_PATH} are empty; skipping")
                    continue

                # One row per motif_id (GroupBy.first), used for the two
                # per-experiment summary scalars below.
                dist_type = logit_val.groupby('motif_id').first().reset_index()

                # Summed absolute difference between the two class importances.
                y_equals_x = np.sum(abs(dist_type['sigmoid_importance_for_class_0'] - dist_type['sigmoid_importance_for_class_1']).tolist())
                # Summed absolute value of the sum of the two class importances.
                # NOTE(review): if these columns lie in [0, 1], the anti-diagonal of the
                # unit square is y = 1 - x rather than y = -x; confirm |c0 + c1| is intended.
                y_equals_minus_x = np.sum(abs(dist_type['sigmoid_importance_for_class_1'] + dist_type['sigmoid_importance_for_class_0']))

                # Per-row change in each class logit (new minus original).
                logit_val['logit_diff_class_0'] = logit_val['new_logit_class_0'] - logit_val['original_logit_class_0']
                logit_val['logit_diff_class_1'] = logit_val['new_logit_class_1'] - logit_val['original_logit_class_1']
                logit_val['quadrant'] = logit_val.apply(lambda row : return_quadrant_of_motif(row['sigmoid_importance_for_class_0'],row['sigmoid_importance_for_class_1']), axis = 1)

                # Mean logit change per quadrant, labelled with the configuration
                # and the two summary scalars (same value on every row).
                quadrant_summary = (
                    logit_val
                    .groupby('quadrant')
                    .agg(
                        mean_logit_diff_class_0   = ('logit_diff_class_0', 'mean'),
                        mean_logit_diff_class_1   = ('logit_diff_class_1', 'mean'))
                    .reset_index()
                    .assign(
                        folder           = folder,
                        seed             = seed,
                        dataset          = dataset,
                        layer_type       = layer_type,
                        channel          = channel,
                        y_equals_x       = y_equals_x,
                        y_equals_minus_x = y_equals_minus_x)
                )

                result_list.append(quadrant_summary)

                # List the configuration that was added, as the header line announces.
                print(EXPT_PATH)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# raise the same exception type with an actionable message instead.
if not result_list:
    raise ValueError('No experiment results were found; check expt_dict and the path built by return_expt_base_name')

final_result = pd.concat(result_list, axis = 0)

Adding the Following Config to Final Results, copy them over to add to the plots pdf


In [6]:
# Reshape to one row per experiment configuration, with one column per
# (mean logit diff, quadrant) pair, and write the table to CSV.
pivot_index = ['folder','seed','dataset','layer_type','channel','y_equals_x','y_equals_minus_x']
logit_diff_columns = ['mean_logit_diff_class_0','mean_logit_diff_class_1']

quadrant_table = final_result.pivot_table(
    index = pivot_index,
    columns = ['quadrant'],
    values = logit_diff_columns,
).reset_index()

quadrant_table.to_csv('quadrant_wise_results.csv')