In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
import os.path
import pickle

import numpy as np
import pandas as pd
import qgrid
import torch

import core.train_engine
from core.utils_ipynb import read_exp_result_files


def get_keychain_value_iter(d, key_chain=None):
    """Walk ``d`` depth-first and yield ``(key_chain, value)`` for every leaf.

    A leaf is any value that is not a ``dict``. The yielded key chain is the
    tuple of keys leading from the root to that leaf, prefixed with the
    optional ``key_chain`` argument. Empty dicts yield nothing; a non-dict
    ``d`` yields a single pair whose chain is just the prefix.
    """
    prefix = list(key_chain) if key_chain is not None else []

    if isinstance(d, dict):
        for key, child in d.items():
            yield from get_keychain_value_iter(child, prefix + [key])
        return

    yield tuple(prefix), d
            
def get_keychain_value(d, key_chain):
    """Return the value reached by indexing ``d`` with each key of ``key_chain`` in turn.

    Args:
        d: Nested container (typically a dict of dicts).
        key_chain: Iterable of keys, applied from the outermost level inwards.
            An empty chain returns ``d`` itself.

    Returns:
        The object found at the end of the chain.

    Raises:
        KeyError: If the chain cannot be followed for any reason (missing key,
            non-indexable intermediate value, ...). The message names the
            requested chain and the original exception is attached as
            ``__cause__``.
    """
    try:
        for k in key_chain:
            d = d[k]

    except Exception as ex:
        # Every failure is normalised to KeyError so callers only need to
        # handle one exception type.
        raise KeyError('could not resolve key chain {!r}'.format(key_chain)) from ex

    return d

In [3]:
# Schema-like description of an experiment result. Leaves are Python type
# objects (plus the literal model type string); presumably they name the
# expected type of each field -- nothing in the visible notebook reads them.

# Layout of the 'training' section of an experiment configuration.
training_cfg = dict(
    lr=float,
    lr_drop_fact=float,
    num_epochs=int,
    epoch_step=int,
    batch_size=int,
    weight_decay=float,
    validation_ratio=float,
)

# Layout of the 'model' section of an experiment configuration.
model_cfg_meta = dict(
    model_type='PershomModel',
    model_kwargs=dict(
        use_sup_lvlset_filt=bool,
        filtration_kwargs=dict(
            use_node_deg=bool,
            use_node_lab=bool,
            num_gin=int,
            hidden_dim=int,
            use_mlp=bool,
        ),
        classifier_kwargs=dict(num_struct_elem=int),
    ),
)

# Full experiment configuration: dataset name plus the two sections above.
exp_cfg_meta = dict(
    dataset_name=str,
    training=training_cfg,
    model=model_cfg_meta,
)

# Layout of one stored experiment result.
exp_res_meta = dict(
    exp_cfg=exp_cfg_meta,
    cv_test_acc=list,
    cv_val_acc=list,
    cv_indices_trn_tst_val=list,
    cv_epoch_loss=list,
    start_time=list,
    id=str,
)

In [4]:
# Map every leaf key chain of the training engine's result schema to its
# last key, then display the mapping.
kc = {
    chain: chain[-1]
    for chain, _leaf in get_keychain_value_iter(core.train_engine.__exp_res_meta)
}
kc

{('exp_cfg', 'dataset_name'): 'dataset_name',
 ('exp_cfg', 'training', 'lr'): 'lr',
 ('exp_cfg', 'training', 'lr_drop_fact'): 'lr_drop_fact',
 ('exp_cfg', 'training', 'num_epochs'): 'num_epochs',
 ('exp_cfg', 'training', 'epoch_step'): 'epoch_step',
 ('exp_cfg', 'training', 'batch_size'): 'batch_size',
 ('exp_cfg', 'training', 'weight_decay'): 'weight_decay',
 ('exp_cfg', 'training', 'validation_ratio'): 'validation_ratio',
 ('exp_cfg', 'model', 'model_type'): 'model_type',
 ('exp_cfg',
  'model',
  'use_super_level_set_filtration'): 'use_super_level_set_filtration',
 ('exp_cfg', 'model', 'use_node_degree'): 'use_node_degree',
 ('exp_cfg',
  'model',
  'set_node_degree_uninformative'): 'set_node_degree_uninformative',
 ('exp_cfg', 'model', 'pooling_strategy'): 'pooling_strategy',
 ('exp_cfg', 'model', 'use_node_label'): 'use_node_label',
 ('exp_cfg', 'model', 'gin_number'): 'gin_number',
 ('exp_cfg', 'model', 'gin_dimension'): 'gin_dimension',
 ('exp_cfg', 'model', 'gin_mlp_type'): 'gi

In [5]:
# Key chains (paths into a result dict) that become columns of the summary
# table; each column is named after the last key of its chain.
# Commented-out chains are currently excluded -- uncomment one to add it.
COL_NAMES = {
    chain: chain[-1]
    for chain in (
        ('exp_cfg', 'dataset_name'),
        # ('exp_cfg', 'tag'),
        # ('exp_cfg', 'training', 'lr'),
        # ('exp_cfg', 'training', 'lr_drop_fact'),
        # ('exp_cfg', 'training', 'num_epochs'),
        # ('exp_cfg', 'training', 'epoch_step'),
        ('exp_cfg', 'training', 'batch_size'),
        # ('exp_cfg', 'training', 'weight_decay'),
        # ('exp_cfg', 'training', 'validation_ratio'),
        ('exp_cfg', 'model', 'model_type'),
        ('exp_cfg', 'model', 'use_super_level_set_filtration'),
        ('exp_cfg', 'model', 'use_node_degree'),
        ('exp_cfg', 'model', 'use_node_label'),
        ('exp_cfg', 'model', 'gin_number'),
        ('exp_cfg', 'model', 'gin_dimension'),
        # ('exp_cfg', 'model', 'gin_mlp_type'),
        ('exp_cfg', 'model', 'set_node_degree_uninformative'),
        ('exp_cfg', 'model', 'num_struct_elements'),
        ('exp_cfg', 'model', 'drop_out'),
        ('exp_cfg', 'model', 'pooling_strategy'),
        # ('cv_test_acc',),
        # ('cv_val_acc',),
        # ('cv_indices_trn_tst_val',),
        # ('cv_epoch_loss',),
        # ('start_time',),
        # ('id',),
        ('finished_training',),
    )
}

In [6]:
def pd_frame(path):
    """Build a summary table with one row per experiment result found under ``path``.

    For every result returned by ``read_exp_result_files(path)`` the row holds:

    * ``acc_last_mean`` / ``acc_last_std``: mean and standard deviation, over
      the cross-validation folds with at least one recorded epoch, of the
      last recorded test accuracy.
    * ``acc_val_mean`` / ``acc_val_std``: mean and standard deviation, over the
      folds that ran for all ``num_epochs`` epochs, of the test accuracy at
      the epoch with the best validation accuracy. Only the second half of
      the epochs is searched.
    * ``cv_folds_available``: number of folds trained for all ``num_epochs``.
    * one column per entry of ``COL_NAMES`` whose key chain exists in the
      result; missing entries are left empty (NaN).

    Statistics over zero folds are NaN.

    Args:
        path: Location handed to ``read_exp_result_files``.

    Returns:
        tuple: ``(frame, results)`` -- the ``pandas.DataFrame`` (columns sorted
        by name, row ``i`` describing ``results[i]``) and the object returned
        by ``read_exp_result_files``. The frame is empty when there are no
        results.
    """
    results = read_exp_result_files(path)

    rows = []
    for res in results:
        num_epochs = res['exp_cfg']['training']['num_epochs']
        test_acc_per_fold = res['cv_test_acc']
        row = {}

        # Accuracy after the last recorded epoch of each fold.
        cv_acc_last = [fold[-1] for fold in test_acc_per_fold if len(fold) > 0]
        row['acc_last_mean'] = np.mean(cv_acc_last) if cv_acc_last else np.nan
        row['acc_last_std'] = np.std(cv_acc_last) if cv_acc_last else np.nan

        # Model selection on the validation split: for each completed fold,
        # report the test accuracy at the best validation epoch.
        cv_acc_validated = []
        for test, val in zip(test_acc_per_fold, res['cv_val_acc']):
            if len(test) != num_epochs:
                # Fold did not finish training; skip it.
                continue

            # Only the second half of training is considered.
            half = len(test) // 2
            test_tail = torch.tensor(test[half:])
            val_tail = torch.tensor(val[half:])
            if val_tail.numel() == 0:
                # Nothing to select from (e.g. zero epochs); max() would raise.
                continue

            _, i_max = val_tail.max(0)
            cv_acc_validated.append(test_tail[i_max].item())

        row['acc_val_mean'] = np.mean(cv_acc_validated) if cv_acc_validated else np.nan
        row['acc_val_std'] = np.std(cv_acc_validated) if cv_acc_validated else np.nan

        row['cv_folds_available'] = sum(
            1 for fold in test_acc_per_fold if len(fold) == num_epochs
        )

        for key_chain, col_name in COL_NAMES.items():
            try:
                row[col_name] = get_keychain_value(res, key_chain)
            except KeyError:
                # Deliberate best effort: not every experiment has every
                # option, the column simply stays empty for this row.
                pass

        rows.append(row)

    # Build the frame once from all records instead of concatenating
    # single-row frames; sort the columns by name as before.
    frame = pd.DataFrame(rows).sort_index(axis=1)

    return frame, results

In [7]:
# Directory that is scanned for experiment result files.
path = './results/'
# RES is the summary table, RES_FILES the raw results it was built from.
RES, RES_FILES = pd_frame(path)
# Wrap the table in an interactive qgrid widget (displayed in the next cell).
qgrid_widget = qgrid.show_grid(RES, show_toolbar=True)

In [8]:
# Render the interactive grid created from RES.
qgrid_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [9]:
# Fetch the table back from the widget; per qgrid's documentation this
# reflects sorting, filtering and edits made in the UI -- TODO confirm.
qgrid_widget.get_changed_df()

Unnamed: 0,acc_last_mean,acc_last_std,acc_val_mean,acc_val_std,batch_size,cv_folds_available,dataset_name,drop_out,finished_training,gin_dimension,gin_number,model_type,num_struct_elements,pooling_strategy,set_node_degree_uninformative,use_node_degree,use_node_label,use_super_level_set_filtration
0,73.313224,1.433772,72.771075,2.718653,64,10,PROTEINS,0.0,True,64.0,1.0,PershomLearnedFilt,100.0,,False,True,False,True
1,50.066667,3.217314,50.666667,2.616189,64,10,IMDB-MULTI,0.5,True,64.0,1.0,GIN,,,False,True,False,
2,49.333333,4.381780,50.600000,3.032418,64,10,IMDB-MULTI,0.5,True,64.0,5.0,GIN,,,False,True,False,
3,72.600000,3.231099,73.600000,3.261901,64,10,IMDB-BINARY,0.5,True,64.0,1.0,GIN,,,False,True,False,
4,74.900000,4.437342,73.100000,3.176476,64,10,IMDB-BINARY,0.5,True,64.0,5.0,GIN,,,False,True,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,65.109775,2.467928,65.134578,2.239018,32,10,NCI1,0.5,True,64.0,,SimpleNNBaseline,,sum,False,False,True,
155,66.203016,2.388689,66.252386,2.540845,32,10,NCI1,0.0,True,64.0,,SimpleNNBaseline,,sum,False,True,False,
156,65.839116,2.867227,65.498125,3.192981,32,10,NCI1,0.5,True,64.0,,SimpleNNBaseline,,sum,False,True,False,
157,76.800000,3.565109,76.900000,4.259108,32,10,REDDIT-BINARY,0.5,True,64.0,1.0,GIN,,sort,True,True,False,


The following cells contain some utilities for working with the results, e.g., deleting them.

In [None]:
# files = glob.glob(os.path.join(path, "*.pickle"))
# for f in files:     
#     with open(f, 'rb') as fid:
#         res = pickle.load(fid)
#     finished_training = (len(res['cv_test_acc'][-1]) == res['exp_cfg']['training']['num_epochs'])
#     res['finished_training'] = finished_training
    
#     with open(f, 'wb') as fid:
#         pickle.dump(obj=res, file=fid)

In [68]:
# Load the errors stored in results/errors.pickle and print each one
# together with the experiment configuration attached to it.
# NOTE(review): pickle.load can execute arbitrary code -- only open files
# produced by your own runs.
with open('results/errors.pickle', 'br') as fid:
    errors = pickle.load(fid)

for e in errors:
    print(e)
    # Each entry is expected to carry an `exp_cfg` attribute -- presumably
    # attached when the error was recorded; TODO confirm.
    print(e.exp_cfg)
    print("=============")

CUDA out of memory. Tried to allocate 1.35 GiB (GPU 0; 10.92 GiB total capacity; 9.49 GiB already allocated; 903.50 MiB free; 33.32 MiB cached)
{'dataset_name': 'COLLAB', 'training': {'lr': 0.01, 'lr_drop_fact': 0.5, 'num_epochs': 100, 'epoch_step': 20, 'batch_size': 64, 'weight_decay': 1e-05, 'validation_ratio': 0.1}, 'model': {'model_type': 'PershomLearnedFilt', 'use_super_level_set_filtration': True, 'use_node_degree': True, 'use_node_label': False, 'gin_number': 1, 'gin_dimension': 64, 'gin_mlp_type': 'lin_bn_lrelu_lin', 'num_struct_elements': 100, 'cls_hidden_dimension': 64, 'drop_out': 0.0}, 'tag': '1.0'}


In [10]:
# Pick two stored results for a side-by-side comparison.
# NOTE(review): 110 and 56 are positions in RES_FILES; they presumably depend
# on the order in which the result files were read -- confirm before reuse.
a, b = RES_FILES[110], RES_FILES[56]

In [11]:
# For each top-level entry of the experiment config, print whether the two
# selected results agree. Only the keys of `a` are visited; a key missing
# from `b` would raise KeyError.
for k in a['exp_cfg']:
    print(k, a['exp_cfg'][k] == b['exp_cfg'][k])

dataset_name False
training True
model True
tag True


In [14]:
# Export the two experiment configurations as a JSON list.
# The target directory must already exist; an existing file is overwritten.
import json
with open('results_rebuttal/exp_cfgs.json', 'w') as fid:
    json.dump([a['exp_cfg'], b['exp_cfg']], fid)

In [46]:
# Show the model sections of the two selected experiments one after the
# other so their settings can be compared.
model_a = a['exp_cfg']['model']
model_b = b['exp_cfg']['model']
display(model_a, model_b)

{'model_type': 'PershomLearnedFilt',
 'use_super_level_set_filtration': True,
 'use_node_degree': True,
 'use_node_label': False,
 'gin_number': 1,
 'gin_dimension': 64,
 'gin_mlp_type': 'lin_bn_lrelu_lin',
 'num_struct_elements': 100,
 'cls_hidden_dimension': 64,
 'drop_out': 0.0,
 'set_node_degree_uninformative': False}

{'model_type': 'PershomLearnedFilt',
 'use_super_level_set_filtration': True,
 'use_node_degree': True,
 'set_node_degree_uninformative': True,
 'use_node_label': False,
 'gin_number': 1,
 'gin_dimension': 64,
 'gin_mlp_type': 'lin_bn_lrelu_lin',
 'num_struct_elements': 100,
 'cls_hidden_dimension': 64,
 'drop_out': 0.0}