# Generate experiment configuration files

The purpose of this notebook is to generate configuration files. Those files are the input to the 
train script (`train.py`).

In [5]:
import os.path as osp
import itertools
import copy
import json

from core.data import dataset_factory

In [6]:
# Datasets for which configurations are generated.
# Un-comment an entry to include that dataset.
dataset_names = [
    'REDDIT-BINARY',
    # 'REDDIT-MULTI-5K',
    # 'COLLAB',
    # 'IMDB-MULTI',
    # 'IMDB-BINARY',
    # 'ENZYMES',
    # 'PTC_PGNN',
    # 'PTC_FM',
    # 'PTC_FR',
    # 'PTC_MM',
    # 'PTC_MR',
    # 'PROTEINS',
    # 'DD',
    # 'NCI1',
    # 'MUTAG',
]

# For every selected dataset, record whether node labels are attached,
# i.e. whether `num_node_lab` of the loaded dataset is not None.
# Each dataset is instantiated once here, so this may take a while.
dataset_has_node_lab = {
    ds_name: dataset_factory(ds_name, verbose=False).num_node_lab is not None
    for ds_name in dataset_names
}
dataset_has_node_lab

{'REDDIT-BINARY': False}

### The optimization related part of the configuration...

In [7]:
# Optimization settings shared by every generated experiment.
# NOTE(review): 10e-06 is the same number as 1e-05 -- confirm that this is the
# intended weight decay (and not 1e-06).
training_cfg = dict(
    lr=0.01,
    lr_drop_fact=0.5,
    num_epochs=100,
    epoch_step=20,
    batch_size=32,
    weight_decay=10e-06,
    validation_ratio=0.1,
)

# `combine` iterates over a list of training configs; only one is used here.
training_cfgs = [training_cfg]

### The model related part of the configuration...

In [8]:
# Model configurations for `model_type` 'PershomRigidDegreeFilt'.
# `proto` is the template; entries set to None are filled in per configuration.
proto = {
    'model_type': 'PershomRigidDegreeFilt',
    'use_super_level_set_filtration': None,
    'num_struct_elements': 100,
    'cls_hidden_dimension': 64,
    'drop_out': 0.0,
}

# One configuration per value of `use_super_level_set_filtration`.
# `proto` only holds scalars, so copying it via dict(...) yields fully
# independent configurations.
model_cfgs_PershomRigidDegreeFilt = [
    dict(proto, use_super_level_set_filtration=super_level)
    for super_level in (False, True)
]

len(model_cfgs_PershomRigidDegreeFilt)

2

In [9]:
# Model configurations for `model_type` 'PershomLearnedFilt'.
# `proto` is the template; entries set to None are filled in per configuration.
proto = {
    'model_type': 'PershomLearnedFilt',
    'use_super_level_set_filtration': None,
    'use_node_degree': None,
    'set_node_degree_uninformative': True,
    'use_node_label': None,
    'gin_number': 1,
    'gin_dimension': 64,
    'gin_mlp_type': 'lin_bn_lrelu_lin',
    'num_struct_elements': 100,
    'cls_hidden_dimension': 64,
    'drop_out': 0.0,
}

# (use_node_degree, use_node_label) combinations; (False, False) is left out.
B = [(True, True), (False, True), (True, False)]

# Grid over: node-feature combination x super-level-set flag x GIN dimension
# x GIN number. `proto` only holds scalars, so a shallow copy with overrides
# gives independent configurations.
model_cfgs_PershomLearnedFilt = [
    {
        **proto,
        'use_node_degree': node_degree,
        'use_node_label': node_label,
        'use_super_level_set_filtration': super_level,
        'gin_dimension': gin_dim,
        'gin_number': gin_num,
    }
    for (node_degree, node_label), super_level, gin_dim, gin_num
    in itertools.product(B, [True], [64], [1])
]

len(model_cfgs_PershomLearnedFilt)

3

In [10]:
# Model configurations for `model_type` 'GIN'.
# `proto` is the template; entries set to None are filled in per configuration.
proto = {
    'model_type': 'GIN',
    'use_node_degree': None,
    'use_node_label': None,
    'gin_number': None,
    'gin_dimension': 64,
    'gin_mlp_type': 'lin_bn_lrelu_lin',
    'cls_hidden_dimension': 64,
    'set_node_degree_uninformative': None,
    'pooling_strategy': 'sort',
    'drop_out': 0.5,
}

# (use_node_degree, use_node_label) combinations; (False, False) is left out.
B = [(True, True), (False, True), (True, False)]

# Grid over: node-feature combination x GIN number x uninformative-degree flag.
# `proto` only holds scalars, so dict(proto, ...) gives independent configurations.
model_cfgs_GIN = [
    dict(
        proto,
        use_node_degree=node_degree,
        use_node_label=node_label,
        gin_number=gin_num,
        set_node_degree_uninformative=uninformative,
    )
    for (node_degree, node_label), gin_num, uninformative
    in itertools.product(B, [1], [True])
]

len(model_cfgs_GIN)

3

In [11]:
# Model configurations for `model_type` 'SimpleNNBaseline'.
# `proto` is the template; entries set to None are filled in per configuration.
proto = {
    'model_type': 'SimpleNNBaseline',
    'use_node_degree': None,
    'use_node_label': None,
    'gin_dimension': 64,
    'gin_mlp_type': 'lin_bn_lrelu_lin',
    'cls_hidden_dimension': 64,
    'set_node_degree_uninformative': None,
    'pooling_strategy': 'sum',
    'drop_out': None,
}

# (use_node_degree, use_node_label) combinations; (False, False) is left out.
B = [(True, True), (False, True), (True, False)]

# Grid over: node-feature combination x uninformative-degree flag x drop-out.
# The drop-out value varies fastest, so each feature combination yields two
# consecutive configurations (0.0, then 0.5).
model_cfgs_SimpleNNBaseline = [
    {
        **proto,
        'use_node_degree': node_degree,
        'use_node_label': node_label,
        'set_node_degree_uninformative': uninformative,
        'drop_out': drop_out,
    }
    for (node_degree, node_label), uninformative, drop_out
    in itertools.product(B, [False], [0.0, 0.5])
]

len(model_cfgs_SimpleNNBaseline)

6

### Now we combine those parts and write the cfg file ...

In [12]:
def combine(dataset_names, training_cfgs, model_cfgs, tag="", has_node_lab=None):
    """Build the list of experiment configurations.

    Forms the Cartesian product of datasets, training configurations and
    model configurations, and drops every combination in which the model
    asks for node labels (`'use_node_label'` is truthy) although the dataset
    has none.

    Args:
        dataset_names: iterable of dataset names.
        training_cfgs: iterable of training configurations.
        model_cfgs: iterable of model configuration dicts. A config without
            a `'use_node_label'` key is never filtered out.
        tag: value stored under `'tag'` in every experiment configuration.
        has_node_lab: optional mapping `dataset name -> bool` stating whether
            the dataset has node labels. When None, the notebook-level
            `dataset_has_node_lab` mapping is used, so that cell must have
            been executed beforehand.

    Returns:
        A list of dicts with the keys `'dataset_name'`, `'training'`,
        `'model'` and `'tag'`, in product order. The training and model
        configurations are stored by reference, not copied.

    Raises:
        KeyError: if a dataset name is missing from the node-label mapping.
    """
    if has_node_lab is None:
        # Fall back to the mapping computed at notebook level.
        has_node_lab = dataset_has_node_lab

    exp_cfgs = []
    for ds_name, training_cfg, model_cfg in itertools.product(
            dataset_names, training_cfgs, model_cfgs):

        # Looked up unconditionally so that an unknown dataset name fails
        # loudly, no matter what the model config requests.
        ds_has_node_lab = has_node_lab[ds_name]

        # Filter out datasets which have no node labels when the model
        # wants to use them.
        wants_node_lab = ('use_node_label' in model_cfg
                          and model_cfg['use_node_label'])
        if wants_node_lab and not ds_has_node_lab:
            continue

        exp_cfgs.append({
            'dataset_name': ds_name,
            'training': training_cfg,
            'model': model_cfg,
            'tag': tag,
        })

    return exp_cfgs

def write_file(dataset_names, training_cfgs, model_cfgs, output_dir, tag="", file_name=None):
    """Combine the given parts into experiment configs and dump them as JSON.

    Args:
        dataset_names: dataset names, forwarded to `combine`; also used to
            build the default file name.
        training_cfgs: training configurations, forwarded to `combine`.
        model_cfgs: model configurations, forwarded to `combine`.
        output_dir: accepted for interface compatibility.
            NOTE(review): this argument is not used by the body; the file is
            written to `file_name` exactly as given -- confirm intent.
        tag: forwarded to `combine`.
        file_name: path of the JSON file to write. When None, the name
            "exp_cfgs__<dataset names joined by '_'>.json" is used.

    Prints the number of configurations written.
    """
    configs = combine(dataset_names, training_cfgs, model_cfgs, tag=tag)

    # Derive a default file name from the dataset names if none was given.
    target = file_name
    if target is None:
        target = "exp_cfgs__" + "_".join(dataset_names) + ".json"

    with open(target, 'w') as handle:
        json.dump(configs, handle)

    print('Num cfgs: ', len(configs))

In [13]:
# Value handed to `write_file` as its `output_dir` argument.
output_dir = 'results'

In [14]:
# Write the cfg file for one model family -- here the learned filtration setup.
# Swap `model_cfgs` for another `model_cfgs_*` list to generate a different family.
write_file(
    dataset_names=dataset_names,
    training_cfgs=training_cfgs,
    model_cfgs=model_cfgs_PershomLearnedFilt,
    output_dir=output_dir,
    tag="",
    file_name='my_config.json',
)

Num cfgs:  1
