In [1]:
# Load IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell execution so that edits to project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os.path as osp
import itertools
import copy
import uuid
import pickle


import torch
import torch.nn.functional as F
import torch_geometric
import numpy as np

import torch.nn as nn
import torch.optim as optim

import chofer_torchex.pershom as pershom

from torch.nn import Sequential, Linear, ReLU
from torch.optim.lr_scheduler import MultiStepLR

from torch_geometric.datasets import TUDataset
from torch_geometric.nn import GINConv, global_add_pool

# NOTE(review): `pershom` is already bound by the earlier
# `import chofer_torchex.pershom as pershom`; this second import is redundant.
from chofer_torchex import pershom
# Short alias for a routine exposed by the compiled pershom backend.
# Judging by its name it computes vertex-filtration persistence in batches on
# CUDA -- TODO confirm against chofer_torchex. It is not called in the cells
# visible in this notebook.
ph = pershom.pershom_backend.__C.VertFiltCompCuda__vert_filt_persistence_batch

from chofer_torchex.nn import SLayerRationalHat
from collections import defaultdict, Counter

# Hard-coded GPU identifier. None of the active cells visible here read it
# (the commented-out experiment loop passes its own device string).
device = 'cuda:1'

import core.model
from core.data import dataset_factory
from core.utils import my_collate, evaluate
from core.train_engine import *

from tensorboardX import SummaryWriter



Using /tmp/chofer_torch_extensions as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /tmp/chofer_torch_extensions/pershom_cuda_ext/build.ninja...
Building extension module pershom_cuda_ext...
Loading extension module pershom_cuda_ext...


In [3]:
# Datasets to generate experiment configurations for. Entries that are
# commented out are excluded from the current sweep.
dataset_names = [
    'REDDIT-BINARY',
    'REDDIT-MULTI-5K',
    # 'COLLAB',
    # 'IMDB-MULTI',
    # 'IMDB-BINARY',
    # 'ENZYMES',
    # 'PTC_PGNN',
    # 'PTC_FM',
    # 'PTC_FR',
    # 'PTC_MM',
    # 'PTC_MR',
    # 'PROTEINS',
    # 'DD',
    # 'NCI1',
    # 'MUTAG',
]

# For every selected dataset, record whether the object returned by
# `dataset_factory` reports node labels (`num_node_lab is not None`).
# `combine` reads this mapping to drop label-based models on unlabeled data.
dataset_has_node_lab = {
    name: dataset_factory(name, verbose=False).num_node_lab is not None
    for name in dataset_names
}
dataset_has_node_lab



{'REDDIT-BINARY': False, 'REDDIT-MULTI-5K': False}

In [4]:
# Training settings shared by every generated experiment.
training_cfg = dict(
    lr=0.01,
    lr_drop_fact=0.5,
    num_epochs=100,
    epoch_step=20,
    batch_size=32,
    weight_decay=10e-06,  # note: 10e-06 is the same value as 1e-05
    validation_ratio=0.1,
)

# `combine` iterates over a list of training configs; only one is used here.
training_cfgs = [training_cfg]

In [5]:
# Model configurations for the 'PershomRigidDegreeFilt' model type.
proto = dict(
    model_type='PershomRigidDegreeFilt',
    use_super_level_set_filtration=None,  # filled in per variant below
    num_struct_elements=100,
    cls_hidden_dimension=64,
    drop_out=0.0,
)

# One config per value of `use_super_level_set_filtration`. Only True is
# enabled; use [False, True] to generate both variants.
model_cfgs_PershomRigidDegreeFilt = [
    {**copy.deepcopy(proto), 'use_super_level_set_filtration': flag}
    for flag in [True]
]

len(model_cfgs_PershomRigidDegreeFilt)

1

In [6]:
# Model configurations for the 'PershomLearnedFilt' model type.
proto = dict(
    model_type='PershomLearnedFilt',
    use_super_level_set_filtration=None,  # filled in per variant below
    use_node_degree=None,                 # filled in per variant below
    set_node_degree_uninformative=True,
    use_node_label=None,                  # filled in per variant below
    gin_number=1,
    gin_dimension=64,
    gin_mlp_type='lin_bn_lrelu_lin',
    num_struct_elements=100,
    cls_hidden_dimension=64,
    drop_out=0.0,
)

# (use_node_degree, use_node_label) pairs to sweep; (False, False) is not included.
B = [(True, True), (False, True), (True, False)]

model_cfgs_PershomLearnedFilt = []
for (with_degree, with_label), super_level, gin_dim, gin_num in itertools.product(B, [True], [64], [1]):
    cfg = copy.deepcopy(proto)
    # All keys already exist in `proto`, so the key order is unchanged.
    cfg.update(
        use_node_degree=with_degree,
        use_node_label=with_label,
        use_super_level_set_filtration=super_level,
        gin_dimension=gin_dim,
        gin_number=gin_num,
    )
    model_cfgs_PershomLearnedFilt.append(cfg)

len(model_cfgs_PershomLearnedFilt)

3

In [7]:
# Model configurations for the 'GIN' model type.
proto = dict(
    model_type='GIN',
    use_node_degree=None,                # filled in per variant below
    use_node_label=None,                 # filled in per variant below
    gin_number=None,                     # filled in per variant below
    gin_dimension=64,
    gin_mlp_type='lin_bn_lrelu_lin',
    cls_hidden_dimension=64,
    set_node_degree_uninformative=None,  # filled in per variant below
    pooling_strategy='sort',
    drop_out=0.5,
)

# (use_node_degree, use_node_label) pairs to sweep; (False, False) is not included.
B = [(True, True), (False, True), (True, False)]

# One deep copy of `proto` per grid point, with the swept keys overridden.
model_cfgs_GIN = [
    {
        **copy.deepcopy(proto),
        'use_node_degree': with_degree,
        'use_node_label': with_label,
        'gin_number': gin_num,
        'set_node_degree_uninformative': uninformative,
    }
    for (with_degree, with_label), gin_num, uninformative in itertools.product(B, [1], [True])
]

len(model_cfgs_GIN)

3

In [10]:
# Model configurations for the 'SimpleNNBaseline' model type.
proto = dict(
    model_type='SimpleNNBaseline',
    use_node_degree=None,                # filled in per variant below
    use_node_label=None,                 # filled in per variant below
    gin_dimension=64,
    gin_mlp_type='lin_bn_lrelu_lin',
    cls_hidden_dimension=64,
    set_node_degree_uninformative=None,  # filled in per variant below
    pooling_strategy='sum',
    drop_out=None,                       # filled in per variant below
)

# (use_node_degree, use_node_label) pairs to sweep; (False, False) is not included.
B = [(True, True), (False, True), (True, False)]

# Each feature pair is generated once per dropout value (0.0 and 0.5).
model_cfgs_SimpleNNBaseline = []
for (with_degree, with_label), uninformative, dropout in itertools.product(B, [False], [0.0, 0.5]):
    cfg = copy.deepcopy(proto)
    cfg.update(
        use_node_degree=with_degree,
        use_node_label=with_label,
        set_node_degree_uninformative=uninformative,
        drop_out=dropout,
    )
    model_cfgs_SimpleNNBaseline.append(cfg)

len(model_cfgs_SimpleNNBaseline)

6

In [8]:
def combine(dataset_names, training_cfgs, model_cfgs, tag="", has_node_lab=None):
    """Build the cross product of datasets, training configs and model configs.

    A combination is skipped when the model config asks for node labels
    (truthy ``'use_node_label'``) but the dataset has none.

    Args:
        dataset_names: iterable of dataset names.
        training_cfgs: iterable of training-configuration dicts.
        model_cfgs: iterable of model-configuration dicts. A config without a
            ``'use_node_label'`` key is never filtered out.
        tag: value stored under ``'tag'`` in every experiment config.
        has_node_lab: optional mapping ``dataset name -> bool`` saying whether
            the dataset has node labels. Defaults to the notebook-level
            ``dataset_has_node_lab``. Passing it explicitly avoids relying on
            that global being in sync with ``dataset_names``.

    Returns:
        A list of dicts with the keys ``'dataset_name'``, ``'training'``,
        ``'model'`` and ``'tag'``. The training and model dicts are stored by
        reference, not copied.

    Raises:
        KeyError: if a dataset name is missing from the node-label mapping.
    """
    if has_node_lab is None:
        # Backward-compatible default: the mapping built in the dataset cell.
        has_node_lab = dataset_has_node_lab

    exp_cfgs = []
    for ds_name, train_cfg, model_cfg in itertools.product(dataset_names, training_cfgs, model_cfgs):
        # Looked up for every combination so that an unknown dataset fails
        # loudly instead of silently producing configs.
        ds_has_node_lab = has_node_lab[ds_name]

        # Drop models that need node labels on datasets that have none.
        if model_cfg.get('use_node_label') and not ds_has_node_lab:
            continue

        exp_cfgs.append({
            'dataset_name': ds_name,
            'training': train_cfg,
            'model': model_cfg,
            'tag': tag,
        })

    return exp_cfgs

def write_file(dataset_names, training_cfgs, model_cfgs, output_dir, tag="", file_name=None):
    """Generate experiment configs with ``combine`` and dump them as JSON.

    Args:
        dataset_names: dataset names forwarded to ``combine``.
        training_cfgs: training-configuration dicts forwarded to ``combine``.
        model_cfgs: model-configuration dicts forwarded to ``combine``.
        output_dir: directory the JSON file is written into.
        tag: tag forwarded to ``combine``.
        file_name: name of the output file. When ``None`` it is derived from
            the dataset names as ``exp_cfgs__<name1>_<name2>....json``.

    Returns:
        None. The number of written configs is printed.
    """
    # The notebook's import cell only binds `osp` (`import os.path as osp`)
    # and never imports `json`, so both names are imported here rather than
    # relying on them leaking in from elsewhere (e.g. a star import).
    import json
    import os

    exp_cfgs = combine(dataset_names, training_cfgs, model_cfgs, tag=tag)
    if file_name is None:
        file_name = "exp_cfgs__" + "_".join(dataset_names) + ".json"

    with open(os.path.join(output_dir, file_name), 'w') as fid:
        json.dump(exp_cfgs, fid)

    print('Num cfgs: ', len(exp_cfgs))

In [9]:
# Directory that `write_file` writes the generated JSON config files into.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
output_dir = '/home/pma/chofer/repositories/nips_2019_code/results'

In [10]:
# Write the GIN sweep for the selected datasets into one JSON file under
# `output_dir`, tagging every experiment config with "1.0".
write_file(
    dataset_names=dataset_names,
    training_cfgs=training_cfgs,
    model_cfgs=model_cfgs_GIN,
    output_dir=output_dir,
    tag="1.0",
    file_name='exp_cfg_REDDIT_uninformative_SortPooling.json',
)

Num cfgs:  2


In [None]:
# exp_cfgs = combine(dataset_names, 
#                    training_cfgs,
#                    model_cfgs_PershomRigidDegreeFilt)
# print(len(exp_cfgs))
# with open(os.path.join(output_dir, 'exp_cfgs_rigid_degree_filtration.json'), 'w') as fid:
#     json.dump(exp_cfgs, fid)
    
    
# exp_cfgs = combine(dataset_names, 
#                    training_cfgs,
#                    model_cfgs_PershomLearnedFilt)
# print(len(exp_cfgs))
# with open(os.path.join(output_dir, 'exp_cfgs_learnt_filtration.json'), 'w') as fid:
#     json.dump(exp_cfgs, fid)

    
# exp_cfgs = combine(dataset_names, 
#                    training_cfgs,
#                    model_cfgs_GIN)
# print(len(exp_cfgs))
# with open(os.path.join(output_dir, 'exp_cfgs_learnt_filtration.json'), 'w') as fid:
#     json.dump(exp_cfgs, fid)

In [None]:
# Build the rigid-degree-filtration experiment configs in memory (nothing is
# written to disk here) and report how many were produced.
cfgs = combine(
    dataset_names=dataset_names,
    training_cfgs=training_cfgs,
    model_cfgs=model_cfgs_PershomRigidDegreeFilt,
)
print(len(cfgs))

In [None]:
# for c in cfgs:
#     experiment(c, device='cuda:3')