In [1]:
import json
import numpy as np

# Best-parameter JSON files to read, in the order they are merged afterwards.
jsons_files = [
    'best_parameters_cell.json',
    'best_parameters_simplicial.json',
    'best_parameters_graph.json',
    'best_parameters_hypergraph.json',
]

# One parsed object per file, in the same order as `jsons_files`.
js_list = []
for json_path in jsons_files:
    with open(json_path, 'r') as handle:
        parsed = json.load(handle)
    js_list.append(parsed)


# Merge all loaded files into one two-level dict.
# The code below reads the result as final_json[dataset][model_name] -> params.
# When a top-level key occurs in several files their inner dicts are combined;
# if an inner key clashes, the file loaded later wins (dict.update semantics).
final_json = {}
for js in js_list:
    for key, inner in js.items():
        # Update a dict owned by `final_json` rather than storing `js[key]`
        # itself: storing the loaded object and updating it later would
        # silently modify the entries held in `js_list`.
        final_json.setdefault(key, {}).update(inner)


# Rename the model key 'cwn_dcm' to 'cccn' wherever it occurs.
# The affected (dataset, model) pairs are gathered first, so no inner dict is
# modified while it is being scanned.
pairs = [
    (dataset, 'cwn_dcm')
    for dataset, models in final_json.items()
    if 'cwn_dcm' in models
]

for dataset, model_name in pairs:
    models = final_json[dataset]
    # pop() removes the old key; the same parameters are stored under 'cccn'.
    models['cccn'] = models.pop(model_name)
            

In [2]:
# Top-level keys of `final_json`. This is a live dict view, not a copy, so it
# reflects any keys added to or removed from `final_json` later on.
dataset_names = final_json.keys()

# Lookup table: model name -> domain.
# Declared once per domain and flattened into a flat dict; the domain is used
# as the prefix of the `model=<domain>/<model_name>` override.
model2domain = {
    model: domain
    for domain, models in (
        ('cell', ('ccxn', 'cwn', 'cccn')),
        ('simplicial', ('scn', 'sccn', 'sccnn_custom')),
        ('graph', ('gcn', 'gin', 'gat')),
        ('hypergraph', ('unignn2', 'edgnn', 'allsettransformer')),
    )
    for model in models
}

# Lookup table: dataset name -> task type ('graph' or 'node').
# The task type selects which entry of `task2trainer` is appended to a command.
dataset2task = {
    **dict.fromkeys(
        (
            'ZINC',
            'REDDIT-BINARY',
            'IMDB-MULTI',
            'IMDB-BINARY',
            'PROTEINS',
            'MUTAG',
            'NCI1',
            'NCI109',
        ),
        'graph',
    ),
    **dict.fromkeys(
        (
            'citeseer',
            'PubMed',
            'Cora',
            'roman_empire',
            'minesweeper',
            'amazon_ratings',
            'tolokers',
            'US-county-demos-UnemploymentRate',
            'US-county-demos-BachelorRate',
            'US-county-demos-DeathRate',
            'US-county-demos-BirthRate',
            'US-county-demos-MigraRate',
            'US-county-demos-MedianIncome',
            'US-county-demos-Election',
        ),
        'node',
    ),
}

# Trainer / early-stopping overrides appended to every generated command,
# selected by task type ('node' or 'graph').
#
# Two profiles are kept side by side instead of hiding one in commented-out code:
#   'full'  - the values that used to live in a commented-out block
#             (presumably the settings for real runs - TODO confirm);
#   'smoke' - one-epoch values, i.e. what this cell has actually been producing.
# SMOKE_TEST picks the profile. It defaults to True so the generated commands
# stay exactly as before; set it to False to emit the 'full' values.
SMOKE_TEST = True

_trainer_profiles = {
    'full': {
        'node': [
            'trainer.max_epochs=1000',
            'trainer.min_epochs=50',
            'trainer.check_val_every_n_epoch=1',
            'callbacks.early_stopping.patience=50',
        ],
        'graph': [
            'trainer.max_epochs=500',
            'trainer.min_epochs=50',
            'trainer.check_val_every_n_epoch=5',
            'callbacks.early_stopping.patience=10',
        ],
    },
    'smoke': {
        'node': [
            'trainer.max_epochs=1',
            'trainer.min_epochs=1',
            'trainer.check_val_every_n_epoch=1',
            'callbacks.early_stopping.patience=1',
        ],
        'graph': [
            'trainer.max_epochs=1',
            'trainer.min_epochs=1',
            'trainer.check_val_every_n_epoch=1',
            'callbacks.early_stopping.patience=1',
        ],
    },
}

# Copy the lists so that editing `task2trainer` never alters the stored profiles.
task2trainer = {
    task: list(overrides)
    for task, overrides in _trainer_profiles['smoke' if SMOKE_TEST else 'full'].items()
}

# Extra command-line overrides, looked up by domain name or by dataset name.
# Note the two value shapes: the 'cell' entry is a single string, while every
# dataset entry is a list of strings.
additional_strs = {
    'cell': 'transforms.graph2cell_lifting.max_cell_length=10',
    # The original trailing comment listed further seed values (3,5,23,150)
    # after 42; only seed=42 is emitted.
    'ZINC': [
        'callbacks.early_stopping.min_delta=0.005',
        'transforms.one_hot_node_degree_features.degrees_fields=x',
        'seed=42',
    ],
    # Every US-county-demos variant sets its own task variable and the year 2012.
    **{
        f'US-county-demos-{variable}': [
            f'dataset.loader.parameters.task_variable={variable}',
            'dataset.loader.parameters.year=2012',
        ]
        for variable in (
            'Election',
            'MedianIncome',
            'MigraRate',
            'BirthRate',
            'DeathRate',
            'BachelorRate',
            'UnemploymentRate',
        )
    },
}


# Dataset key used in the parameter files -> name used in the `dataset=graph/<name>`
# override. Datasets missing from this table are looked up with themselves as the
# fallback, so they keep their own name.
rename_datasets = {
    'Cora': 'cocitation_cora',
    'citeseer': 'cocitation_citeseer',
    'PubMed': 'cocitation_pubmed',
    # All US-county-demos variants map to the same dataset name.
    **dict.fromkeys(
        (
            'US-county-demos-UnemploymentRate',
            'US-county-demos-BachelorRate',
            'US-county-demos-BirthRate',
            'US-county-demos-DeathRate',
            'US-county-demos-MedianIncome',
            'US-county-demos-MigraRate',
            'US-county-demos-Election',
        ),
        'US-county-demos',
    ),
    'ZINC': 'ZINC',
}

In [3]:
def is_nan(value):
    """Tell whether ``value`` is NaN, always answering with a plain ``bool``.

    Args:
        value: Any object, typically a value parsed from JSON (number, string,
            bool, None, list, ...).

    Returns:
        bool: ``True`` for a scalar NaN, or for a non-empty sequence whose
        entries are all NaN. ``False`` for everything else, including values
        ``np.isnan`` cannot handle (strings, ``None``, ragged lists, ...).

    Note:
        ``np.isnan`` returns an array for sequence input. Returning that array
        made ``not is_nan(value)`` raise ``ValueError`` for lists with more
        than one element, so the result is reduced to a single bool here.
    """
    try:
        result = np.isnan(value)
    except (TypeError, ValueError):
        # TypeError: non-numeric input; ValueError: e.g. ragged nested lists.
        return False

    if np.ndim(result) == 0:
        return bool(result)

    # Sequence input: NaN only if it is non-empty and every entry is NaN.
    return bool(result.size) and bool(result.all())



# Parameter keys whose value is cast to int and emitted under the same name.
int_cast_keys = (
    'model.feature_encoder.out_channels',
    'model.backbone.num_layers',
    'model.backbone.n_layers',
    'model.backbone.All_num_layers',
)

# Parameter keys emitted under a different name: key -> (new name, cast to int?).
renamed_keys = {
    'model.optimizer.lr': ('optimizer.parameters.lr', False),
    'dataset.parameters.batch_size': ('dataset.dataloader_params.batch_size', True),
    'dataset.transforms.graph2simplicial_lifting.signed': ('transforms.graph2simplicial_lifting.signed', False),
}

# One command string per (dataset, model) pair.
execute_strings = []

for dataset in dataset_names:
    # The 'questions' dataset is left out.
    if dataset == 'questions':
        continue

    for model_name, best_params in final_json[dataset].items():
        domain = model2domain[model_name]

        # Space-separated pieces of the command, in emission order.
        parts = [
            f"python -m topobenchmarkx model={domain}/{model_name} dataset=graph/{rename_datasets.get(dataset, dataset)}"
        ]

        # Best parameters; NaN values are dropped.
        for key, value in best_params.items():
            if is_nan(value):
                continue
            target, as_int = renamed_keys.get(key, (key, key in int_cast_keys))
            rendered = int(value) if as_int else value
            parts.append(f"{target}={rendered}")

        # Domain-specific extra (a single string).
        domain_extra = additional_strs.get(domain, None)
        if domain_extra:
            parts.append(f"{domain_extra}")

        # Dataset-specific extras (a list of strings).
        parts.extend(f"{extra}" for extra in (additional_strs.get(dataset, None) or ()))

        # Every dataset except ZINC gets the split seed override.
        if dataset != "ZINC":
            parts.append("dataset.split_params.data_seed=0")  # 0,3,5,7,9

        # Trainer overrides for the dataset's task type.
        parts.extend(f"{flag}" for flag in task2trainer[dataset2task[dataset]])

        # Logger project and multirun flag close the command.
        parts.append("logger.wandb.project=TopoBenchmarkX_main --multirun")

        execute_strings.append(" ".join(parts))
        


In [4]:
# Sort the generated commands in place, using plain string ordering.
# NOTE(review): this mutates the list built in the previous cell; re-running
# this cell alone is harmless because sorting an already sorted list is a no-op.
execute_strings.sort()

In [5]:
# Print every command on its own line, wrapped in single quotes.
for command in execute_strings:
    print("'{}'".format(command))

'python -m topobenchmarkx model=cell/cccn dataset=graph/IMDB-BINARY optimizer.parameters.lr=0.001 model.feature_encoder.out_channels=128 model.backbone.n_layers=3 model.readout.readout_name=NoReadOut model.feature_encoder.proj_dropout=0.5 dataset.dataloader_params.batch_size=256 transforms.graph2cell_lifting.max_cell_length=10 dataset.split_params.data_seed=0 trainer.max_epochs=500 trainer.min_epochs=50 trainer.check_val_every_n_epoch=5 callbacks.early_stopping.patience=10 logger.wandb.project=TopoBenchmarkX_main --multirun'
'python -m topobenchmarkx model=cell/cccn dataset=graph/IMDB-MULTI optimizer.parameters.lr=0.01 model.feature_encoder.out_channels=64 model.backbone.n_layers=1 model.readout.readout_name=PropagateSignalDown model.feature_encoder.proj_dropout=0.5 dataset.dataloader_params.batch_size=256 transforms.graph2cell_lifting.max_cell_length=10 dataset.split_params.data_seed=0 trainer.max_epochs=500 trainer.min_epochs=50 trainer.check_val_every_n_epoch=5 callbacks.early_stopp

In [55]:
# Keep only the commands that contain the substring 'cwn', then print each one
# on its own line wrapped in single quotes.
a = [command for command in execute_strings if 'cwn' in command]
for row in a:
    print("'{}'".format(row))

'python -m topobenchmarkx model=cell/cwn dataset=graph/tolokers optimizer.parameters.lr=0.001 model.feature_encoder.out_channels=32 model.backbone.n_layers=1 model.readout.readout_name=PropagateSignalDown model.feature_encoder.proj_dropout=0.25 dataset.dataloader_params.batch_size=1 transforms.graph2cell_lifting.max_cell_length=10 transforms.one_hot_node_degree_features.degrees_fields=x dataset.split_params.data_seed=0 trainer.max_epochs=1000 trainer.min_epochs=50 trainer.check_val_every_n_epoch=1 callbacks.early_stopping.patience=50 logger.wandb.project=TopoBenchmarkX_main --multirun'
'python -m topobenchmarkx model=cell/cwn dataset=graph/ZINC optimizer.parameters.lr=0.001 model.feature_encoder.out_channels=64 model.backbone.n_layers=2 model.readout.readout_name=PropagateSignalDown model.feature_encoder.proj_dropout=0.25 dataset.dataloader_params.batch_size=128 transforms.graph2cell_lifting.max_cell_length=10 transforms.one_hot_node_degree_features.degrees_fields=x callbacks.early_sto

In [None]:
# NOTE(review): this cell contains a bare shell command, which is not valid Python,
# and it has no execution count - presumably it was kept as a hand-written reference.
# Its override keys (dataset.parameters.*, dataset.transforms.*) differ from the ones
# the generator loop in this notebook emits (dataset.dataloader_params.*, transforms.*).
# TODO confirm which spelling is current before running it (e.g. with a leading `!`).
python -m topobenchmarkx model=cell/cwn dataset=graph/cocitation_cora optimizer.parameters.lr=0.001 model.feature_encoder.out_channels=64 model.backbone.n_layers=1 model.readout.readout_name=PropagateSignalDown model.feature_encoder.proj_dropout=0.5 dataset.parameters.batch_size=1 dataset.transforms.graph2cell_lifting.max_cell_length=10 dataset.parameters.data_seed=0 dataset.transforms.one_hot_node_degree_features.degrees_fields=x logger.wandb.project=TopoBenchmarkX_main --multirun