In [1]:
# Render matplotlib figures with the interactive `notebook` backend.
%matplotlib notebook

In [9]:
import numpy as np
import matplotlib.pyplot as plt
import os.path as osp
import os
import json
from tqdm import tqdm
from collections import defaultdict
import seml
import pandas as pd
from pprint import pprint

In [3]:
# Name of the seml collection whose stored runs are analysed in this notebook.
collection_name = 'week1_no_model_logging'
collection = seml.database.get_collection(collection_name)

In [4]:
# Fetch only the two fields used below from every stored run. Documents that
# carry no 'result' entry are skipped instead of raising a KeyError
# (presumably pending or failed runs -- TODO confirm against the collection).
results = [
    {'config': doc['config'], 'result': doc['result']}
    for doc in collection.find({}, {'config': 1, 'result': 1})
    if 'result' in doc
]

In [5]:
# Two-level index of the runs: dataset name -> model type -> list of runs.
results_by_dataset_and_model_type = defaultdict(lambda: defaultdict(list))
for run in results:
    run_config = run['config']
    dataset_name = run_config['data']['dataset']
    model_name = run_config['model']['model_type']
    results_by_dataset_and_model_type[dataset_name][model_name].append(run)

In [6]:
# For every (dataset, model type) pair keep the run with the highest mean
# accuracy: its score is stored in `best_scores`, its config in `best_configs`.
best_scores = defaultdict(dict)
best_configs = defaultdict(dict)
for dataset, runs_by_model in results_by_dataset_and_model_type.items():
    for model_type, runs in runs_by_model.items():
        winner = max(runs, key=lambda run: run['result']['accuracy_mean']['value'])
        best_configs[dataset][model_type] = winner['config']
        best_scores[dataset][model_type] = winner['result']['accuracy_mean']['value']

In [7]:
# Tabulate the best mean accuracies: one column per dataset, one row per model type.
df = pd.DataFrame.from_dict(best_scores, orient='columns')
df

Unnamed: 0,cora_ml,citeseer,pubmed
gat,0.734713,0.702254,0.705408
gcn,0.838254,0.687429,0.849486
gin,0.796866,0.646222,0.815742
sage,0.786794,0.680794,0.834072
mlp,0.651962,0.565206,0.827582
appnp,0.84622,0.699333,0.864565


In [10]:
# Dump the winning configuration of every (dataset, model type) pair for inspection.
pprint(best_configs)

defaultdict(<class 'dict'>,
            {'citeseer': {'appnp': {'data': {'dataset': 'citeseer',
                                             'num_dataset_splits': 10,
                                             'test_portion': 0.6,
                                             'test_portion_fixed': 0.2,
                                             'train_portion': 0.05,
                                             'val_portion': 0.15},
                                    'db_collection': 'week1_no_model_logging',
                                    'model': {'activation': 'leaky_relu',
                                              'diffusion_iterations': 10,
                                              'hidden_sizes': [32],
                                              'leaky_relu_slope': 0.01,
                                              'model_type': 'appnp',
                                              'num_initializations': 10,
                                              'tele

In [26]:
# Print one "<dataset>-<model_type>" entry per best configuration, listing the
# winning hyperparameters under a `fixed:` key (presumably meant to be pasted
# into a seml experiment YAML -- TODO confirm).
for dataset, dr in best_configs.items():
    for cnf in dr.values():
        model_cnf = cnf['model']
        model_type = model_cnf['model_type']
        # Print the learning rate exactly and in positional notation. The former
        # `:.3f` format rounded to three decimals, so any value below 5e-4 was
        # emitted as 0.000 and finer-grained values were silently altered.
        learning_rate = np.format_float_positional(
            float(cnf["optimization"]["learning_rate"]), trim='0'
        )
        lines = [
            f'{dataset}-{model_type}:',
            '  fixed:',
            f'    data.dataset: {dataset}',
            f'    model.model_type: {model_type}',
            # NOTE(review): the value is read from the 'optimization' section but
            # emitted under a 'training.' prefix -- confirm this is the intended key.
            f'    training.learning_rate: {learning_rate}',
            f'    model.hidden_sizes: {model_cnf["hidden_sizes"]}',
        ]
        # Model-specific hyperparameters.
        if model_type == 'gat':
            lines.append(f'    model.num_heads: {model_cnf["num_heads"]}')
        elif model_type == 'appnp':
            lines.append(f'    model.teleportation_probability: {model_cnf["teleportation_probability"]}')
            lines.append(f'    model.diffusion_iterations: {model_cnf["diffusion_iterations"]}')
        print('\n'.join(lines))
        # Two blank lines between consecutive entries.
        print('\n')

cora_ml-gat:
  fixed:
    data.dataset: cora_ml
    model.model_type: gat
    training.learning_rate: 0.003
    model.hidden_sizes: [64, 64]
    model.num_heads: 8


cora_ml-gcn:
  fixed:
    data.dataset: cora_ml
    model.model_type: gcn
    training.learning_rate: 0.003
    model.hidden_sizes: [64]


cora_ml-gin:
  fixed:
    data.dataset: cora_ml
    model.model_type: gin
    training.learning_rate: 0.003
    model.hidden_sizes: [64]


cora_ml-sage:
  fixed:
    data.dataset: cora_ml
    model.model_type: sage
    training.learning_rate: 0.003
    model.hidden_sizes: [64]


cora_ml-mlp:
  fixed:
    data.dataset: cora_ml
    model.model_type: mlp
    training.learning_rate: 0.001
    model.hidden_sizes: [32]


cora_ml-appnp:
  fixed:
    data.dataset: cora_ml
    model.model_type: appnp
    training.learning_rate: 0.003
    model.hidden_sizes: [64, 64]
    model.teleportation_probability: 0.1
    model.diffusion_iterations: 10


citeseer-gat:
  fixed:
    data.dataset: citeseer
   