In [1]:
%load_ext autoreload
%autoreload 2

In [62]:
import os

while 'notebooks' in os.getcwd():
    os.chdir('..')

import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid

from src.train.structural_omega.graph_sage import StructuralOmegaGraphSageCosSim
from src.train.structural_omega.gcn import StructuralOmegaGCNCosSim
from src.train.structural_omega.mlp import StructuralOmegaMLP
from src.train.structural_omega.gat import StructuralOmegaGATCosSim
from src.train.positional_omega.graph_sage import PositionalOmegaGraphSageCosSim
from src.train.positional_omega.node2vec import PositionalOmegaNode2Vec

In [55]:
model_parameters = [
    {
        'model': StructuralOmegaGraphSageCosSim,
        'parameter_range':{
            'n_layers_graph_sage': list(range(1, 4))
        }
    },
    {
        'model': StructuralOmegaGCNCosSim,
        'parameter_range':{
            'n_layers_gcn': list(range(1, 4))
        }
    },
    {
        'model': StructuralOmegaMLP,
        'parameter_range':{
            'n_layers': list(range(1, 6))
        }
    },
    {
        'model': StructuralOmegaGATCosSim,
        'parameter_range':{
            'n_layers_gat': list(range(1, 4))
        }
    },
    {
        'model': StructuralOmegaMLP,
        'parameter_range':{
            'n_layers': list(range(1, 4))
        }
    },
    {
        'model': PositionalOmegaGraphSageCosSim,
        'parameter_range':{
            'n_layers': list(range(1, 4))
        }
    },
    {
        'model': PositionalOmegaNode2Vec,
        'parameter_range':{
            'p': np.logspace(-1, 1, 5).round(2).tolist(),
            'q': np.logspace(-1, 1, 5).round(2).tolist()
        }
    }
]

In [56]:
model = PositionalOmegaGraphSageCosSim

In [60]:
model.__name__

'PositionalOmegaGraphSageCosSim'

In [84]:
model_series_list = []
for dataset in ['ogbn-arxiv', 'cora']:
    for model_dict in model_parameters:
        model = model_dict['model']
        model_name = model.__name__
        for params in ParameterGrid(model_dict['parameter_range']):
            metrics = model.read_metrics(dataset, **params)
            aucs_list = []
            for run in metrics['run'].unique():
                sub_df = metrics.query(f'run == {run}')
                idxmax = sub_df['auc_val'].idxmax()
                aucs = sub_df.loc[idxmax, ['auc_train', 'auc_val', 'auc_test']]
                aucs_list.append(aucs.rename(run))

            aucs_df = pd.concat(aucs_list, axis=1).rename_axis(columns='run').T
            aucs_mean = aucs_df.mean()
            aucs_std = aucs_df.std()

            model_series_list.append(pd.Series({
                'dataset': dataset,
                'model_name': model_name,
                'params': params,
                'mean_auc_train': aucs_mean['auc_train'],
                'std_auc_train': aucs_std['auc_train'],
                'mean_auc_val': aucs_mean['auc_val'],
                'std_auc_val': aucs_std['auc_val'],
                'mean_auc_test': aucs_mean['auc_test'],
                'std_auc_test': aucs_std['auc_test'],
            }))

summary_df = pd.concat(model_series_list, axis=1).T

In [92]:
summary_df

Unnamed: 0,dataset,model_name,params,mean_auc_train,std_auc_train,mean_auc_val,std_auc_val,mean_auc_test,std_auc_test
0,ogbn-arxiv,StructuralOmegaGraphSageCosSim,{'n_layers_graph_sage': 1},0.833655,0.000578,0.803573,0.000166,0.774845,0.000308
1,ogbn-arxiv,StructuralOmegaGraphSageCosSim,{'n_layers_graph_sage': 2},0.876891,0.005401,0.82857,0.001862,0.79265,0.002747
2,ogbn-arxiv,StructuralOmegaGraphSageCosSim,{'n_layers_graph_sage': 3},0.890174,0.002311,0.848632,0.001677,0.817013,0.001488
3,ogbn-arxiv,StructuralOmegaGCNCosSim,{'n_layers_gcn': 1},0.785775,0.00121,0.781448,0.000395,0.760862,0.000787
4,ogbn-arxiv,StructuralOmegaGCNCosSim,{'n_layers_gcn': 2},0.853997,0.002835,0.836976,0.001716,0.81015,0.002574
...,...,...,...,...,...,...,...,...,...
85,cora,PositionalOmegaNode2Vec,"{'p': 10.0, 'q': 0.1}",0.553903,0.039018,0.565294,0.012467,0.569602,0.013505
86,cora,PositionalOmegaNode2Vec,"{'p': 10.0, 'q': 0.32}",0.561533,0.027831,0.567862,0.036673,0.556509,0.021694
87,cora,PositionalOmegaNode2Vec,"{'p': 10.0, 'q': 1.0}",0.557592,0.02223,0.562054,0.023377,0.555846,0.016991
88,cora,PositionalOmegaNode2Vec,"{'p': 10.0, 'q': 3.16}",0.545672,0.037902,0.581606,0.030513,0.564161,0.004561


In [99]:
summary_df['mean_auc_val'].astype(float).idxmax()

48

In [113]:
def get_max(model_df):
    idxmax = model_df['mean_auc_val'].astype(float).idxmax()
    return model_df.loc[idxmax].drop(index=['dataset', 'model_name', 'params']).astype(float)


summary_df.groupby(['dataset', 'model_name'], group_keys=False).apply(
    get_max).style.background_gradient(axis=0).format(lambda x: f'{x:.3f}')


Unnamed: 0_level_0,Unnamed: 1_level_0,mean_auc_train,std_auc_train,mean_auc_val,std_auc_val,mean_auc_test,std_auc_test
dataset,model_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cora,PositionalOmegaGraphSageCosSim,0.713,0.008,0.791,0.005,0.73,0.005
cora,PositionalOmegaNode2Vec,0.617,0.011,0.635,0.012,0.596,0.011
cora,StructuralOmegaGATCosSim,0.945,0.009,0.853,0.012,0.789,0.015
cora,StructuralOmegaGCNCosSim,0.995,0.0,0.878,0.002,0.819,0.002
cora,StructuralOmegaGraphSageCosSim,1.0,0.0,0.871,0.003,0.822,0.002
cora,StructuralOmegaMLP,0.926,0.002,0.711,0.001,0.685,0.0
ogbn-arxiv,PositionalOmegaGraphSageCosSim,0.675,0.003,0.698,0.004,0.69,0.004
ogbn-arxiv,PositionalOmegaNode2Vec,0.622,0.0,0.638,0.002,0.634,0.001
ogbn-arxiv,StructuralOmegaGATCosSim,0.866,0.002,0.849,0.002,0.82,0.003
ogbn-arxiv,StructuralOmegaGCNCosSim,0.868,0.002,0.847,0.001,0.823,0.002
