# Tutorial 7 : Neural Process Graphs

Last Update : 28 July 2019

**Aim**: 


In [1]:
N_THREADS = 12
# Nota Bene : notebooks don't deallocate GPU memory
IS_FORCE_CPU = True # can also be set in the trainer

## Environment

In [2]:
cd ..

/conv


In [3]:
%autosave 600
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# CENTER PLOTS
from IPython.core.display import HTML
display(HTML(""" <style> .output_png {display: table-cell; text-align: center; margin:auto; }
.prompt display:none;}  </style>"""))

import os
if IS_FORCE_CPU:
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    
import sys
sys.path.append("notebooks")

import numpy as np
import matplotlib.pyplot as plt
import torch
torch.set_num_threads(N_THREADS)

Autosaving every 600 seconds


# Dataset 

Cora, Citeseer, Pubmed
PROTEINS, enzymes


In [4]:
from torch_geometric.datasets import TUDataset, Planetoid, PPI, QM7b, ModelNet
from sklearn.preprocessing import StandardScaler

from torch_geometric.datasets import TUDataset, Planetoid, PPI, QM7b, ModelNet
from sklearn.preprocessing import StandardScaler
from utils.data.helpers import make_ssl_dataset_
from copy import deepcopy

In [5]:
from random import shuffle

datasets=dict(enzymes = TUDataset(root='data/ENZYMES', name='ENZYMES', use_node_attr=True, ),
             proteins = TUDataset(root='data/PROTEINS_full', name='PROTEINS_full', use_node_attr=True),
             synthie = TUDataset(root='data/Synthie', name='Synthie', use_node_attr=True))

data_specific_kwargs = {k:dict(y_dim=d.num_node_features,
                               t_dim=d.num_classes
                              ) 
                        for k,d in datasets.items()}

def train_test_split(d, transform=lambda x : StandardScaler().fit_transform(x)):
    if transform is not None:
        d.data.x = torch.from_numpy(transform(d.data.x.numpy()))
    d = d.shuffle()
    return d[:-len(d) // 10], d[-len(d) // 10:]

from skssl.utils.helpers import cont_tuple_to_tuple_cont

def ssl_graph(train_test, label_perc=0.1, is_add_test=True, is_augment=True):
    train, test = train_test
    make_ssl_dataset_(train, label_perc, is_graph=True)
    list_train, list_test = list(train), list(test)
    
    if is_add_test:
        test = deepcopy(test)
        test.data.y = torch.ones_like(test.data.y) * -1
        list_train += list(test)
        
    if is_augment:
        indcs_labels = (train.data.y != -1)
        factor = int((1 - label_perc)/label_perc)-1
        labeled_data = [d for d,i in zip(list_train, indcs_labels) if i]
        list_train += labeled_data * factor
        
    return list_train, list_test

In [6]:
from skssl.transformers.neuralproc.datasplit import GridCntxtTrgtGetter, RandomMasker, no_masker, half_masker
from utils.data.tsdata import get_timeseries_dataset, SparseMultiTimeSeriesDataset

get_cntxt_trgt_test = GridCntxtTrgtGetter(context_masker=RandomMasker(min_nnz=0.01, max_nnz=0.50),
                                         target_masker=no_masker,
                                         is_add_cntxts_to_trgts=False, is_stratify=True)  # don't context points to tagrtes

get_cntxt_trgt_feat = GridCntxtTrgtGetter(context_masker=no_masker,
                                     target_masker=no_masker,
                                     is_add_cntxts_to_trgts=False)  # don't context points to tagrtes

get_cntxt_trgt = GridCntxtTrgtGetter(context_masker=RandomMasker(min_nnz=0.01, max_nnz=0.99),
                                 target_masker=RandomMasker(min_nnz=0.50, max_nnz=0.99),
                                 is_add_cntxts_to_trgts=False, is_stratify=True)  # don't context points to tagrtes

import torch
import skorch
from torch_geometric.data import Batch

def cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=False):
    def mycollate(data_list):
        
        if is_repeat_batch:
            data_list = data_list + data_list
            
        data = Batch.from_data_list(data_list, [])
        edge_attr = torch.ones_like(data.edge_index[0], dtype=torch.float) if data.edge_attr is None else data.edge_attr
        
        X, mask_context, mask_target = get_cntxt_trgt(data.x.t().unsqueeze(0), None, is_grided=True, 
                                                      stratify=data.batch)
        data.x = X.squeeze(0).t()
            
        
        # Can't pass a Dataset directly, since it expects tensors. 
        # Use dict of tensors instead. Also, use torch.sparse for 
        # adjacency matrix to pass skorch's same-dimension check
        return {
            "X":{'x': data.x,
            'adj': torch.sparse.FloatTensor(data.edge_index, 
                                            edge_attr, 
                                            size=[data.num_nodes, data.num_nodes], 
                                            device=data.x.device),
            'batch': data.batch},
            'mask_context':mask_context.squeeze(0),
            'mask_target':mask_target.squeeze(0),
            
        }, data.y
    
    return mycollate
        
class SkorchDataset(skorch.dataset.Dataset):
    def __init__(self, X, y):
        # We need to specify `length` to avoid checks
        super(SkorchDataset, self).__init__(X, y, length=len(X))
    
    def transform(self, X, y):
        return X   # Ignore y, since it is included in X
    

In [7]:
X_DIM = 2  # 2D spatial input 
#Y_DIM = data.shape[0]
N_TARGETS = None#data.n_classes

label_percentages = [0.01, 0.05, 0.1, 0.3, 0.5, 1]

# Model

In [8]:
from skssl.transformers import GraphConvNeuralProcess, GraphNeuralProcessLoss
from skssl.predefined import GCN, UnetGCN, GAT
from skssl.transformers.neuralproc.datasplit import precomputed_cntxt_trgt_split
from functools import partial
from torch_geometric.nn import GCNConv, GINConv, global_mean_pool
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
import torch.nn as nn
import torch.nn.functional as F
from types import SimpleNamespace

models = {}

class Topk(torch.nn.Module):
    def __init__(self, y_dim, t_dim):
        super().__init__()

        self.conv1 = GraphConv(y_dim, 128)
        self.pool1 = TopKPooling(128, ratio=0.8)
        self.conv2 = GraphConv(128, 128)
        self.pool2 = TopKPooling(128, ratio=0.8)
        self.conv3 = GraphConv(128, 128)
        self.pool3 = TopKPooling(128, ratio=0.8)

        self.lin1 = torch.nn.Linear(256, 128)
        self.lin2 = torch.nn.Linear(128, 64)
        self.lin3 = torch.nn.Linear(64, t_dim)

    def forward(self, x, adj, batch):
        edge_index = adj._indices()

        x = F.relu(self.conv1(x, edge_index))
        x, edge_index, _, batch, _ = self.pool1(x, edge_index, None, batch)
        x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv2(x, edge_index))
        x, edge_index, _, batch, _ = self.pool2(x, edge_index, None, batch)
        x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv3(x, edge_index))
        x, edge_index, _, batch, _ = self.pool3(x, edge_index, None, batch)
        x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = x1 + x2 + x3

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.5, training=self.training)

        return F.relu(self.lin2(x))


class GATSupervised(torch.nn.Module):
    def __init__(self, y_dim, t_dim, dim=8, heads=8, dropout=0.6, n_layers=2):
        super().__init__()
        self.gat = GAT(y_dim, out_channels=32, dim=dim, heads=heads, dropout=dropout, n_layers=n_layers)
        self.lin = nn.Linear(32, t_dim)
        
    def forward(self, x, adj, batch):
        x = self.gat(SimpleNamespace(x=x, edge_index=adj._indices(), batch=batch))
        x = global_mean_pool(x.x, x.batch)
        return self.lin(x) 
    
class GIN0(torch.nn.Module):
    def __init__(self, y_dim, t_dim, num_layers=5, hidden=32):
        super(GIN0, self).__init__()
        self.conv1 = GINConv(
            nn.Sequential(
            nn.Linear(y_dim, hidden),
                nn.ReLU(),
                nn.Linear(hidden, hidden),
                nn.ReLU(),
                nn.BatchNorm1d(hidden),
            ),
            train_eps=False)
        self.convs = torch.nn.ModuleList()
        for i in range(num_layers - 1):
            self.convs.append(
                GINConv(
                    nn.Sequential(
                        nn.Linear(hidden, hidden),
                        nn.ReLU(),
                        nn.Linear(hidden, hidden),
                        nn.ReLU(),
                        nn.BatchNorm1d(hidden),
                    ),
                    train_eps=False))
        self.lin1 = torch.nn.Linear(num_layers * hidden, hidden)
        self.lin2 = nn.Linear(hidden, t_dim)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        for conv in self.convs:
            conv.reset_parameters()
        self.lin1.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, x, adj, batch):
        edge_index = adj._indices()
        x = self.conv1(x, edge_index)
        xs = [x]
        for conv in self.convs:
            x = conv(x, edge_index)
            xs += [x]
        x = global_mean_pool(torch.cat(xs, dim=1), batch)
        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        return self.lin2(x)

    def __repr__(self):
        return self.__class__.__name__

# initialize one model for each dataset
models["supervised_gat_large"] = lambda y_dim, t_dim : GATSupervised(y_dim, t_dim, dim=16, n_layers=3)
models["supervised_gin0_large"] = lambda y_dim, t_dim : GIN0(y_dim, t_dim, hidden=64)
models["supervised_topk"] = lambda y_dim, t_dim : Topk(y_dim, t_dim)
#models["supervised_gat_small"] = lambda y_dim, t_dim : GATSupervised(y_dim, t_dim, dim=8, n_layers=2)
#models["supervised_gin0_small"] = lambda y_dim, t_dim : GIN0(y_dim, t_dim, hidden=32)

In [9]:
from utils.helpers import count_parameters
for k,v in models.items():
    print(k, "- N Param:", count_parameters(v(y_dim=20, t_dim=5)))

supervised_gat_large - N Param: 53189
supervised_gin0_large - N Param: 60293
supervised_topk - N Param: 112901


In [10]:
from skssl.transformers.neuralproc.datasplit import GridCntxtTrgtGetter, RandomMasker, no_masker, half_masker
from utils.data.tsdata import get_timeseries_dataset, SparseMultiTimeSeriesDataset
import torch
import skorch
from torch_geometric.data import Batch
class SkorchDataLoader(torch.utils.data.DataLoader):
    def _collate_fn(self, data_list, follow_batch=[]):
        data = Batch.from_data_list(data_list, follow_batch)
        edge_attr = torch.ones_like(data.edge_index[0], dtype=torch.float) if data.edge_attr is None else data.edge_attr
        
        # Can't pass a Dataset directly, since it expects tensors. 
        # Use dict of tensors instead. Also, use torch.sparse for 
        # adjacency matrix to pass skorch's same-dimension check
        return {
            'x': data.x,
            'adj': torch.sparse.FloatTensor(data.edge_index, 
                                            edge_attr, 
                                            size=[data.num_nodes, data.num_nodes], 
                                            device=data.x.device),
            'batch': data.batch
        }, data.y
    
    def __init__(self,
                 dataset,
                 batch_size=1,
                 shuffle=True,
                 follow_batch=[],
                 **kwargs):
        super(SkorchDataLoader, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=lambda data_list: self._collate_fn(data_list, follow_batch),
            **kwargs)
        
class SkorchDataset(skorch.dataset.Dataset):
    def __init__(self, X, y):
        # We need to specify `length` to avoid checks
        super(SkorchDataset, self).__init__(X, y, length=len(X))
    
    def transform(self, X, y):
        return X   # Ignore y, since it is included in X

# Training

In [11]:
N_EPOCHS = 100 
BATCH_SIZE = 32
IS_RETRAIN = False # if false load precomputed
chckpnt_dirname="results/notebooks/neural_process_graph/"

from ntbks_helpers import train_models_

In [13]:
import random

data_trainers = {}
data_keys = datasets.keys()

for label_perc in label_percentages[::-1]:
    for run in range(10):
        for name_mod in models.keys():
            for data_name in data_keys:

                datasets=dict(enzymes = TUDataset(root='data/ENZYMES', name='ENZYMES', use_node_attr=True),
                             proteins = TUDataset(root='data/PROTEINS_full', name='PROTEINS_full', use_node_attr=True),
                         synthie = TUDataset(root='data/Synthie', name='Synthie', use_node_attr=True))

                data_specific_kwargs = {k:dict(y_dim=d.num_node_features,
                                               t_dim=d.num_classes
                                              ) 
                                        for k,d in datasets.items()}
                # should store the split
                datasets = {k:ssl_graph(train_test_split(d), label_perc=label_perc, is_add_test=False, is_augment=False) 
                                for k,d in datasets.items()if k == data_name}

                print("label perc test:", np.array([x.y != -1 for x in datasets[data_name][0]]).mean())

                data_trainers.update(train_models_({k:d for k,d in datasets.items()}, 
                                       {k+ "_lab{}%_run{}".format(label_perc, run):m  for k,m in models.items() if name_mod == k},
                                      criterion=partial(nn.CrossEntropyLoss, ignore_index=-1),
                                        data_specific_kwargs=data_specific_kwargs,
                                     patience=15,
                                     chckpnt_dirname=chckpnt_dirname,
                                      max_epochs=N_EPOCHS,
                                      batch_size=BATCH_SIZE,
                                      is_retrain=IS_RETRAIN,
                                      callbacks=[],
                                                   seed=random.randint(0,10000),
                                      iterator_train=SkorchDataLoader,
                                    iterator_valid=SkorchDataLoader,
                                    dataset=SkorchDataset,
                                                   is_monitor_acc=True,
                                      mode="classifier"))

label perc test: 1.0

--- Loading enzymes/supervised_gat_large_lab1%_run0 ---

enzymes/supervised_gat_large_lab1%_run0 best epoch: 27 val_loss: 1.333056089922282
label perc test: 1.0

--- Loading proteins/supervised_gat_large_lab1%_run0 ---

proteins/supervised_gat_large_lab1%_run0 best epoch: 19 val_loss: 0.4450531373051857
label perc test: 1.0

--- Loading synthie/supervised_gat_large_lab1%_run0 ---

synthie/supervised_gat_large_lab1%_run0 best epoch: 11 val_loss: 1.2429730469786275
label perc test: 1.0

--- Loading enzymes/supervised_gin0_large_lab1%_run0 ---

enzymes/supervised_gin0_large_lab1%_run0 best epoch: 11 val_loss: 1.2847857066293016
label perc test: 1.0

--- Loading proteins/supervised_gin0_large_lab1%_run0 ---

proteins/supervised_gin0_large_lab1%_run0 best epoch: 5 val_loss: 0.48654540174728866
label perc test: 1.0

--- Loading synthie/supervised_gin0_large_lab1%_run0 ---

synthie/supervised_gin0_large_lab1%_run0 best epoch: 4 val_loss: 1.2884178859582218
label perc tes

label perc test: 1.0

--- Loading enzymes/supervised_topk_lab1%_run5 ---

enzymes/supervised_topk_lab1%_run5 best epoch: 17 val_loss: 1.277904800459987
label perc test: 1.0

--- Loading proteins/supervised_topk_lab1%_run5 ---

proteins/supervised_topk_lab1%_run5 best epoch: 13 val_loss: 0.4587379825324868
label perc test: 1.0

--- Loading synthie/supervised_topk_lab1%_run5 ---

synthie/supervised_topk_lab1%_run5 best epoch: 38 val_loss: 0.7400394499771238
label perc test: 1.0

--- Loading enzymes/supervised_gat_large_lab1%_run6 ---

enzymes/supervised_gat_large_lab1%_run6 best epoch: 64 val_loss: 0.9403902288475464
label perc test: 1.0

--- Loading proteins/supervised_gat_large_lab1%_run6 ---

proteins/supervised_gat_large_lab1%_run6 best epoch: 11 val_loss: 0.4229430837154388
label perc test: 1.0

--- Loading synthie/supervised_gat_large_lab1%_run6 ---

synthie/supervised_gat_large_lab1%_run6 best epoch: 23 val_loss: 1.3124746322631835
label perc test: 1.0

--- Loading enzymes/supervi

label perc test: 0.5

--- Loading enzymes/supervised_gin0_large_lab0.5%_run1 ---

enzymes/supervised_gin0_large_lab0.5%_run1 best epoch: 11 val_loss: 1.5160312238854312
label perc test: 0.5004995004995005

--- Loading proteins/supervised_gin0_large_lab0.5%_run1 ---

proteins/supervised_gin0_large_lab0.5%_run1 best epoch: 8 val_loss: 0.548702407853377
label perc test: 0.5

--- Loading synthie/supervised_gin0_large_lab0.5%_run1 ---

synthie/supervised_gin0_large_lab0.5%_run1 best epoch: 4 val_loss: 1.2137953472764869
label perc test: 0.5

--- Loading enzymes/supervised_topk_lab0.5%_run1 ---

enzymes/supervised_topk_lab0.5%_run1 best epoch: 12 val_loss: 1.5947247220623877
label perc test: 0.5004995004995005

--- Loading proteins/supervised_topk_lab0.5%_run1 ---

proteins/supervised_topk_lab0.5%_run1 best epoch: 10 val_loss: 0.5481218252885008
label perc test: 0.5

--- Loading synthie/supervised_topk_lab0.5%_run1 ---

synthie/supervised_topk_lab0.5%_run1 best epoch: 12 val_loss: 0.71131205

label perc test: 0.5004995004995005

--- Loading proteins/supervised_topk_lab0.5%_run6 ---

proteins/supervised_topk_lab0.5%_run6 best epoch: 11 val_loss: 0.4831573669109192
label perc test: 0.5

--- Loading synthie/supervised_topk_lab0.5%_run6 ---

synthie/supervised_topk_lab0.5%_run6 best epoch: 25 val_loss: 1.1263355480300055
label perc test: 0.5

--- Loading enzymes/supervised_gat_large_lab0.5%_run7 ---

enzymes/supervised_gat_large_lab0.5%_run7 best epoch: 45 val_loss: 1.4512570258139013
label perc test: 0.5004995004995005

--- Loading proteins/supervised_gat_large_lab0.5%_run7 ---

proteins/supervised_gat_large_lab0.5%_run7 best epoch: 6 val_loss: 0.5688191761947344
label perc test: 0.5

--- Loading synthie/supervised_gat_large_lab0.5%_run7 ---

synthie/supervised_gat_large_lab0.5%_run7 best epoch: 28 val_loss: 1.045873854368452
label perc test: 0.5

--- Loading enzymes/supervised_gin0_large_lab0.5%_run7 ---

enzymes/supervised_gin0_large_lab0.5%_run7 best epoch: 9 val_loss: 1.48

label perc test: 0.3

--- Loading enzymes/supervised_gin0_large_lab0.3%_run2 ---

enzymes/supervised_gin0_large_lab0.3%_run2 best epoch: 15 val_loss: 1.5044539750201031
label perc test: 0.3006993006993007

--- Loading proteins/supervised_gin0_large_lab0.3%_run2 ---

proteins/supervised_gin0_large_lab0.3%_run2 best epoch: 1 val_loss: 0.6535360782675477
label perc test: 0.3

--- Loading synthie/supervised_gin0_large_lab0.3%_run2 ---

synthie/supervised_gin0_large_lab0.3%_run2 best epoch: 3 val_loss: 1.3481911587526427
label perc test: 0.3

--- Loading enzymes/supervised_topk_lab0.3%_run2 ---

enzymes/supervised_topk_lab0.3%_run2 best epoch: 9 val_loss: 1.7507157139871075
label perc test: 0.3006993006993007

--- Loading proteins/supervised_topk_lab0.3%_run2 ---

proteins/supervised_topk_lab0.3%_run2 best epoch: 3 val_loss: 0.5625065614668836
label perc test: 0.3

--- Loading synthie/supervised_topk_lab0.3%_run2 ---

synthie/supervised_topk_lab0.3%_run2 best epoch: 13 val_loss: 0.764397739

label perc test: 0.3006993006993007

--- Loading proteins/supervised_topk_lab0.3%_run7 ---

proteins/supervised_topk_lab0.3%_run7 best epoch: 5 val_loss: 0.5282050612890676
label perc test: 0.3

--- Loading synthie/supervised_topk_lab0.3%_run7 ---

synthie/supervised_topk_lab0.3%_run7 best epoch: 16 val_loss: 1.3999945369605318
label perc test: 0.3

--- Loading enzymes/supervised_gat_large_lab0.3%_run8 ---

enzymes/supervised_gat_large_lab0.3%_run8 best epoch: 8 val_loss: 1.7507590977029373
label perc test: 0.3006993006993007

--- Loading proteins/supervised_gat_large_lab0.3%_run8 ---

proteins/supervised_gat_large_lab0.3%_run8 best epoch: 9 val_loss: 0.49581607814706846
label perc test: 0.3

--- Loading synthie/supervised_gat_large_lab0.3%_run8 ---

synthie/supervised_gat_large_lab0.3%_run8 best epoch: 3 val_loss: 1.3812758019857483
label perc test: 0.3

--- Loading enzymes/supervised_gin0_large_lab0.3%_run8 ---

enzymes/supervised_gin0_large_lab0.3%_run8 best epoch: 13 val_loss: 1.38

label perc test: 0.1

--- Loading enzymes/supervised_gin0_large_lab0.1%_run3 ---

enzymes/supervised_gin0_large_lab0.1%_run3 best epoch: 3 val_loss: 1.7035221311510826
label perc test: 0.1008991008991009

--- Loading proteins/supervised_gin0_large_lab0.1%_run3 ---

proteins/supervised_gin0_large_lab0.1%_run3 best epoch: 3 val_loss: 0.6283579249746307
label perc test: 0.1

--- Loading synthie/supervised_gin0_large_lab0.1%_run3 ---

synthie/supervised_gin0_large_lab0.1%_run3 best epoch: 5 val_loss: 1.3025683792013871
label perc test: 0.1

--- Loading enzymes/supervised_topk_lab0.1%_run3 ---

enzymes/supervised_topk_lab0.1%_run3 best epoch: 4 val_loss: 2.264928987635995
label perc test: 0.1008991008991009

--- Loading proteins/supervised_topk_lab0.1%_run3 ---

proteins/supervised_topk_lab0.1%_run3 best epoch: 5 val_loss: 0.6492432832563546
label perc test: 0.1

--- Loading synthie/supervised_topk_lab0.1%_run3 ---

synthie/supervised_topk_lab0.1%_run3 best epoch: 12 val_loss: 1.37649075431

label perc test: 0.1008991008991009

--- Loading proteins/supervised_topk_lab0.1%_run8 ---

proteins/supervised_topk_lab0.1%_run8 best epoch: 9 val_loss: 0.6466932037859585
label perc test: 0.1

--- Loading synthie/supervised_topk_lab0.1%_run8 ---

synthie/supervised_topk_lab0.1%_run8 best epoch: 14 val_loss: 1.3168847752749762
label perc test: 0.1

--- Loading enzymes/supervised_gat_large_lab0.1%_run9 ---

enzymes/supervised_gat_large_lab0.1%_run9 best epoch: 7 val_loss: 1.7318331646364789
label perc test: 0.1008991008991009

--- Loading proteins/supervised_gat_large_lab0.1%_run9 ---

proteins/supervised_gat_large_lab0.1%_run9 best epoch: 10 val_loss: 0.6644698342877496
label perc test: 0.1

--- Loading synthie/supervised_gat_large_lab0.1%_run9 ---

synthie/supervised_gat_large_lab0.1%_run9 best epoch: 1 val_loss: 1.37472842027509
label perc test: 0.1

--- Loading enzymes/supervised_gin0_large_lab0.1%_run9 ---

enzymes/supervised_gin0_large_lab0.1%_run9 best epoch: 4 val_loss: 1.72701

label perc test: 0.05094905094905095

--- Loading proteins/supervised_gat_large_lab0.05%_run4 ---

proteins/supervised_gat_large_lab0.05%_run4 best epoch: 1 val_loss: 0.6979277401224464
label perc test: 0.05

--- Loading synthie/supervised_gat_large_lab0.05%_run4 ---

synthie/supervised_gat_large_lab0.05%_run4 best epoch: 10 val_loss: 1.3748575323175314
label perc test: 0.05

--- Loading enzymes/supervised_gin0_large_lab0.05%_run4 ---

enzymes/supervised_gin0_large_lab0.05%_run4 best epoch: 3 val_loss: 1.724150002944962
label perc test: 0.05094905094905095

--- Loading proteins/supervised_gin0_large_lab0.05%_run4 ---

proteins/supervised_gin0_large_lab0.05%_run4 best epoch: 1 val_loss: 0.7775081868391112
label perc test: 0.05

--- Loading synthie/supervised_gin0_large_lab0.05%_run4 ---

synthie/supervised_gin0_large_lab0.05%_run4 best epoch: 1 val_loss: 1.3905136016066575
label perc test: 0.05

--- Loading enzymes/supervised_topk_lab0.05%_run4 ---

enzymes/supervised_topk_lab0.05%_run4

label perc test: 0.05094905094905095

--- Loading proteins/supervised_gin0_large_lab0.05%_run9 ---

proteins/supervised_gin0_large_lab0.05%_run9 best epoch: 1 val_loss: 0.6866635380340411
label perc test: 0.05

--- Loading synthie/supervised_gin0_large_lab0.05%_run9 ---

synthie/supervised_gin0_large_lab0.05%_run9 best epoch: 1 val_loss: 1.3878484619291205
label perc test: 0.05

--- Loading enzymes/supervised_topk_lab0.05%_run9 ---

enzymes/supervised_topk_lab0.05%_run9 best epoch: 7 val_loss: 3.265656755541876
label perc test: 0.05094905094905095

--- Loading proteins/supervised_topk_lab0.05%_run9 ---

proteins/supervised_topk_lab0.05%_run9 best epoch: 4 val_loss: 0.916452561337525
label perc test: 0.05

--- Loading synthie/supervised_topk_lab0.05%_run9 ---

synthie/supervised_topk_lab0.05%_run9 best epoch: 36 val_loss: 2.092158110286441
label perc test: 0.011111111111111112

--- Loading enzymes/supervised_gat_large_lab0.01%_run0 ---

enzymes/supervised_gat_large_lab0.01%_run0 best ep

label perc test: 0.011111111111111112

--- Loading enzymes/supervised_topk_lab0.01%_run4 ---

enzymes/supervised_topk_lab0.01%_run4 best epoch: 12 val_loss: 3.814911645391713
label perc test: 0.01098901098901099

--- Loading proteins/supervised_topk_lab0.01%_run4 ---

proteins/supervised_topk_lab0.01%_run4 best epoch: 2 val_loss: 1.0982486040369102
label perc test: 0.011111111111111112

--- Loading synthie/supervised_topk_lab0.01%_run4 ---

synthie/supervised_topk_lab0.01%_run4 best epoch: 8 val_loss: 2.2323575353244993
label perc test: 0.011111111111111112

--- Loading enzymes/supervised_gat_large_lab0.01%_run5 ---

enzymes/supervised_gat_large_lab0.01%_run5 best epoch: 1 val_loss: 1.780623247087464
label perc test: 0.01098901098901099

--- Loading proteins/supervised_gat_large_lab0.01%_run5 ---

proteins/supervised_gat_large_lab0.01%_run5 best epoch: 2 val_loss: 0.6836774850645071
label perc test: 0.011111111111111112

--- Loading synthie/supervised_gat_large_lab0.01%_run5 ---

synth

label perc test: 0.011111111111111112

--- Loading enzymes/supervised_topk_lab0.01%_run9 ---

enzymes/supervised_topk_lab0.01%_run9 best epoch: 6 val_loss: 3.9314929436528407
label perc test: 0.01098901098901099

--- Loading proteins/supervised_topk_lab0.01%_run9 ---

proteins/supervised_topk_lab0.01%_run9 best epoch: 5 val_loss: 1.8696230036746473
label perc test: 0.011111111111111112

--- Loading synthie/supervised_topk_lab0.01%_run9 ---

synthie/supervised_topk_lab0.01%_run9 best epoch: 1 val_loss: 3.7207018215199468


In [14]:
for k,t in data_trainers.items(): 
    for e, h in enumerate(t.history[::-1]):
        if h["valid_acc_best"]:
            print(k, "epoch:", len(t.history)-e, 
                  "val_loss:", h["valid_loss"], 
                 "valid_acc", h["valid_acc"])
            break

enzymes/supervised_gat_large_lab1%_run0 epoch: 27 val_loss: 1.333056089922282 valid_acc 0.5
proteins/supervised_gat_large_lab1%_run0 epoch: 19 val_loss: 0.4450531373051857 valid_acc 0.7410714285714286
synthie/supervised_gat_large_lab1%_run0 epoch: 11 val_loss: 1.2429730469786275 valid_acc 0.5
enzymes/supervised_gin0_large_lab1%_run0 epoch: 11 val_loss: 1.2847857066293016 valid_acc 0.55
proteins/supervised_gin0_large_lab1%_run0 epoch: 5 val_loss: 0.48654540174728866 valid_acc 0.7767857142857143
synthie/supervised_gin0_large_lab1%_run0 epoch: 4 val_loss: 1.2884178859582218 valid_acc 0.425
enzymes/supervised_topk_lab1%_run0 epoch: 20 val_loss: 1.27866411121194 valid_acc 0.5166666666666667
proteins/supervised_topk_lab1%_run0 epoch: 21 val_loss: 0.5020774744207989 valid_acc 0.7589285714285714
synthie/supervised_topk_lab1%_run0 epoch: 16 val_loss: 1.2331990718841552 valid_acc 0.5
enzymes/supervised_gat_large_lab1%_run1 epoch: 20 val_loss: 1.44608872124009 valid_acc 0.45
proteins/supervised_g

proteins/supervised_gin0_large_lab0.3%_run9 epoch: 2 val_loss: 0.6146249752525605 valid_acc 0.6428571428571429
synthie/supervised_gin0_large_lab0.3%_run9 epoch: 4 val_loss: 1.2422228795024042 valid_acc 0.475
enzymes/supervised_topk_lab0.3%_run9 epoch: 8 val_loss: 1.6770923188603282 valid_acc 0.31666666666666665
proteins/supervised_topk_lab0.3%_run9 epoch: 3 val_loss: 0.667897039219408 valid_acc 0.625
synthie/supervised_topk_lab0.3%_run9 epoch: 17 val_loss: 1.1609162023733552 valid_acc 0.675
enzymes/supervised_gat_large_lab0.1%_run0 epoch: 8 val_loss: 1.6965100303943017 valid_acc 0.3333333333333333
proteins/supervised_gat_large_lab0.1%_run0 epoch: 3 val_loss: 0.6540130296152754 valid_acc 0.7232142857142857
synthie/supervised_gat_large_lab0.1%_run0 epoch: 13 val_loss: 1.3411064629727694 valid_acc 0.375
enzymes/supervised_gin0_large_lab0.1%_run0 epoch: 6 val_loss: 1.6869017599861402 valid_acc 0.2833333333333333
proteins/supervised_gin0_large_lab0.1%_run0 epoch: 3 val_loss: 0.6383244732483

In [15]:
import pandas as pd

out = pd.Series({k:v.history[-1]["valid_acc"] for k,v in data_trainers.items()}).reset_index(name="accuracy")
splitted = out["index"].str.split("/", expand = True)
out["data"] = splitted[0]
out["models"] = splitted[1]

splitted2 = out["models"].str.split("_run", expand = True)
out["models"] = splitted2[0]
out["run"] = splitted2[1]

splitted3 = out["models"].str.split("_lab", expand = True)
out["models"] = splitted3[0]
out["lab"] = splitted3[1]


out.drop(columns =["index"], inplace = True) 

out.groupby(["data", "models", "lab"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
data,models,lab,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
enzymes,supervised_gat_large,0.01%,10.0,0.258333,0.032632,0.2,0.2375,0.266667,0.283333,0.3
enzymes,supervised_gat_large,0.05%,10.0,0.311667,0.055583,0.233333,0.283333,0.3,0.341667,0.433333
enzymes,supervised_gat_large,0.1%,10.0,0.363333,0.042889,0.283333,0.366667,0.366667,0.383333,0.433333
enzymes,supervised_gat_large,0.3%,10.0,0.496667,0.09088,0.35,0.441667,0.5,0.55,0.616667
enzymes,supervised_gat_large,0.5%,10.0,0.505,0.081669,0.316667,0.483333,0.5,0.545833,0.633333
enzymes,supervised_gat_large,1%,10.0,0.615,0.089045,0.45,0.554167,0.633333,0.6875,0.733333
enzymes,supervised_gin0_large,0.01%,10.0,0.291667,0.059447,0.2,0.241667,0.308333,0.341667,0.366667
enzymes,supervised_gin0_large,0.05%,10.0,0.318333,0.060067,0.183333,0.3,0.325,0.345833,0.4
enzymes,supervised_gin0_large,0.1%,10.0,0.4,0.04714,0.333333,0.3625,0.408333,0.429167,0.466667
enzymes,supervised_gin0_large,0.3%,10.0,0.546667,0.05432,0.433333,0.533333,0.541667,0.591667,0.616667
