In [84]:
import torch
from torch import nn
from torch_geometric.nn import GCNConv, GATConv, ChebConv, SAGEConv
from torch.nn import Linear
import torch.nn.functional as F
from GNNNestedCVEvaluation import GNNNestedCVEvaluation
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops
from hyperopt import hp
import numpy as np
from tqdm.notebook import tqdm

In [98]:
class GCN(nn.Module):
    """Two-layer GCN: dropout -> GCNConv -> ReLU -> dropout -> GCNConv.

    Args:
        in_dim: size of the input node features.
        hidden_dim: width of the hidden layer; cast to ``int`` because the
            hyperopt search in this notebook yields floats (e.g. ``1024.0``).
        out_dim: size of the output (one value per class).
        dropout: probability used by the shared ``nn.Dropout`` module.
        normalize: forwarded to both ``GCNConv`` layers.
        add_self_loops: forwarded to both ``GCNConv`` layers, but only
            honoured when ``normalize`` is true (see note in ``__init__``).
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout = .2, normalize = False, add_self_loops = True):
        super().__init__()
        hidden_dim = int(hidden_dim)
        # NOTE(review): GCNConv adds self-loops as part of its on-the-fly
        # normalization, and recent torch_geometric releases appear to raise
        # ValueError for add_self_loops=True together with normalize=False
        # (exactly this class's defaults). Requesting self-loops only when
        # normalization is enabled keeps the defaults constructible; confirm
        # against the installed torch_geometric version.
        add_self_loops = bool(add_self_loops and normalize)
        self.conv1 = GCNConv(in_dim, hidden_dim, normalize = normalize, add_self_loops=add_self_loops)
        self.conv2 = GCNConv(hidden_dim, out_dim, normalize = normalize, add_self_loops=add_self_loops)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, edge_index):
        """Return the raw (un-activated) output of the second convolution."""
        x = self.dropout(x)
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.conv2(x, edge_index)
        return x

In [111]:
class GAT(nn.Module):
    """Two-layer graph attention network.

    The first ``GATConv`` concatenates its heads (so its output width is
    ``hidden_dim * heads``); the second is built with ``concat=False`` and
    maps to ``out_dim``. ``hidden_dim`` and ``heads`` are cast to ``int``
    because the hyperopt search in this notebook yields floats.
    The same ``dropout`` value is used for the input/hidden dropout module
    and is also forwarded to both ``GATConv`` layers.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout = .2, heads = 1, add_self_loops = True):
        super().__init__()
        width, n_heads = int(hidden_dim), int(heads)
        # Keyword arguments shared by both attention layers.
        attn_kwargs = dict(add_self_loops=add_self_loops, dropout=dropout, heads=n_heads)
        self.conv1 = GATConv(in_dim, width, concat=True, **attn_kwargs)
        self.conv2 = GATConv(width * n_heads, out_dim, concat=False, **attn_kwargs)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, edge_index):
        """dropout -> conv1 -> ReLU -> dropout -> conv2 (no final activation)."""
        hidden = F.relu(self.conv1(self.dropout(x), edge_index))
        return self.conv2(self.dropout(hidden), edge_index)

In [112]:
class Cheb(nn.Module):
    """Two-layer network built from ``ChebConv`` layers.

    ``hidden_dim`` and ``K`` are cast to ``int`` (the hyperopt search in this
    notebook yields floats); ``K`` and ``normalization`` are forwarded
    unchanged to both convolutions.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout = .2, K = 2, normalization = "sym"):
        super().__init__()
        width = int(hidden_dim)
        # Settings shared by both Chebyshev convolutions.
        cheb_kwargs = {"K": int(K), "normalization": normalization}
        self.conv1 = ChebConv(in_dim, width, **cheb_kwargs)
        self.conv2 = ChebConv(width, out_dim, **cheb_kwargs)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, edge_index):
        """dropout -> conv1 -> ReLU -> dropout -> conv2 (no final activation)."""
        hidden = F.relu(self.conv1(self.dropout(x), edge_index))
        return self.conv2(self.dropout(hidden), edge_index)

In [132]:
class SAGE(nn.Module):
    """Two-layer network built from ``SAGEConv`` layers.

    ``hidden_dim`` is cast to ``int`` (the hyperopt search in this notebook
    yields floats); ``normalize``, ``project`` and ``root_weight`` are
    forwarded unchanged to both convolutions.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout = .2, normalize = False, project = True, root_weight = True):
        super().__init__()
        width = int(hidden_dim)
        # Settings shared by both SAGE convolutions.
        sage_kwargs = dict(normalize=normalize, project=project, root_weight=root_weight)
        self.conv1 = SAGEConv(in_dim, width, **sage_kwargs)
        self.conv2 = SAGEConv(width, out_dim, **sage_kwargs)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, edge_index):
        """dropout -> conv1 -> ReLU -> dropout -> conv2 (no final activation)."""
        hidden = F.relu(self.conv1(self.dropout(x), edge_index))
        return self.conv2(self.dropout(hidden), edge_index)

In [133]:
# Load the Planetoid dataset named below with the "public" split and attach
# the NormalizeFeatures transform directly at construction time.
dataset_name = 'Cora'
split = "public"
dataset = Planetoid(
    root='data/',
    name=dataset_name,
    split=split,
    transform=T.NormalizeFeatures(),
)

In [134]:
class GNNSpace():
    """Base hyperopt search space shared by the model-specific spaces.

    On construction the space dictionary ``gnn_space`` is populated with the
    entries common to every model: ``out_dim`` (single choice taken from
    ``dataset.num_classes``), ``lr``, ``weight_decay``, ``dropout`` and
    ``hidden_dim``. The ``add_*`` helpers insert (or overwrite) further
    entries, using the dictionary key as the hyperopt label.
    """

    def __init__(self, dataset):
        # (low, high) bounds for the shared hyperparameters.
        self.hidden_dim_limits = (8, 1024)
        self.dropout_limits = (0.0, 0.8)
        self.weight_decay_limits = (1e-5, 1e-2)
        self.lr_limits = (1e-4, 1e-1)
        # Wrapped in a list so it can be expressed as a one-option choice.
        self.out_dim = [dataset.num_classes]
        self.gnn_space = None
        self.initialize_space()

    def initialize_space(self):
        """(Re)build ``gnn_space`` from scratch with the shared entries."""
        self.gnn_space = {}
        self.add_choice('out_dim', self.out_dim)
        self.add_loguniform('lr', self.lr_limits)
        self.add_loguniform('weight_decay', self.weight_decay_limits)
        self.add_uniform('dropout', self.dropout_limits)
        # Hidden width is quantised in steps of 16 on a log scale.
        self.add_qloguniform('hidden_dim', self.hidden_dim_limits, 16)

    def add_choice(self, key, items):
        """Register a categorical entry drawing from ``items``."""
        self.gnn_space[key] = hp.choice(key, items)

    def add_uniform(self, key, limits: tuple):
        """Register a uniform entry on ``[limits[0], limits[1]]``."""
        lower, upper = limits[0], limits[1]
        self.gnn_space[key] = hp.uniform(key, lower, upper)

    def add_loguniform(self, key, limits: tuple):
        """Register a log-uniform entry; ``limits`` are given in linear scale."""
        log_lower, log_upper = np.log(limits[0]), np.log(limits[1])
        self.gnn_space[key] = hp.loguniform(key, log_lower, log_upper)

    def add_qloguniform(self, key, limits, q):
        """Register a quantised log-uniform entry with step ``q``; ``limits`` are in linear scale."""
        log_lower, log_upper = np.log(limits[0]), np.log(limits[1])
        self.gnn_space[key] = hp.qloguniform(key, low=log_lower, high=log_upper, q=q)

class GCNSpace(GNNSpace):
    """Search space for the GCN model: shared entries plus GCN-only switches."""

    def get_space(self):
        """Add the GCN-specific choices and return the space dictionary."""
        gcn_options = (
            ('normalize', [True]),
            ('add_self_loops', [True, False]),
        )
        for label, candidates in gcn_options:
            self.add_choice(label, candidates)
        return self.gnn_space

class GATSpace(GNNSpace):
    """Search space for the GAT model: shared entries plus ``heads`` and ``add_self_loops``."""

    def get_space(self):
        """Add the GAT-specific entries and return the space dictionary."""
        head_limits, head_step = (1, 8), 2
        self.add_qloguniform('heads', head_limits, head_step)
        self.add_choice('add_self_loops', [True, False])
        return self.gnn_space

class ChebSpace(GNNSpace):
    """Search space for the Cheb model: shared entries plus ``K`` and ``normalization``."""

    def get_space(self):
        """Add the Cheb-specific entries and return the space dictionary."""
        order_limits, order_step = (1, 4), 2
        self.add_qloguniform('K', order_limits, order_step)
        self.add_choice('normalization', ["sym", "rw", None])
        return self.gnn_space

class SAGESpace(GNNSpace):
    """Search space for the SAGE model: shared entries plus three boolean switches."""

    def get_space(self):
        """Add the SAGE-specific on/off choices and return the space dictionary."""
        for flag in ('normalize', 'project', 'root_weight'):
            self.add_choice(flag, [True, False])
        return self.gnn_space

In [135]:
# Take the entry at index 0 of the dataset; this object is what gets passed
# to GNNNestedCVEvaluation in the evaluation cells.
data = dataset[0]

In [136]:
# Prefer the first CUDA device, but fall back to the CPU so the notebook
# still runs on machines without a usable GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [137]:


# Hand-built search space with the same entries that GCNSpace produces
# (shared hyperparameters plus `normalize` / `add_self_loops`).
out_dim = [dataset.num_classes]
normalize = [True]
# Deliberately NOT named `add_self_loops`: that name is the function imported
# from torch_geometric.utils at the top of the notebook, and rebinding it to
# a list would silently shadow the import for every later cell.
add_self_loops_choices = [True, False]

gnn_choices = {
    'out_dim': out_dim,
    'normalize': normalize,
    'add_self_loops': add_self_loops_choices,
}

gnn_space = {
    # Categorical entries: one hp.choice per key, labelled with the key itself.
    **{key: hp.choice(key, value) for key, value in gnn_choices.items()},
    'lr': hp.loguniform('lr',np.log(1e-4), np.log(1e-1)),
    'weight_decay': hp.loguniform('weight_decay',np.log(1e-5), np.log(1e-2)),
    'dropout': hp.uniform('dropout', 0, .8),
    'hidden_dim': hp.qloguniform('hidden_dim', low=np.log(8), high=np.log(1024), q=16)
}

In [138]:
# One search-space object per model family, all built from the same dataset.
gcn_space, gat_space, cheb_space, sage_space = (
    space_cls(dataset)
    for space_cls in (GCNSpace, GATSpace, ChebSpace, SAGESpace)
)

In [139]:
# Model classes and their search spaces, kept in matching order so that
# position i in one list corresponds to position i in the other.
gnns = [GCN, GAT, Cheb, SAGE]
gnn_spaces = [
    space_obj.get_space()
    for space_obj in (gcn_space, gat_space, cheb_space, sage_space)
]

In [140]:
# Result containers keyed by model class name: scores and chosen parameters.
score_store, param_store = dict(), dict()

In [None]:
# Only models from this position onward are evaluated, so the first three
# entries of `gnns` (GCN, GAT, Cheb) are skipped.
# NOTE(review): confirm the skip is still intended; set to 0 to evaluate all.
first_model_index = 3

# Pair each model class with its search space and materialise the pending
# runs as a list so tqdm knows the total and can render real progress
# (wrapping `enumerate(...)` gave it no length, hence the "0it" bar).
pending_runs = list(zip(gnns, gnn_spaces))[first_model_index:]

for gnn_cls, space in tqdm(pending_runs, desc="nested CV"):
    gnn_nestedCV_evaluation = GNNNestedCVEvaluation(device, gnn_cls, data)
    # The last argument (5) is a small budget; the original inline note
    # suggested len(gcn_space.get_space().keys())*20 as the fuller setting
    # (presumably the number of hyperopt evaluations; confirm).
    gnn_nestedCV_evaluation.nested_cross_validate(3, 3, space, 5)
    score_store[gnn_cls.__name__] = gnn_nestedCV_evaluation.nested_transd_cv.outer_scores
    param_store[gnn_cls.__name__] = gnn_nestedCV_evaluation.nested_transd_cv.best_params_per_fold

0it [00:00, ?it/s]

0it [00:00, ?it/s]

  _torch_pytree._register_pytree_node(


In [71]:
# Bind the evaluator to GCN explicitly instead of reusing whatever model the
# previous loop happened to leave in `gnn_nestedCV_evaluation`; the GCN search
# space contains GCN-only keys (`normalize`, `add_self_loops`).
gcn_search_space = gcn_space.get_space()
gnn_nestedCV_evaluation = GNNNestedCVEvaluation(device, GCN, data)
# Budget scales with the number of entries in the search space (20 per entry).
gnn_nestedCV_evaluation.nested_cross_validate(3, 3, gcn_search_space, len(gcn_search_space) * 20)

0it [00:00, ?it/s]

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
Total Trials: 2: 2 succeeded, 0 failed, 0 cancelled.                            


KeyboardInterrupt: 

In [45]:
# Display the `outer_scores` array stored on the evaluator's nested CV object
# (presumably one score per outer fold; confirm in GNNNestedCVEvaluation).
gnn_nestedCV_evaluation.nested_transd_cv.outer_scores

array([0.8704319 , 0.89036548, 0.87028825])

In [46]:
# Summarise the stored outer scores as (mean, standard deviation).
outer_scores = gnn_nestedCV_evaluation.nested_transd_cv.outer_scores
outer_scores.mean(), outer_scores.std()

(0.8770285447438558, 0.009430820667344367)

In [48]:
# Display the `best_params_per_fold` list stored on the evaluator's nested CV
# object (one parameter dictionary per entry, as shown in the output below).
gnn_nestedCV_evaluation.nested_transd_cv.best_params_per_fold

[{'add_self_loops': True,
  'dropout': 0.4033649329996741,
  'hidden_dim': 1024.0,
  'normalize': True,
  'out_dim': 7},
 {'add_self_loops': True,
  'dropout': 0.07125875672145196,
  'hidden_dim': 1024.0,
  'normalize': True,
  'out_dim': 7},
 {'add_self_loops': True,
  'dropout': 0.2809695004157676,
  'hidden_dim': 544.0,
  'normalize': True,
  'out_dim': 7}]