In [1]:
import torch
from torch import nn
from torch_geometric.nn import GCNConv, GATConv, ChebConv, SAGEConv
from torch.nn import Linear
import torch.nn.functional as F
from GNNNestedCVEvaluationInductive import GNNNestedCVEvaluationInductive
from torch_geometric.datasets import PPI
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops
from hyperopt import hp
import numpy as np
from tqdm.notebook import tqdm

  _torch_pytree._register_pytree_node(


In [6]:
class GCN(nn.Module):
    """Three-layer GCN in which every layer adds a parallel linear branch.

    Each layer computes ``GCNConv(x, edge_index) + Linear(x)``. The two hidden
    layers are followed by ELU; dropout is applied to the input of every layer.

    Args:
        in_dim: Size of the input node features.
        hidden_dim: Width of the two hidden layers. Cast to ``int`` so that
            float values (e.g. coming from a hyperparameter search) are accepted.
        out_dim: Size of the output per node (default 121).
        dropout: Dropout probability used before each layer.
        normalize: Forwarded to every ``GCNConv``.
        add_self_loops: Forwarded to every ``GCNConv``.
    """

    def __init__(self, in_dim, hidden_dim, out_dim = 121, dropout = .2, normalize = False, add_self_loops = True):
        super().__init__()
        width = int(hidden_dim)
        conv_options = dict(normalize=normalize, add_self_loops=add_self_loops)

        # Graph-convolution branch (created first, in layer order).
        self.conv1 = GCNConv(in_dim, width, **conv_options)
        self.conv2 = GCNConv(width, width, **conv_options)
        self.conv3 = GCNConv(width, out_dim, **conv_options)

        # Linear branch with the same shapes, summed with the conv output.
        self.lin1 = Linear(in_dim, width)
        self.lin2 = Linear(width, width)
        self.lin3 = Linear(width, out_dim)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, edge_index):
        """Return the un-activated output of the last layer.

        Args:
            x: Node features whose last dimension is ``in_dim``.
            edge_index: Graph connectivity handed to the conv layers.
        """
        for conv, skip in ((self.conv1, self.lin1), (self.conv2, self.lin2)):
            x = self.dropout(x)
            x = F.elu(conv(x, edge_index) + skip(x))

        x = self.dropout(x)
        return self.conv3(x, edge_index) + self.lin3(x)

In [7]:
class GAT(nn.Module):
    """Three-layer GAT in which every layer adds a parallel linear branch.

    Each layer computes ``GATConv(x, edge_index) + Linear(x)``. The two hidden
    layers concatenate their heads (width ``hidden_dim * heads``) and are
    followed by ELU; the output layer uses ``concat=False`` so that its width
    is ``out_dim``, matching the linear branch. Dropout is applied to the
    input of every layer and is also forwarded to each ``GATConv``.

    Args:
        in_dim: Size of the input node features.
        hidden_dim: Per-head width of the hidden layers (cast to ``int``).
        out_dim: Size of the output per node (default 121).
        dropout: Dropout probability (feature dropout and ``GATConv`` dropout).
        heads: Number of attention heads in every layer (cast to ``int``).
        add_self_loops: Forwarded to every ``GATConv``.
    """

    def __init__(self, in_dim, hidden_dim, out_dim = 121, dropout = .2, heads = 1, add_self_loops = True):
        super().__init__()
        hidden_dim, heads = int(hidden_dim), int(heads)
        concat_width = hidden_dim * heads
        attention_options = dict(add_self_loops=add_self_loops, dropout=dropout, heads=heads)

        # Attention branch (created first, in layer order).
        self.conv1 = GATConv(in_dim, hidden_dim, concat=True, **attention_options)
        self.conv2 = GATConv(concat_width, hidden_dim, concat=True, **attention_options)
        self.conv3 = GATConv(concat_width, out_dim, concat=False, **attention_options)

        # Linear branch sized to match each attention layer's output width.
        self.lin1 = Linear(in_dim, concat_width)
        self.lin2 = Linear(concat_width, concat_width)
        self.lin3 = Linear(concat_width, out_dim)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, edge_index):
        """Return the un-activated output of the last layer.

        Args:
            x: Node features whose last dimension is ``in_dim``.
            edge_index: Graph connectivity handed to the attention layers.
        """
        for conv, skip in ((self.conv1, self.lin1), (self.conv2, self.lin2)):
            x = self.dropout(x)
            x = F.elu(conv(x, edge_index) + skip(x))

        x = self.dropout(x)
        return self.conv3(x, edge_index) + self.lin3(x)

In [8]:
# Load the PPI dataset from the local cache directory; NormalizeFeatures is
# attached as the dataset transform, so it is applied whenever a graph is accessed.
dataset = PPI(root='data//PPI', transform=T.NormalizeFeatures())
# Display the number of target classes.
dataset.num_classes

121

In [9]:
class GNNSpace():
    """Base hyperopt search space shared by every GNN architecture.

    The space is a plain dict (``self.gnn_space``) mapping hyperparameter names
    to hyperopt expressions. It always contains ``lr``, ``weight_decay``,
    ``dropout`` and ``hidden_dim``; subclasses add architecture-specific
    entries through the ``add_*`` helpers.

    Args:
        dataset: Object exposing ``num_classes``; stored as ``self.out_dim``
            (a one-element list).
        hidden_dim_limits: ``(low, high)`` for the hidden width, sampled
            log-uniformly and quantised with ``q=16``.
        dropout_limits: ``(low, high)`` for dropout, sampled uniformly.
        weight_decay_limits: ``(low, high)`` for weight decay, sampled
            log-uniformly.
        lr_limits: ``(low, high)`` for the learning rate, sampled
            log-uniformly.

    Raises:
        ValueError: If a bound of a log-scaled hyperparameter is not positive.
    """

    def __init__(self, dataset, hidden_dim_limits=(8, 1024), dropout_limits=(0.0, 0.8),
                 weight_decay_limits=(1e-7, 1e-4), lr_limits=(1e-5, 1e-2)):
        self.hidden_dim_limits = hidden_dim_limits
        self.dropout_limits = dropout_limits
        self.weight_decay_limits = weight_decay_limits
        self.lr_limits = lr_limits
        self.out_dim = [dataset.num_classes]
        self.gnn_space = None
        self.initialize_space()

    @staticmethod
    def _log_bounds(key, limits):
        """Return ``(log(low), log(high))`` after checking both bounds are > 0."""
        low, high = limits[0], limits[1]
        if low <= 0 or high <= 0:
            raise ValueError(
                f"log-scaled hyperparameter '{key}' needs positive limits, got {limits!r}"
            )
        return np.log(low), np.log(high)

    def initialize_space(self):
        """(Re)build ``self.gnn_space`` with the architecture-independent entries."""
        self.gnn_space = {}
        self.add_loguniform('lr', self.lr_limits)
        self.add_loguniform('weight_decay', self.weight_decay_limits)
        self.add_uniform('dropout', self.dropout_limits)
        # q=16: by hyperopt's definition the sampled widths are multiples of 16.
        self.add_qloguniform('hidden_dim', self.hidden_dim_limits, 16)

    def add_choice(self, key, items):
        """Add (or replace) a categorical hyperparameter drawn from ``items``."""
        self.gnn_space[key] = hp.choice(key, items)

    def add_uniform(self, key, limits: tuple):
        """Add (or replace) a hyperparameter sampled uniformly in ``limits``."""
        self.gnn_space[key] = hp.uniform(key, limits[0], limits[1])

    def add_loguniform(self, key, limits: tuple):
        """Add (or replace) a hyperparameter sampled log-uniformly in ``limits``."""
        low, high = self._log_bounds(key, limits)
        self.gnn_space[key] = hp.loguniform(key, low, high)

    def add_qloguniform(self, key, limits, q):
        """Add (or replace) a log-uniform hyperparameter quantised to multiples of ``q``."""
        low, high = self._log_bounds(key, limits)
        self.gnn_space[key] = hp.qloguniform(key, low=low, high=high, q=q)

class GCNSpace(GNNSpace):
    """Search space for the GCN model.

    Construction is inherited unchanged from ``GNNSpace`` (takes the dataset).
    """

    def get_space(self):
        """Register the GCN-specific entries and return the full space dict."""
        # Both entries are single-option choices, i.e. effectively fixed to True
        # (False was dropped from the add_self_loops candidates).
        for key, options in (('normalize', [True]), ('add_self_loops', [True])):
            self.add_choice(key, options)
        return self.gnn_space

class GATSpace(GNNSpace):
    """Search space for the GAT model.

    Construction is inherited unchanged from ``GNNSpace`` (takes the dataset).
    """

    def get_space(self):
        """Register the GAT-specific entries and return the full space dict."""
        head_limits, head_step = (1, 4), 2
        # NOTE(review): hyperopt documents qloguniform as round(exp(u) / q) * q.
        # With q=2 over [1, 4] that yields only even values (2 or 4; 0 at the
        # exact lower bound), so heads=1 is never sampled - confirm this is intended.
        self.add_qloguniform('heads', head_limits, head_step)
        self.add_choice('add_self_loops', [True, False])
        return self.gnn_space

class ChebSpace(GNNSpace):
    """Search space with Chebyshev-convolution hyperparameters (``K``, ``normalization``).

    Construction is inherited unchanged from ``GNNSpace`` (takes the dataset).
    """

    def get_space(self):
        """Register the Chebyshev-specific entries and return the full space dict."""
        order_limits, order_step = (1, 4), 2
        # NOTE(review): hyperopt documents qloguniform as round(exp(u) / q) * q.
        # With q=2 over [1, 4] that yields only even values (2 or 4; 0 at the
        # exact lower bound), so K=1 and K=3 are never sampled - confirm this is intended.
        self.add_qloguniform('K', order_limits, order_step)
        self.add_choice('normalization', ["sym", "rw", None])
        return self.gnn_space

class SAGESpace(GNNSpace):
    """Search space with GraphSAGE hyperparameters (three boolean switches).

    Construction is inherited unchanged from ``GNNSpace`` (takes the dataset).
    """

    def get_space(self):
        """Register the GraphSAGE-specific entries and return the full space dict."""
        # Each switch is searched over both settings; order of insertion is kept.
        for flag in ('normalize', 'project', 'root_weight'):
            self.add_choice(flag, [True, False])
        return self.gnn_space

In [10]:
# Alias for the loaded dataset; `data` is the name handed to the evaluator in the evaluation loop.
data = dataset

In [11]:
# Prefer the second GPU (the device this notebook was originally run on), but
# fall back to the first GPU or the CPU so the notebook still runs on machines
# with fewer than two CUDA devices.
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.device_count() == 1:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [12]:
# Display the number of target classes again (same value as shown right after loading the dataset).
dataset.num_classes

121

In [13]:
# One search-space builder per architecture, all constructed from the same dataset.
gcn_space, gat_space, cheb_space, sage_space = [
    space_cls(dataset)
    for space_cls in (GCNSpace, GATSpace, ChebSpace, SAGESpace)
]

In [15]:
# Models to evaluate and their search spaces; the evaluation loop pairs the two
# lists by position, so they must stay aligned. The trailing comments keep the
# GCN alternative at hand.
gnns = [GAT] #[GCN]
gnn_spaces = [gat_space.get_space()]# [gcn_space.get_space()]
# Display how many hyperparameters the first search space contains.
len(gnn_spaces[0])

6

In [16]:
# Result containers keyed by model class name; filled in by the evaluation loop
# (outer scores and best parameters per fold, respectively).
score_store = {}
param_store = {}

In [None]:
# Run the nested cross-validation for every (model, search space) pair.
# The lists are paired by position, so a length mismatch is reported up front
# instead of silently dropping or mis-pairing entries.
if len(gnns) != len(gnn_spaces):
    raise ValueError(
        f"gnns and gnn_spaces must have the same length, got {len(gnns)} and {len(gnn_spaces)}"
    )

# `total` gives tqdm a real progress bar; wrapping a bare generator only shows "0it".
for gnn, space in tqdm(zip(gnns, gnn_spaces), total=len(gnn_spaces)):
    # Search budget: 20 hyperopt evaluations per hyperparameter in the space.
    gnn_nestedCV_evaluation = GNNNestedCVEvaluationInductive(device, gnn, data, max_evals=len(space.keys())*20, epochs=10000, PATIENCE=10)
    # NOTE(review): the two 5s are presumably the outer/inner fold counts - confirm
    # against GNNNestedCVEvaluationInductive.nested_cross_validate.
    gnn_nestedCV_evaluation.nested_cross_validate(5, 5, space)
    score_store[gnn.__name__] = gnn_nestedCV_evaluation.nested_inductive_cv.outer_scores
    param_store[gnn.__name__] = gnn_nestedCV_evaluation.nested_inductive_cv.best_params_per_fold
    # The former unconditional `break` was removed: it made every entry after
    # the first unreachable. With a single model in `gnns` the run is unchanged.

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/07/14 07:11:53 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
24/07/14 07:11:53 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _t

In [None]:
# Summarise every evaluated model as "name: mean +- std" of its stored scores.
for key, scores in score_store.items():
    print(f"{key}: {scores.mean()} +- {scores.std()}")

In [15]:
# Display the raw score arrays stored per model.
score_store

{'GAT': array([0.8748318 , 0.98842423, 0.93867617, 0.9891719 , 0.88002504,
        0.95937033, 0.92682495, 0.97213443, 0.98409614, 0.96266779,
        0.98732086])}

In [16]:
# Display the best hyperparameters recorded per fold. Relies on
# `gnn_nestedCV_evaluation` still holding the evaluator left over from the evaluation loop.
gnn_nestedCV_evaluation.nested_inductive_cv.best_params_per_fold

[{'add_self_loops': False,
  'dropout': 0.006719214776768539,
  'heads': 2.0,
  'hidden_dim': 1024.0,
  'lr': 0.00030082755927652395,
  'out_dim': 121,
  'weight_decay': 1.0023577958988111e-07},
 {'add_self_loops': True,
  'dropout': 0.0040446304140414014,
  'heads': 2.0,
  'hidden_dim': 1024.0,
  'lr': 0.00030004816157712204,
  'out_dim': 121,
  'weight_decay': 7.948683569716372e-07},
 {'add_self_loops': True,
  'dropout': 0.007010177433444743,
  'heads': 4.0,
  'hidden_dim': 1024.0,
  'lr': 0.00030061368427374344,
  'out_dim': 121,
  'weight_decay': 3.889481293479986e-07},
 {'add_self_loops': False,
  'dropout': 0.0014353616434441174,
  'heads': 4.0,
  'hidden_dim': 1024.0,
  'lr': 0.0003003598653171147,
  'out_dim': 121,
  'weight_decay': 2.2035683104619298e-07},
 {'add_self_loops': True,
  'dropout': 0.0007297050613278022,
  'heads': 2.0,
  'hidden_dim': 1024.0,
  'lr': 0.0003006992910767814,
  'out_dim': 121,
  'weight_decay': 3.366192816483186e-07},
 {'add_self_loops': True,
  'd

In [20]:
# Mean and standard deviation of the evaluator's `outer_scores`. Relies on
# `gnn_nestedCV_evaluation` left over from the evaluation loop.
gnn_nestedCV_evaluation.nested_inductive_cv.outer_scores.mean(), gnn_nestedCV_evaluation.nested_inductive_cv.outer_scores.std()

(0.9512312413736125, 0.039886034171323484)

In [21]:
# Mean and standard deviation of the evaluator's `inner_scores`. Relies on
# `gnn_nestedCV_evaluation` left over from the evaluation loop.
gnn_nestedCV_evaluation.nested_inductive_cv.inner_scores.mean(), gnn_nestedCV_evaluation.nested_inductive_cv.inner_scores.std()

(0.945004884409546, 0.03723504496150597)