# Installations and Imports

In [None]:
!pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.13.0+cu116.html

In [2]:
import torch.nn.functional as F
from torch.nn import Linear, Dropout
from torch_geometric.nn import GCNConv, GATConv, GATv2Conv
import torch
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import degree
from collections import Counter
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
import torch_geometric.datasets as datasets
import torch_geometric
import torch.nn as nn
from typing import Optional, Tuple, Union

import torch
import torch.nn.functional as F
from torch import Tensor
from torch.nn import Parameter
from torch_sparse import SparseTensor, set_diag

from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.dense.linear import Linear
from torch_geometric.nn.inits import glorot, zeros
from torch_geometric.typing import Adj, OptTensor, OptPairTensor, PairTensor, Size
from torch_geometric.utils import add_self_loops, remove_self_loops, softmax
from torch_geometric.utils import is_sparse, is_torch_sparse_tensor

import time

# Get GPU Specification
Will be usefull to report the time on current machine

In [20]:
!nvidia-smi

Thu Dec 29 01:24:23 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8     9W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Datasets Preparation

In [3]:
def get_dataset_info(dataset):
  # Print information about the dataset
  print(dataset.data)
  print(f'Number of graphs: {len(dataset)}')
  print(f'Number of nodes: {dataset[0].x.shape[0]}')
  print(f'Number of features: {dataset.num_features}')
  print(f'Number of classes: {dataset.num_classes}')
  print(f'Has isolated nodes: {dataset[0].has_isolated_nodes()}')

In [None]:
# Import dataset from PyTorch Geometric
dataset_cite_seer = Planetoid(root="./", name="CiteSeer")

print("CiteSeer dataset:")
get_dataset_info(dataset_cite_seer)
print()

dataset_cora = Planetoid(
    root="./",
    name='Cora',
    split="public",
    transform=torch_geometric.transforms.GCNNorm()
  )

print("Cora dataset:")
get_dataset_info(dataset_cora)
print()

dataset_pub_med = Planetoid(root="./", name="PubMed")

print("PubMed dataset:")
get_dataset_info(dataset_pub_med)
print()

In [5]:
data_cite_seer = dataset_cite_seer[0]
data_cora = dataset_cora[0]
data_pub_med = dataset_pub_med[0]

# Architectures

## GCN: Graph Convolutional Network

In [6]:
class GCN(torch.nn.Module):
  """Graph Convolutional Network"""
  def __init__(self, dim_in, dim_h, dim_out):
    super().__init__()
    self.gcn1 = GCNConv(dim_in, dim_h)
    self.gcn2 = GCNConv(dim_h, dim_out)

  def forward(self, x, edge_index):
    h = self.gcn1(x, edge_index)
    h = torch.relu(h)
    h = F.dropout(h, p=0.5, training=self.training)
    h = self.gcn2(h, edge_index)
    return h, F.log_softmax(h, dim=1)

## Custom GCN Implementation

In [7]:
class GCNConvMy(nn.Module):
    """Custom GCN Layer Implementation"""
    def __init__(self, input_dim, output_dim):
      super(GCNConvMy, self).__init__()

      self.W = nn.Parameter(torch.FloatTensor(input_dim, output_dim))
      torch.nn.init.xavier_uniform(self.W)

    def forward(self, x, adj_matrix):
      neighs = adj_matrix.sum(0)
      res = torch.mm(torch.mm(adj_matrix, x), self.W)
      res = torch.div(res.T, neighs).T

      return res

In [8]:
class GCNMy(torch.nn.Module):
  """Graph Convolutional Network With Custom Layer Implementation"""
  def __init__(self, dim_in, dim_h, dim_out):
    super().__init__()
    self.gcn1 = GCNConvMy(dim_in, dim_h)
    self.gcn2 = GCNConvMy(dim_h, dim_out)
   
  def forward(self, x, adj_matrix):
    h = self.gcn1(x, adj_matrix)
    h = torch.relu(h)
    h = F.dropout(h, p=0.5, training=self.training)
    h = self.gcn2(h, adj_matrix)
    return h, F.log_softmax(h, dim=1)

## GAT: Graph Attention Network

In [9]:
class GAT(torch.nn.Module):
  """Graph Attention Network"""
  def __init__(self, dim_in, dim_h, dim_out, heads=8):
    super().__init__()
    self.gat1 = GATConv(dim_in, dim_h, heads=heads)
    self.gat2 = GATConv(dim_h*heads, dim_out, heads=1)

  def forward(self, x, edge_index):
    h = self.gat1(x, edge_index)
    h = F.elu(h)
    h = F.dropout(h, p=0.6, training=self.training)
    h = self.gat2(h, edge_index)
    return h, F.log_softmax(h, dim=1)

## GATv2: Graph Attention Network

In [10]:
class GATv2(torch.nn.Module):
  """Graph Attention Network v2"""
  def __init__(self, dim_in, dim_h, dim_out, heads=8):
    super().__init__()
    self.gat1 = GATv2Conv(dim_in, dim_h, heads=heads)
    self.gat2 = GATv2Conv(dim_h*heads, dim_out, heads=1)

  def forward(self, x, edge_index):
    h = self.gat1(x, edge_index)
    h = F.elu(h)
    h = F.dropout(h, p=0.6, training=self.training)
    h = self.gat2(h, edge_index)
    return h, F.log_softmax(h, dim=1)

## GCN + GAT: Use different layers in the network

In [24]:
class GCN_GAT(torch.nn.Module):
  def __init__(self, dim_in, dim_h, dim_out):
    super().__init__()
    self.gcn1 = GCNConv(dim_in, dim_h)
    self.gat2 = GATConv(dim_h, dim_out, heads=1)

  def forward(self, x, edge_index):
    h = self.gcn1(x, edge_index)
    h = torch.relu(h)
    h = F.dropout(h, p=0.5, training=self.training)
    h = self.gat2(h, edge_index)
    return h, F.log_softmax(h, dim=1)

## GAT + GCN: Use different layers in the network

In [28]:
class GAT_GCN(torch.nn.Module):
  def __init__(self, dim_in, dim_h, dim_out, heads=8):
    super().__init__()
    self.gat1 = GATConv(dim_in, dim_h, heads=heads)
    self.gcn2 = GCNConv(dim_h*heads, dim_out)

  def forward(self, x, edge_index):
    h = self.gat1(x, edge_index)
    h = F.elu(h)
    h = F.dropout(h, p=0.6, training=self.training)
    h = self.gcn2(h, edge_index)
    return h, F.log_softmax(h, dim=1)

## Custom L-CAT Implementation

In [64]:
class LCATLayerMy(MessagePassing):
    def __init__(
        self,
        in_channels: Union[int, Tuple[int, int]],
        out_channels: int,
        heads: int = 1,
        concat: bool = True,
        negative_slope: float = 0.2,
        dropout: float = 0.0,
        add_self_loops: bool = True,
        edge_dim: Optional[int] = None,
        fill_value: Union[float, Tensor, str] = 'mean',
        bias: bool = True,
        lambda1: Optional[Parameter] = None,
        lambda2: Optional[Parameter] = None,
        scaling_coef: Optional[int] = None,
        add_lambda_constraint: bool = True,
        hardcoded_lambdas: Optional[Tuple[int, int]] = None,
        **kwargs
    ):
        kwargs.setdefault('aggr', 'add')
        super().__init__(node_dim=0, **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.heads = heads
        self.concat = concat
        self.negative_slope = negative_slope
        self.dropout = dropout
        self.add_self_loops = add_self_loops
        self.edge_dim = edge_dim
        self.fill_value = fill_value
        # The learnable parameters lambda1 and lambda2 for L-CAT modelwise
        self.lambda1 = lambda1
        self.lambda2 = lambda2
        self.scaling_coef = scaling_coef if scaling_coef else 1
        if add_lambda_constraint:
          self.constraint = torch.sigmoid
        else:
          self.constraint = None
        self.hardcoded_lambdas = hardcoded_lambdas
        if hardcoded_lambdas:
          self.scaling_coef = 1
        

        if isinstance(in_channels, int):
            self.lin_src = Linear(in_channels, heads * out_channels,
                                  bias=False, weight_initializer='glorot')
            self.lin_dst = self.lin_src
        else:
            self.lin_src = Linear(in_channels[0], heads * out_channels, False,
                                  weight_initializer='glorot')
            self.lin_dst = Linear(in_channels[1], heads * out_channels, False,
                                  weight_initializer='glorot')

        # The learnable parameters to compute attention coefficients:
        self.att_src = Parameter(torch.Tensor(1, heads, out_channels))
        self.att_dst = Parameter(torch.Tensor(1, heads, out_channels))

        if self.lambda1 == None:
          if self.hardcoded_lambdas:
            self.lambda1 = torch.tensor(self.hardcoded_lambdas[0])
          else:
            # The learnable parameter lambda1 for L-CAT layerwise
            self.lambda1 = Parameter(torch.Tensor(1))
        if self.lambda2 == None:
          if self.hardcoded_lambdas:
            self.lambda2 = torch.tensor(self.hardcoded_lambdas[1])
          else:
            # The learnable parameter lambda2 for L-CAT layerwise
            self.lambda2 = Parameter(torch.Tensor(1))

        # also tried to add xavier_uniform initialization, better without it
        # torch.nn.init.xavier_uniform(self.lambda1)
        # torch.nn.init.xavier_uniform(self.lambda2)

        self.lin_edge = None
        self.register_parameter('att_edge', None)

        self.bias = Parameter(torch.Tensor(heads * out_channels))

        self.reset_parameters()

    def forward(self, x: Union[Tensor, OptPairTensor], edge_index: Adj,
                edge_attr: OptTensor = None, size: Size = None,
                return_attention_weights=None, adj_matrix=None):

        H, C = self.heads, self.out_channels

        neighs = adj_matrix.sum(0)
        sum_of_neighs_embeds = torch.mm(adj_matrix, x)

        # add constraint to lambda2
        if self.constraint:
          lambda2_constrained = self.constraint(self.lambda2 * self.scaling_coef)
        else:
          lambda2_constrained = self.lambda2

        x = torch.div((x + lambda2_constrained * sum_of_neighs_embeds).T, 1 + lambda2_constrained * neighs).T

        # We first transform the input node features
        x_src = x_dst = self.lin_src(x).view(-1, H, C)
        x = (x_src, x_dst)

        # Next, we compute node-level attention coefficients, both for source
        # and target nodes (if present):
        alpha_src = (x_src * self.att_src).sum(dim=-1)
        alpha_dst = None if x_dst is None else (x_dst * self.att_dst).sum(-1)
        alpha = (alpha_src, alpha_dst)

        num_nodes = x_src.size(0)
        if x_dst is not None:
            num_nodes = min(num_nodes, x_dst.size(0))
        num_nodes = min(size) if size is not None else num_nodes
        edge_index, edge_attr = remove_self_loops(
            edge_index, edge_attr)
        edge_index, edge_attr = add_self_loops(
            edge_index, edge_attr, fill_value=self.fill_value,
            num_nodes=num_nodes)

        alpha = self.edge_updater(edge_index, alpha=alpha, edge_attr=edge_attr)

        out = self.propagate(edge_index, x=x, alpha=alpha, size=size)
        out = out.view(-1, self.heads * self.out_channels)
        out = out + self.bias

        return out, (edge_index, alpha)

    def edge_update(self, alpha_j: Tensor, alpha_i: OptTensor,
                    edge_attr: OptTensor, index: Tensor, ptr: OptTensor,
                    size_i: Optional[int]) -> Tensor:
        # Given edge-level attention coefficients for source and target nodes,
        # we simply need to sum them up to "emulate" concatenation:
        alpha = alpha_j if alpha_i is None else alpha_j + alpha_i

        alpha = F.leaky_relu(alpha, self.negative_slope)
        
        # add constraint to lambda1
        if self.constraint:
          lambda1_constrained = self.constraint(self.lambda1 * self.scaling_coef)
        else:
          lambda1_constrained = self.lambda1

        alpha = lambda1_constrained * alpha

        alpha = softmax(alpha, index, ptr, size_i)
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)

        return alpha

    def reset_parameters(self):
        self.lin_src.reset_parameters()
        self.lin_dst.reset_parameters()
        if self.lin_edge is not None:
            self.lin_edge.reset_parameters()
        glorot(self.att_src)
        glorot(self.att_dst)
        glorot(self.att_edge)
        zeros(self.bias)

    def message(self, x_j: Tensor, alpha: Tensor) -> Tensor:
        return alpha.unsqueeze(-1) * x_j

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}({self.in_channels}, '
                f'{self.out_channels}, heads={self.heads})')

In [60]:
class LCATMy(torch.nn.Module):
  """Learnable Convolutional Attention Layer"""
  def __init__(self, dim_in, dim_h, dim_out, heads=8, hardcoded_lambdas=None, global_lambdas=False):
    super().__init__()
    if global_lambdas:
      self.lambda1 = Parameter(torch.Tensor(1))
      self.lambda2 = Parameter(torch.Tensor(1))

      self.gat1 = LCATLayerMy(dim_in, dim_h, heads=heads, lambda1=self.lambda1, lambda2=self.lambda2)
      self.gat2 = LCATLayerMy(dim_h*heads, dim_out, heads=1, lambda1=self.lambda1, lambda2=self.lambda2)
    else:
      self.gat1 = LCATLayerMy(dim_in, dim_h, heads=heads, hardcoded_lambdas=hardcoded_lambdas)
      self.gat2 = LCATLayerMy(dim_h*heads, dim_out, heads=1, hardcoded_lambdas=hardcoded_lambdas)

  def forward(self, x, edge_index, adj_matrix):
    h, weights = self.gat1(x, edge_index, return_attention_weights=True, adj_matrix=adj_matrix)
    h = F.elu(h)
    h = F.dropout(h, p=0.6, training=self.training)
    h, weights = self.gat2(h, edge_index, return_attention_weights=True, adj_matrix=adj_matrix)
    return h, F.log_softmax(h, dim=1)

# Utilities, Support Classes and Experimental Setup

In [13]:
def calculate_accuracy(pred, y):
    return ((pred == y).sum() / len(y)).item()

class Trainer:
  """Utility Class to handle the training pocedure"""
  def __init__(self, config):
    self.criterion = config["loss"] 
    self.optimizer = config["optimizer"]
    self.lr = config["learning_rate"]
    self.weight_decay = config["weight_decay"]
    self.epochs = config["num_epochs"] 
    self.report_freq = config["report_freq"]
    self.data_requirements = (config["req_edge_index"], config["req_adj_matrix"])
    self.training_time = None

  def train(self, model, data):
    optimizer = self.optimizer(model.parameters(),
                                      lr=self.lr,
                                      weight_decay=self.weight_decay)
    forward_input = [data.x]

    if self.data_requirements[0]:
      forward_input.append(data.edge_index)

    if self.data_requirements[1]:
      adj_matrix = torch_geometric.utils.to_dense_adj(data.edge_index).squeeze(0)
      adj_matrix = adj_matrix + torch.eye(adj_matrix.shape[0]) # added self-loop

      forward_input.append(adj_matrix)

    start = time.time()
    model.train()
    for epoch in range(self.epochs):
        optimizer.zero_grad()
        _, out = model(*forward_input)
        loss = self.criterion(out[data.train_mask], data.y[data.train_mask])
        acc = calculate_accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        # Validation
        val_loss = self.criterion(out[data.val_mask], data.y[data.val_mask])
        val_acc = calculate_accuracy(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])

        if(epoch % self.report_freq == 0):
            print(f'Epoch {epoch:>3} | Train Loss: {loss:.3f} | Train Acc: '
                  f'{acc*100:>6.2f}% | Val Loss: {val_loss:.2f} | '
                  f'Val Acc: {val_acc*100:.2f}%')
          
    end = time.time()
    self.training_time = end - start

    return model

  def print_training_time(self):
    print('Training took {:.2f}s'.format(self.training_time))


class Tester:
  """Utility Class to handle the testing pocedure"""
  def __init__(self, config):
    self.data_requirements = (config["req_edge_index"], config["req_adj_matrix"])
    self.testing_time = None

  def test(self, model, data):
    """Evaluate the model on test set and print the accuracy score."""
    forward_input = [data.x]

    if self.data_requirements[0]:
      forward_input.append(data.edge_index)

    if self.data_requirements[1]:
      adj_matrix = torch_geometric.utils.to_dense_adj(data.edge_index).squeeze(0)
      adj_matrix = adj_matrix + torch.eye(adj_matrix.shape[0]) # added self-loop

      forward_input.append(adj_matrix)

    start = time.time()
    model.eval()
    _, out = model(*forward_input)
    acc = calculate_accuracy(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
    end = time.time()
    self.testing_time = end - start

    return acc

  def print_testing_time(self):
    print('Testing took {:.2f}s'.format(self.testing_time))

In [61]:
class Experiment:
  """Utility Class to setup and run the exact experiment"""
  def __init__(self, data, model_type, config):
    self.model_name = config["model_name"]
    self.dataset_name = config["dataset_name"]
    if "num_heads" in config.keys():
      if "hardcoded_lambdas" in config.keys():
        self.model = model_type(config["input_dimension"], config["hidden_dimension"], 
                                config["output_dimension"], hardcoded_lambdas=config["hardcoded_lambdas"], 
                                global_lambdas=config["global_lambdas"] if "global_lambdas" in config.keys() else None)        
      else:
        if "global_lambdas" in config.keys():
          self.model = model_type(config["input_dimension"], config["hidden_dimension"], 
                                          config["output_dimension"], heads=config["num_heads"],
                                          global_lambdas=config["global_lambdas"])
        else:
          self.model = model_type(config["input_dimension"], config["hidden_dimension"], 
                                  config["output_dimension"], heads=config["num_heads"])
    else:
      self.model = model_type(config["input_dimension"], config["hidden_dimension"], config["output_dimension"])
    self.trainer = Trainer(config)
    self.tester = Tester(config)
    self.data = data

    print(f"Finished experiment initialization (model: {self.model_name}, dataset: {self.dataset_name}).")
    print(self.model)

  def run(self):
    print("Started model training.")
    self.model = self.trainer.train(self.model, self.data)
    print("Finished model training.")

    print("Started model testing.")
    test_acc = self.tester.test(self.model, self.data)
    print("Finished model testing.")

    self.print_experiment_report(test_acc)

    return test_acc, self.trainer.training_time, self.tester.testing_time

  def print_experiment_report(self, accuracy):
    self.trainer.print_training_time()
    self.tester.print_testing_time()
    print(f'{self.model_name} test accuracy: {accuracy*100:.2f}%\n')

# Experiments

## GCN Experiments

### CiteSeer

In [62]:
gcn_cite_seer_config = {
    "model_name": "GCN",
    "dataset_name": "CiteSeer",
    "input_dimension": dataset_cite_seer.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cite_seer.num_classes,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gcn_cite_seer = Experiment(data_cite_seer, GCN, gcn_cite_seer_config)

accuracy, training_time, testing_time =experiment_gcn_cite_seer.run()

Finished experiment initialization (model: GCN, dataset: CiteSeer).
GCN(
  (gcn1): GCNConv(3703, 16)
  (gcn2): GCNConv(16, 6)
)
Started model training.
Epoch   0 | Train Loss: 1.792 | Train Acc:  15.83% | Val Loss: 1.79 | Val Acc: 14.20%
Epoch  10 | Train Loss: 0.365 | Train Acc:  92.50% | Val Loss: 1.25 | Val Acc: 57.20%
Epoch  20 | Train Loss: 0.107 | Train Acc:  96.67% | Val Loss: 1.34 | Val Acc: 56.40%
Epoch  30 | Train Loss: 0.069 | Train Acc:  98.33% | Val Loss: 1.60 | Val Acc: 54.80%
Epoch  40 | Train Loss: 0.019 | Train Acc: 100.00% | Val Loss: 1.54 | Val Acc: 56.80%
Epoch  50 | Train Loss: 0.030 | Train Acc: 100.00% | Val Loss: 1.66 | Val Acc: 56.20%
Epoch  60 | Train Loss: 0.037 | Train Acc:  99.17% | Val Loss: 1.47 | Val Acc: 57.80%
Epoch  70 | Train Loss: 0.042 | Train Acc:  99.17% | Val Loss: 1.44 | Val Acc: 58.60%
Epoch  80 | Train Loss: 0.037 | Train Acc: 100.00% | Val Loss: 1.48 | Val Acc: 59.60%
Epoch  90 | Train Loss: 0.024 | Train Acc: 100.00% | Val Loss: 1.36 | Val 

### Cora

In [50]:
gcn_cora_config = {
    "model_name": "GCN",
    "dataset_name": "Cora",
    "input_dimension": dataset_cora.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cora.num_classes,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gcn_cora = Experiment(data_cora, GCN, gcn_cora_config)

accuracy, training_time, testing_time = experiment_gcn_cora.run()

Finished experiment initialization (model: GCN, dataset: Cora).
GCN(
  (gcn1): GCNConv(1433, 16)
  (gcn2): GCNConv(16, 7)
)
Started model training.
Epoch   0 | Train Loss: 1.956 | Train Acc:  12.14% | Val Loss: 1.96 | Val Acc: 10.40%
Epoch  10 | Train Loss: 0.786 | Train Acc:  87.86% | Val Loss: 1.29 | Val Acc: 65.80%
Epoch  20 | Train Loss: 0.306 | Train Acc:  95.00% | Val Loss: 1.06 | Val Acc: 67.80%
Epoch  30 | Train Loss: 0.167 | Train Acc:  96.43% | Val Loss: 1.05 | Val Acc: 69.60%
Epoch  40 | Train Loss: 0.076 | Train Acc:  99.29% | Val Loss: 1.07 | Val Acc: 70.40%
Epoch  50 | Train Loss: 0.100 | Train Acc:  97.14% | Val Loss: 1.05 | Val Acc: 70.20%
Epoch  60 | Train Loss: 0.066 | Train Acc:  99.29% | Val Loss: 1.08 | Val Acc: 70.20%
Epoch  70 | Train Loss: 0.048 | Train Acc: 100.00% | Val Loss: 1.14 | Val Acc: 70.00%
Epoch  80 | Train Loss: 0.046 | Train Acc: 100.00% | Val Loss: 1.00 | Val Acc: 71.20%
Epoch  90 | Train Loss: 0.053 | Train Acc:  99.29% | Val Loss: 1.12 | Val Acc:

### PubMed

In [52]:
gcn_pub_med_config = {
    "model_name": "GCN",
    "dataset_name": "PubMed",
    "input_dimension": dataset_pub_med.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_pub_med.num_classes,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gcn_pub_med = Experiment(data_pub_med, GCN, gcn_pub_med_config)

accuracy, training_time, testing_time = experiment_gcn_pub_med.run()

Finished experiment initialization (model: GCN, dataset: PubMed).
GCN(
  (gcn1): GCNConv(500, 16)
  (gcn2): GCNConv(16, 3)
)
Started model training.
Epoch   0 | Train Loss: 1.098 | Train Acc:  31.67% | Val Loss: 1.10 | Val Acc: 33.20%
Epoch  10 | Train Loss: 0.948 | Train Acc:  83.33% | Val Loss: 1.01 | Val Acc: 67.00%
Epoch  20 | Train Loss: 0.755 | Train Acc:  85.00% | Val Loss: 0.89 | Val Acc: 71.00%
Epoch  30 | Train Loss: 0.503 | Train Acc:  95.00% | Val Loss: 0.78 | Val Acc: 70.60%
Epoch  40 | Train Loss: 0.425 | Train Acc:  93.33% | Val Loss: 0.70 | Val Acc: 73.40%
Epoch  50 | Train Loss: 0.339 | Train Acc:  93.33% | Val Loss: 0.67 | Val Acc: 72.00%
Epoch  60 | Train Loss: 0.301 | Train Acc:  95.00% | Val Loss: 0.65 | Val Acc: 73.80%
Epoch  70 | Train Loss: 0.161 | Train Acc: 100.00% | Val Loss: 0.63 | Val Acc: 74.40%
Epoch  80 | Train Loss: 0.174 | Train Acc:  96.67% | Val Loss: 0.64 | Val Acc: 74.60%
Epoch  90 | Train Loss: 0.166 | Train Acc:  96.67% | Val Loss: 0.65 | Val Acc

## Custom GCN Experiments

### CiteSeer

In [57]:
custom_gcn_cite_seer_config = {
    "model_name": "GCNMy",
    "dataset_name": "CiteSeer",
    "input_dimension": dataset_cite_seer.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cite_seer.num_classes,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": False,
    "req_adj_matrix": True
}

experiment_gcn_my_cite_seer = Experiment(data_cite_seer, GCNMy, custom_gcn_cite_seer_config)

accuracy, training_time, testing_time = experiment_gcn_my_cite_seer.run()

  torch.nn.init.xavier_uniform(self.W)


Finished experiment initialization (model: GCNMy, dataset: CiteSeer).
GCNMy(
  (gcn1): GCNConvMy()
  (gcn2): GCNConvMy()
)
Started model training.
Epoch   0 | Train Loss: 1.798 | Train Acc:  16.67% | Val Loss: 1.79 | Val Acc: 15.20%
Epoch  10 | Train Loss: 0.505 | Train Acc:  89.17% | Val Loss: 1.26 | Val Acc: 56.20%
Epoch  20 | Train Loss: 0.167 | Train Acc:  95.83% | Val Loss: 1.33 | Val Acc: 59.00%
Epoch  30 | Train Loss: 0.127 | Train Acc:  95.00% | Val Loss: 1.43 | Val Acc: 58.80%
Epoch  40 | Train Loss: 0.077 | Train Acc:  99.17% | Val Loss: 1.39 | Val Acc: 59.40%
Epoch  50 | Train Loss: 0.067 | Train Acc:  98.33% | Val Loss: 1.56 | Val Acc: 60.00%
Epoch  60 | Train Loss: 0.047 | Train Acc:  99.17% | Val Loss: 1.48 | Val Acc: 60.20%
Epoch  70 | Train Loss: 0.062 | Train Acc:  98.33% | Val Loss: 1.56 | Val Acc: 60.20%
Epoch  80 | Train Loss: 0.033 | Train Acc: 100.00% | Val Loss: 1.47 | Val Acc: 61.20%
Epoch  90 | Train Loss: 0.055 | Train Acc:  98.33% | Val Loss: 1.57 | Val Acc: 

### Cora

In [None]:
custom_gcn_cora_config = {
    "model_name": "GCNMy",
    "dataset_name": "Cora",
    "input_dimension": dataset_cora.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cora.num_classes,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": False,
    "req_adj_matrix": True
}

experiment_gcn_my_cora = Experiment(data_cora, GCNMy, custom_gcn_cora_config)

accuracy, training_time, testing_time = experiment_gcn_my_cora.run()

### PubMed

In [None]:
custom_gcn_pub_med_config = {
    "model_name": "GCNMy",
    "dataset_name": "PubMed",
    "input_dimension": dataset_pub_med.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_pub_med.num_classes,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": False,
    "req_adj_matrix": True
}

experiment_gcn_my_pub_med = Experiment(data_pub_med, GCNMy, custom_gcn_pub_med_config)

accuracy, training_time, testing_time = experiment_gcn_my_pub_med.run()

## GAT Experiments

### CiteSeer

In [65]:
gat_cite_seer_config = {
    "model_name": "GAT",
    "dataset_name": "CiteSeer",
    "input_dimension": dataset_cite_seer.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cite_seer.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gat_cite_seer = Experiment(data_cite_seer, GAT, gat_cite_seer_config)

accuracy, training_time, testing_time = experiment_gat_cite_seer.run()

Finished experiment initialization (model: GAT, dataset: CiteSeer).
GAT(
  (gat1): GATv2Conv(3703, 16, heads=8)
  (gat2): GATv2Conv(128, 6, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.805 | Train Acc:  15.00% | Val Loss: 1.82 | Val Acc: 17.00%
Epoch  10 | Train Loss: 0.004 | Train Acc: 100.00% | Val Loss: 1.37 | Val Acc: 64.00%
Epoch  20 | Train Loss: 0.000 | Train Acc: 100.00% | Val Loss: 1.66 | Val Acc: 63.20%
Epoch  30 | Train Loss: 0.001 | Train Acc: 100.00% | Val Loss: 1.53 | Val Acc: 62.80%
Epoch  40 | Train Loss: 0.005 | Train Acc: 100.00% | Val Loss: 1.26 | Val Acc: 67.20%
Epoch  50 | Train Loss: 0.004 | Train Acc: 100.00% | Val Loss: 1.14 | Val Acc: 68.60%
Epoch  60 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 1.12 | Val Acc: 67.80%
Epoch  70 | Train Loss: 0.004 | Train Acc: 100.00% | Val Loss: 1.13 | Val Acc: 67.40%
Epoch  80 | Train Loss: 0.004 | Train Acc: 100.00% | Val Loss: 1.14 | Val Acc: 67.20%
Epoch  90 | Train Loss: 0.005 | Train Acc: 100.00% 

### Cora

In [66]:
gat_cora_config = {
    "model_name": "GAT",
    "dataset_name": "Cora",
    "input_dimension": dataset_cora.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cora.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gat_cora = Experiment(data_cora, GAT, gat_cora_config)

accuracy, training_time, testing_time = experiment_gat_cora.run()

Finished experiment initialization (model: GAT, dataset: Cora).
GAT(
  (gat1): GATv2Conv(1433, 16, heads=8)
  (gat2): GATv2Conv(128, 7, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.959 | Train Acc:  14.29% | Val Loss: 1.97 | Val Acc: 13.80%
Epoch  10 | Train Loss: 0.018 | Train Acc: 100.00% | Val Loss: 0.70 | Val Acc: 79.00%
Epoch  20 | Train Loss: 0.002 | Train Acc: 100.00% | Val Loss: 0.97 | Val Acc: 75.60%
Epoch  30 | Train Loss: 0.001 | Train Acc: 100.00% | Val Loss: 0.93 | Val Acc: 74.80%
Epoch  40 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 0.91 | Val Acc: 74.80%
Epoch  50 | Train Loss: 0.006 | Train Acc: 100.00% | Val Loss: 0.82 | Val Acc: 75.60%
Epoch  60 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 0.76 | Val Acc: 76.40%
Epoch  70 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 0.79 | Val Acc: 76.20%
Epoch  80 | Train Loss: 0.006 | Train Acc: 100.00% | Val Loss: 0.85 | Val Acc: 76.20%
Epoch  90 | Train Loss: 0.006 | Train Acc: 100.00% | Va

### PubMed

In [67]:
gat_pub_med_config = {
    "model_name": "GAT",
    "dataset_name": "PubMed",
    "input_dimension": dataset_pub_med.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_pub_med.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gat_pub_med = Experiment(data_pub_med, GAT, gat_pub_med_config)

accuracy, training_time, testing_time = experiment_gat_pub_med.run()

Finished experiment initialization (model: GAT, dataset: PubMed).
GAT(
  (gat1): GATv2Conv(500, 16, heads=8)
  (gat2): GATv2Conv(128, 3, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.105 | Train Acc:  33.33% | Val Loss: 1.09 | Val Acc: 38.80%
Epoch  10 | Train Loss: 0.460 | Train Acc:  95.00% | Val Loss: 0.70 | Val Acc: 74.20%
Epoch  20 | Train Loss: 0.128 | Train Acc:  98.33% | Val Loss: 0.58 | Val Acc: 76.60%
Epoch  30 | Train Loss: 0.055 | Train Acc: 100.00% | Val Loss: 0.60 | Val Acc: 76.00%
Epoch  40 | Train Loss: 0.042 | Train Acc: 100.00% | Val Loss: 0.62 | Val Acc: 76.40%
Epoch  50 | Train Loss: 0.049 | Train Acc: 100.00% | Val Loss: 0.68 | Val Acc: 74.60%
Epoch  60 | Train Loss: 0.046 | Train Acc: 100.00% | Val Loss: 0.64 | Val Acc: 76.00%
Epoch  70 | Train Loss: 0.031 | Train Acc: 100.00% | Val Loss: 0.65 | Val Acc: 75.80%
Epoch  80 | Train Loss: 0.030 | Train Acc: 100.00% | Val Loss: 0.70 | Val Acc: 73.60%
Epoch  90 | Train Loss: 0.033 | Train Acc: 100.00% | V

## GATv2 Experiments

### CiteSeer

In [68]:
gatv2_cite_seer_config = {
    "model_name": "GATv2",
    "dataset_name": "CiteSeer",
    "input_dimension": dataset_cite_seer.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cite_seer.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gatv2_cite_seer = Experiment(data_cite_seer, GATv2, gatv2_cite_seer_config)

accuracy, training_time, testing_time = experiment_gatv2_cite_seer.run()

Finished experiment initialization (model: GATv2, dataset: CiteSeer).
GATv2(
  (gat1): GATv2Conv(3703, 16, heads=8)
  (gat2): GATv2Conv(128, 6, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.818 | Train Acc:  13.33% | Val Loss: 1.80 | Val Acc: 15.40%
Epoch  10 | Train Loss: 0.005 | Train Acc: 100.00% | Val Loss: 1.47 | Val Acc: 65.00%
Epoch  20 | Train Loss: 0.000 | Train Acc: 100.00% | Val Loss: 1.63 | Val Acc: 63.00%
Epoch  30 | Train Loss: 0.001 | Train Acc: 100.00% | Val Loss: 1.46 | Val Acc: 66.20%
Epoch  40 | Train Loss: 0.002 | Train Acc: 100.00% | Val Loss: 1.33 | Val Acc: 66.60%
Epoch  50 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 1.17 | Val Acc: 67.80%
Epoch  60 | Train Loss: 0.005 | Train Acc: 100.00% | Val Loss: 1.14 | Val Acc: 70.80%
Epoch  70 | Train Loss: 0.005 | Train Acc: 100.00% | Val Loss: 1.18 | Val Acc: 66.20%
Epoch  80 | Train Loss: 0.005 | Train Acc: 100.00% | Val Loss: 1.14 | Val Acc: 68.60%
Epoch  90 | Train Loss: 0.004 | Train Acc: 100.

### Cora

In [69]:
gatv2_cora_config = {
    "model_name": "GATv2",
    "dataset_name": "Cora",
    "input_dimension": dataset_cora.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cora.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gatv2_cora = Experiment(data_cora, GATv2, gatv2_cora_config)

accuracy, training_time, testing_time = experiment_gatv2_cora.run()

Finished experiment initialization (model: GATv2, dataset: Cora).
GATv2(
  (gat1): GATv2Conv(1433, 16, heads=8)
  (gat2): GATv2Conv(128, 7, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.953 | Train Acc:  13.57% | Val Loss: 1.93 | Val Acc: 18.40%
Epoch  10 | Train Loss: 0.022 | Train Acc: 100.00% | Val Loss: 0.87 | Val Acc: 74.60%
Epoch  20 | Train Loss: 0.001 | Train Acc: 100.00% | Val Loss: 1.29 | Val Acc: 74.60%
Epoch  30 | Train Loss: 0.002 | Train Acc: 100.00% | Val Loss: 1.22 | Val Acc: 74.00%
Epoch  40 | Train Loss: 0.004 | Train Acc: 100.00% | Val Loss: 1.04 | Val Acc: 71.40%
Epoch  50 | Train Loss: 0.005 | Train Acc: 100.00% | Val Loss: 0.94 | Val Acc: 73.80%
Epoch  60 | Train Loss: 0.008 | Train Acc: 100.00% | Val Loss: 0.89 | Val Acc: 74.40%
Epoch  70 | Train Loss: 0.006 | Train Acc: 100.00% | Val Loss: 0.85 | Val Acc: 74.60%
Epoch  80 | Train Loss: 0.008 | Train Acc: 100.00% | Val Loss: 0.90 | Val Acc: 73.40%
Epoch  90 | Train Loss: 0.007 | Train Acc: 100.00% 

### PubMed

In [70]:
gatv2_pub_med_config = {
    "model_name": "GATv2",
    "dataset_name": "PubMed",
    "input_dimension": dataset_pub_med.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_pub_med.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gatv2_pub_med = Experiment(data_pub_med, GATv2, gatv2_pub_med_config)

accuracy, training_time, testing_time = experiment_gatv2_pub_med.run()

Finished experiment initialization (model: GATv2, dataset: PubMed).
GATv2(
  (gat1): GATv2Conv(500, 16, heads=8)
  (gat2): GATv2Conv(128, 3, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.099 | Train Acc:  33.33% | Val Loss: 1.12 | Val Acc: 21.40%
Epoch  10 | Train Loss: 0.435 | Train Acc:  95.00% | Val Loss: 0.70 | Val Acc: 74.60%
Epoch  20 | Train Loss: 0.137 | Train Acc:  95.00% | Val Loss: 0.56 | Val Acc: 78.40%
Epoch  30 | Train Loss: 0.057 | Train Acc: 100.00% | Val Loss: 0.57 | Val Acc: 78.80%
Epoch  40 | Train Loss: 0.054 | Train Acc: 100.00% | Val Loss: 0.63 | Val Acc: 77.80%
Epoch  50 | Train Loss: 0.054 | Train Acc: 100.00% | Val Loss: 0.62 | Val Acc: 77.80%
Epoch  60 | Train Loss: 0.047 | Train Acc: 100.00% | Val Loss: 0.61 | Val Acc: 77.20%
Epoch  70 | Train Loss: 0.043 | Train Acc: 100.00% | Val Loss: 0.61 | Val Acc: 76.80%
Epoch  80 | Train Loss: 0.029 | Train Acc: 100.00% | Val Loss: 0.60 | Val Acc: 77.40%
Epoch  90 | Train Loss: 0.031 | Train Acc: 100.00%

## Custom L-CAT Experiments

### CiteSeer

In [65]:
lcat_cite_seer_config = {
    "model_name": "L-Cat",
    "dataset_name": "CiteSeer",
    "input_dimension": dataset_cite_seer.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cite_seer.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": True
}

experiment_lcat_cite_seer = Experiment(data_cite_seer, LCATMy, lcat_cite_seer_config)

accuracy, training_time, testing_time = experiment_lcat_cite_seer.run()

Finished experiment initialization (model: L-Cat, dataset: CiteSeer).
LCATMy(
  (gat1): LCATLayerMy(3703, 16, heads=8)
  (gat2): LCATLayerMy(128, 6, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.814 | Train Acc:  10.00% | Val Loss: 1.81 | Val Acc: 9.60%
Epoch  10 | Train Loss: 0.039 | Train Acc:  98.33% | Val Loss: 1.47 | Val Acc: 66.40%
Epoch  20 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 1.69 | Val Acc: 65.40%
Epoch  30 | Train Loss: 0.012 | Train Acc: 100.00% | Val Loss: 1.59 | Val Acc: 66.00%
Epoch  40 | Train Loss: 0.030 | Train Acc:  98.33% | Val Loss: 1.45 | Val Acc: 65.00%
Epoch  50 | Train Loss: 0.016 | Train Acc: 100.00% | Val Loss: 1.34 | Val Acc: 65.80%
Epoch  60 | Train Loss: 0.018 | Train Acc:  99.17% | Val Loss: 1.37 | Val Acc: 66.00%
Epoch  70 | Train Loss: 0.009 | Train Acc: 100.00% | Val Loss: 1.30 | Val Acc: 66.20%
Epoch  80 | Train Loss: 0.025 | Train Acc:  98.33% | Val Loss: 1.32 | Val Acc: 66.60%
Epoch  90 | Train Loss: 0.009 | Train Acc: 

### Cora

In [74]:
lcat_cora_config = {
    "model_name": "L-Cat",
    "dataset_name": "Cora",
    "input_dimension": dataset_cora.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cora.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": True
}

experiment_lcat_cora = Experiment(data_cora, LCATMy, lcat_cora_config)

accuracy, training_time, testing_time = experiment_lcat_cora.run()

Finished experiment initialization (model: L-Cat, dataset: Cora).
LCATMy(
  (gat1): LCATLayerMy(1433, 16, heads=8)
  (gat2): LCATLayerMy(128, 7, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.932 | Train Acc:  17.86% | Val Loss: 1.93 | Val Acc: 18.20%
Epoch  10 | Train Loss: 0.022 | Train Acc: 100.00% | Val Loss: 0.73 | Val Acc: 77.60%
Epoch  20 | Train Loss: 0.002 | Train Acc: 100.00% | Val Loss: 0.97 | Val Acc: 76.20%
Epoch  30 | Train Loss: 0.002 | Train Acc: 100.00% | Val Loss: 0.97 | Val Acc: 76.40%
Epoch  40 | Train Loss: 0.004 | Train Acc: 100.00% | Val Loss: 0.88 | Val Acc: 76.00%
Epoch  50 | Train Loss: 0.009 | Train Acc: 100.00% | Val Loss: 0.80 | Val Acc: 76.80%
Epoch  60 | Train Loss: 0.011 | Train Acc: 100.00% | Val Loss: 0.79 | Val Acc: 76.80%
Epoch  70 | Train Loss: 0.010 | Train Acc: 100.00% | Val Loss: 0.80 | Val Acc: 76.80%
Epoch  80 | Train Loss: 0.010 | Train Acc: 100.00% | Val Loss: 0.82 | Val Acc: 76.20%
Epoch  90 | Train Loss: 0.008 | Train Acc: 100

### PubMed

In [75]:
lcat_pub_med_config = {
    "model_name": "L-Cat",
    "dataset_name": "PubMed",
    "input_dimension": dataset_pub_med.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_pub_med.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": True
}

experiment_lcat_pub_med = Experiment(data_pub_med, LCATMy, lcat_pub_med_config)

accuracy, training_time, testing_time = experiment_lcat_pub_med.run()

Finished experiment initialization (model: L-Cat, dataset: PubMed).
LCATMy(
  (gat1): LCATLayerMy(500, 16, heads=8)
  (gat2): LCATLayerMy(128, 3, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.098 | Train Acc:  30.00% | Val Loss: 1.10 | Val Acc: 33.60%
Epoch  10 | Train Loss: 0.432 | Train Acc:  93.33% | Val Loss: 0.68 | Val Acc: 74.80%
Epoch  20 | Train Loss: 0.136 | Train Acc:  96.67% | Val Loss: 0.56 | Val Acc: 77.00%
Epoch  30 | Train Loss: 0.060 | Train Acc: 100.00% | Val Loss: 0.57 | Val Acc: 78.60%
Epoch  40 | Train Loss: 0.058 | Train Acc: 100.00% | Val Loss: 0.58 | Val Acc: 79.00%
Epoch  50 | Train Loss: 0.056 | Train Acc: 100.00% | Val Loss: 0.58 | Val Acc: 77.80%
Epoch  60 | Train Loss: 0.052 | Train Acc: 100.00% | Val Loss: 0.58 | Val Acc: 77.60%
Epoch  70 | Train Loss: 0.048 | Train Acc: 100.00% | Val Loss: 0.58 | Val Acc: 77.80%
Epoch  80 | Train Loss: 0.039 | Train Acc: 100.00% | Val Loss: 0.59 | Val Acc: 77.60%
Epoch  90 | Train Loss: 0.041 | Train Acc: 10

## CAT Experiments

### CiteSeer

In [54]:
cat_cite_seer_config = {
    "model_name": "CAT",
    "dataset_name": "CiteSeer",
    "input_dimension": dataset_cite_seer.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cite_seer.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": True,
    "hardcoded_lambdas": (1, 1) # by the statement of the paper and by the design of the layer L-Cat with thse lambdas will be CAT
}

experiment_cat_cite_seer = Experiment(data_cite_seer, LCATMy, cat_cite_seer_config)

accuracy, training_time, testing_time = experiment_cat_cite_seer.run()

yeees
Finished experiment initialization (model: Cat, dataset: CiteSeer).
LCATMy(
  (gat1): LCATLayerMy(3703, 16, heads=8)
  (gat2): LCATLayerMy(128, 6, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.803 | Train Acc:  19.17% | Val Loss: 1.81 | Val Acc: 12.00%
Epoch  10 | Train Loss: 0.046 | Train Acc:  97.50% | Val Loss: 1.47 | Val Acc: 65.40%
Epoch  20 | Train Loss: 0.011 | Train Acc: 100.00% | Val Loss: 1.68 | Val Acc: 65.40%
Epoch  30 | Train Loss: 0.028 | Train Acc:  99.17% | Val Loss: 1.57 | Val Acc: 66.40%
Epoch  40 | Train Loss: 0.012 | Train Acc: 100.00% | Val Loss: 1.38 | Val Acc: 65.80%
Epoch  50 | Train Loss: 0.010 | Train Acc: 100.00% | Val Loss: 1.27 | Val Acc: 66.20%
Epoch  60 | Train Loss: 0.008 | Train Acc: 100.00% | Val Loss: 1.26 | Val Acc: 67.40%
Epoch  70 | Train Loss: 0.009 | Train Acc: 100.00% | Val Loss: 1.26 | Val Acc: 68.00%
Epoch  80 | Train Loss: 0.008 | Train Acc: 100.00% | Val Loss: 1.22 | Val Acc: 68.20%
Epoch  90 | Train Loss: 0.007 | Train 

### Cora

In [56]:
cat_cora_config = {
    "model_name": "CAT",
    "dataset_name": "Cora",
    "input_dimension": dataset_cora.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cora.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": True,
    "hardcoded_lambdas": (1, 1)
}

experiment_cat_cora = Experiment(data_cora, LCATMy, cat_cora_config)

accuracy, training_time, testing_time = experiment_cat_cora.run()

Finished experiment initialization (model: Cat, dataset: Cora).
LCATMy(
  (gat1): LCATLayerMy(1433, 16, heads=8)
  (gat2): LCATLayerMy(128, 7, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.963 | Train Acc:   9.29% | Val Loss: 1.99 | Val Acc: 6.00%
Epoch  10 | Train Loss: 0.040 | Train Acc:  99.29% | Val Loss: 0.68 | Val Acc: 80.40%
Epoch  20 | Train Loss: 0.009 | Train Acc: 100.00% | Val Loss: 0.88 | Val Acc: 79.00%
Epoch  30 | Train Loss: 0.005 | Train Acc: 100.00% | Val Loss: 0.95 | Val Acc: 77.20%
Epoch  40 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 0.84 | Val Acc: 78.40%
Epoch  50 | Train Loss: 0.012 | Train Acc: 100.00% | Val Loss: 0.80 | Val Acc: 76.40%
Epoch  60 | Train Loss: 0.014 | Train Acc: 100.00% | Val Loss: 0.81 | Val Acc: 77.40%
Epoch  70 | Train Loss: 0.010 | Train Acc: 100.00% | Val Loss: 0.88 | Val Acc: 76.80%
Epoch  80 | Train Loss: 0.009 | Train Acc: 100.00% | Val Loss: 0.82 | Val Acc: 77.40%
Epoch  90 | Train Loss: 0.008 | Train Acc: 100.00

### PubMed

In [58]:
cat_pub_med_config = {
    "model_name": "CAT",
    "dataset_name": "PubMed",
    "input_dimension": dataset_pub_med.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_pub_med.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": True,
    "hardcoded_lambdas": (1, 1)
}

experiment_cat_pub_med = Experiment(data_pub_med, LCATMy, cat_pub_med_config)

accuracy, training_time, testing_time = experiment_cat_pub_med.run()

Finished experiment initialization (model: CAT, dataset: PubMed).
LCATMy(
  (gat1): LCATLayerMy(500, 16, heads=8)
  (gat2): LCATLayerMy(128, 3, heads=1)
)


## Global L-CAT Experimnts

### CiteSeer

In [62]:
lcat_global_cite_seer_config = {
    "model_name": "L-Cat",
    "dataset_name": "CiteSeer",
    "input_dimension": dataset_cite_seer.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cite_seer.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": True,
    "global_lambdas": True
}

experiment_lcat_global_cite_seer = Experiment(data_cite_seer, LCATMy, lcat_global_cite_seer_config)

accuracy, training_time, testing_time = experiment_lcat_global_cite_seer.run()


Finished experiment initialization (model: L-Cat, dataset: CiteSeer).
LCATMy(
  (gat1): LCATLayerMy(3703, 16, heads=8)
  (gat2): LCATLayerMy(128, 6, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.799 | Train Acc:  17.50% | Val Loss: 1.79 | Val Acc: 19.60%
Epoch  10 | Train Loss: 0.057 | Train Acc:  97.50% | Val Loss: 1.49 | Val Acc: 65.00%
Epoch  20 | Train Loss: 0.028 | Train Acc:  99.17% | Val Loss: 1.53 | Val Acc: 66.20%
Epoch  30 | Train Loss: 0.010 | Train Acc:  99.17% | Val Loss: 1.40 | Val Acc: 67.40%
Epoch  40 | Train Loss: 0.011 | Train Acc: 100.00% | Val Loss: 1.26 | Val Acc: 68.20%
Epoch  50 | Train Loss: 0.013 | Train Acc:  99.17% | Val Loss: 1.20 | Val Acc: 68.60%
Epoch  60 | Train Loss: 0.010 | Train Acc: 100.00% | Val Loss: 1.27 | Val Acc: 65.20%
Epoch  70 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 1.23 | Val Acc: 65.80%
Epoch  80 | Train Loss: 0.008 | Train Acc: 100.00% | Val Loss: 1.23 | Val Acc: 66.80%
Epoch  90 | Train Loss: 0.005 | Train Acc:

### Cora

In [66]:
lcat_global_cora_config = {
    "model_name": "L-Cat",
    "dataset_name": "Cora",
    "input_dimension": dataset_cora.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cora.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": True,
    "global_lambdas": True
}

experiment_lcat_global_cora = Experiment(data_cora, LCATMy, lcat_global_cora_config)

accuracy, training_time, testing_time = experiment_lcat_global_cora.run()

Finished experiment initialization (model: L-Cat, dataset: Cora).
LCATMy(
  (gat1): LCATLayerMy(1433, 16, heads=8)
  (gat2): LCATLayerMy(128, 7, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.937 | Train Acc:  22.86% | Val Loss: 1.94 | Val Acc: 17.20%
Epoch  10 | Train Loss: 0.042 | Train Acc:  99.29% | Val Loss: 0.74 | Val Acc: 78.60%
Epoch  20 | Train Loss: 0.006 | Train Acc: 100.00% | Val Loss: 1.00 | Val Acc: 77.00%
Epoch  30 | Train Loss: 0.004 | Train Acc: 100.00% | Val Loss: 1.08 | Val Acc: 76.80%
Epoch  40 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 0.93 | Val Acc: 76.60%
Epoch  50 | Train Loss: 0.011 | Train Acc: 100.00% | Val Loss: 0.85 | Val Acc: 76.80%
Epoch  60 | Train Loss: 0.013 | Train Acc: 100.00% | Val Loss: 0.87 | Val Acc: 76.80%
Epoch  70 | Train Loss: 0.009 | Train Acc: 100.00% | Val Loss: 0.89 | Val Acc: 76.80%
Epoch  80 | Train Loss: 0.009 | Train Acc: 100.00% | Val Loss: 0.87 | Val Acc: 76.40%
Epoch  90 | Train Loss: 0.007 | Train Acc: 100

### PubMed

In [67]:
lcat_global_pub_med_config = {
    "model_name": "L-Cat",
    "dataset_name": "PubMed",
    "input_dimension": dataset_pub_med.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_pub_med.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": True,
    "global_lambdas": True
}

experiment_lcat_global_pub_med = Experiment(data_pub_med, LCATMy, lcat_global_pub_med_config)

accuracy, training_time, testing_time = experiment_lcat_global_pub_med.run()

Finished experiment initialization (model: L-Cat, dataset: PubMed).
LCATMy(
  (gat1): LCATLayerMy(500, 16, heads=8)
  (gat2): LCATLayerMy(128, 3, heads=1)
)


## GCN+GAT Experiments

### CireSeer

In [25]:
gcn_gat_cite_seer_config = {
    "model_name": "GCN_GAT",
    "dataset_name": "CiteSeer",
    "input_dimension": dataset_cite_seer.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cite_seer.num_classes,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gcn_gat_cite_seer = Experiment(data_cite_seer, GCN_GAT, gcn_gat_cite_seer_config)

accuracy, training_time, testing_time = experiment_gcn_gat_cite_seer.run()

Finished experiment initialization (model: GCN_GAT, dataset: CiteSeer).
GCN_GAT(
  (gcn1): GCNConv(3703, 16)
  (gat2): GATConv(16, 6, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.795 | Train Acc:  10.00% | Val Loss: 1.80 | Val Acc: 14.40%
Epoch  10 | Train Loss: 0.439 | Train Acc:  88.33% | Val Loss: 1.25 | Val Acc: 59.00%
Epoch  20 | Train Loss: 0.150 | Train Acc:  97.50% | Val Loss: 1.41 | Val Acc: 56.00%
Epoch  30 | Train Loss: 0.117 | Train Acc:  97.50% | Val Loss: 1.22 | Val Acc: 60.60%
Epoch  40 | Train Loss: 0.067 | Train Acc:  98.33% | Val Loss: 1.45 | Val Acc: 57.40%
Epoch  50 | Train Loss: 0.072 | Train Acc:  99.17% | Val Loss: 1.37 | Val Acc: 60.60%
Epoch  60 | Train Loss: 0.063 | Train Acc:  99.17% | Val Loss: 1.47 | Val Acc: 61.80%
Epoch  70 | Train Loss: 0.053 | Train Acc: 100.00% | Val Loss: 1.35 | Val Acc: 60.80%
Epoch  80 | Train Loss: 0.054 | Train Acc:  99.17% | Val Loss: 1.51 | Val Acc: 57.00%
Epoch  90 | Train Loss: 0.056 | Train Acc:  98.33% | Val 

### Cora

In [26]:
gcn_gat_cora_config = {
    "model_name": "GCN_GAT",
    "dataset_name": "Cora",
    "input_dimension": dataset_cora.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cora.num_classes,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gcn_gat_cora = Experiment(data_cora, GCN_GAT, gcn_gat_cora_config)

accuracy, training_time, testing_time = experiment_gcn_gat_cora.run()

Finished experiment initialization (model: GCN_GAT, dataset: Cora).
GCN_GAT(
  (gcn1): GCNConv(1433, 16)
  (gat2): GATConv(16, 7, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.949 | Train Acc:  17.14% | Val Loss: 1.93 | Val Acc: 23.40%
Epoch  10 | Train Loss: 0.850 | Train Acc:  90.71% | Val Loss: 1.26 | Val Acc: 66.80%
Epoch  20 | Train Loss: 0.340 | Train Acc:  95.71% | Val Loss: 1.19 | Val Acc: 67.40%
Epoch  30 | Train Loss: 0.220 | Train Acc:  95.00% | Val Loss: 1.18 | Val Acc: 65.60%
Epoch  40 | Train Loss: 0.166 | Train Acc:  98.57% | Val Loss: 1.13 | Val Acc: 69.20%
Epoch  50 | Train Loss: 0.101 | Train Acc:  99.29% | Val Loss: 1.17 | Val Acc: 70.00%
Epoch  60 | Train Loss: 0.066 | Train Acc:  99.29% | Val Loss: 1.16 | Val Acc: 70.20%
Epoch  70 | Train Loss: 0.059 | Train Acc:  99.29% | Val Loss: 1.30 | Val Acc: 69.40%
Epoch  80 | Train Loss: 0.055 | Train Acc:  99.29% | Val Loss: 1.21 | Val Acc: 71.00%
Epoch  90 | Train Loss: 0.054 | Train Acc: 100.00% | Val Loss

### PubMed

In [27]:
gcn_gat_pub_med_config = {
    "model_name": "GCN_GAT",
    "dataset_name": "PubMed",
    "input_dimension": dataset_pub_med.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_pub_med.num_classes,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gcn_gat_pub_med = Experiment(data_pub_med, GCN_GAT, gcn_gat_pub_med_config)

accuracy, training_time, testing_time = experiment_gcn_gat_pub_med.run()

Finished experiment initialization (model: GCN_GAT, dataset: PubMed).
GCN_GAT(
  (gcn1): GCNConv(500, 16)
  (gat2): GATConv(16, 3, heads=1)
)
Started model training.
Epoch   0 | Train Loss: 1.100 | Train Acc:  26.67% | Val Loss: 1.10 | Val Acc: 40.60%
Epoch  10 | Train Loss: 0.915 | Train Acc:  83.33% | Val Loss: 0.96 | Val Acc: 66.80%
Epoch  20 | Train Loss: 0.682 | Train Acc:  91.67% | Val Loss: 0.77 | Val Acc: 70.60%
Epoch  30 | Train Loss: 0.446 | Train Acc:  95.00% | Val Loss: 0.71 | Val Acc: 70.80%
Epoch  40 | Train Loss: 0.357 | Train Acc:  91.67% | Val Loss: 0.69 | Val Acc: 72.60%
Epoch  50 | Train Loss: 0.254 | Train Acc:  93.33% | Val Loss: 0.64 | Val Acc: 73.80%
Epoch  60 | Train Loss: 0.207 | Train Acc:  96.67% | Val Loss: 0.68 | Val Acc: 74.80%
Epoch  70 | Train Loss: 0.180 | Train Acc:  96.67% | Val Loss: 0.66 | Val Acc: 74.00%
Epoch  80 | Train Loss: 0.113 | Train Acc: 100.00% | Val Loss: 0.68 | Val Acc: 72.20%
Epoch  90 | Train Loss: 0.140 | Train Acc:  98.33% | Val Los

## GAT + GCN Experiments

### CiteSeer

In [29]:
gat_gcn_cite_seer_config = {
    "model_name": "GAT_GCN",
    "dataset_name": "CiteSeer",
    "input_dimension": dataset_cite_seer.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cite_seer.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gat_gcn_cite_seer = Experiment(data_cite_seer, GAT_GCN, gat_gcn_cite_seer_config)

accuracy, training_time, testing_time = experiment_gat_gcn_cite_seer.run()

Finished experiment initialization (model: GAT_GCN, dataset: CiteSeer).
GAT_GCN(
  (gat1): GATConv(3703, 16, heads=8)
  (gcn2): GCNConv(128, 6)
)
Started model training.
Epoch   0 | Train Loss: 1.793 | Train Acc:  19.17% | Val Loss: 1.81 | Val Acc: 16.00%
Epoch  10 | Train Loss: 0.009 | Train Acc: 100.00% | Val Loss: 1.25 | Val Acc: 67.20%
Epoch  20 | Train Loss: 0.001 | Train Acc: 100.00% | Val Loss: 1.46 | Val Acc: 66.40%
Epoch  30 | Train Loss: 0.002 | Train Acc: 100.00% | Val Loss: 1.36 | Val Acc: 67.60%
Epoch  40 | Train Loss: 0.005 | Train Acc: 100.00% | Val Loss: 1.18 | Val Acc: 67.80%
Epoch  50 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 1.13 | Val Acc: 67.80%
Epoch  60 | Train Loss: 0.005 | Train Acc: 100.00% | Val Loss: 1.12 | Val Acc: 67.80%
Epoch  70 | Train Loss: 0.005 | Train Acc: 100.00% | Val Loss: 1.13 | Val Acc: 68.00%
Epoch  80 | Train Loss: 0.006 | Train Acc: 100.00% | Val Loss: 1.09 | Val Acc: 69.60%
Epoch  90 | Train Loss: 0.005 | Train Acc: 100.00% | Val

### Cora

In [30]:
gat_gcn_cora_config = {
    "model_name": "GAT_GCN",
    "dataset_name": "Cora",
    "input_dimension": dataset_cora.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_cora.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gat_gcn_cora = Experiment(data_cora, GAT_GCN, gat_gcn_cora_config)

accuracy, training_time, testing_time = experiment_gat_gcn_cora.run()

Finished experiment initialization (model: GAT_GCN, dataset: Cora).
GAT_GCN(
  (gat1): GATConv(1433, 16, heads=8)
  (gcn2): GCNConv(128, 7)
)
Started model training.
Epoch   0 | Train Loss: 1.991 | Train Acc:   9.29% | Val Loss: 1.96 | Val Acc: 13.20%
Epoch  10 | Train Loss: 0.025 | Train Acc:  99.29% | Val Loss: 0.79 | Val Acc: 77.60%
Epoch  20 | Train Loss: 0.001 | Train Acc: 100.00% | Val Loss: 0.97 | Val Acc: 76.60%
Epoch  30 | Train Loss: 0.001 | Train Acc: 100.00% | Val Loss: 1.04 | Val Acc: 76.00%
Epoch  40 | Train Loss: 0.003 | Train Acc: 100.00% | Val Loss: 0.90 | Val Acc: 75.80%
Epoch  50 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 0.82 | Val Acc: 77.20%
Epoch  60 | Train Loss: 0.009 | Train Acc: 100.00% | Val Loss: 0.79 | Val Acc: 76.40%
Epoch  70 | Train Loss: 0.009 | Train Acc: 100.00% | Val Loss: 0.81 | Val Acc: 77.80%
Epoch  80 | Train Loss: 0.007 | Train Acc: 100.00% | Val Loss: 0.81 | Val Acc: 76.80%
Epoch  90 | Train Loss: 0.007 | Train Acc: 100.00% | Val Los

### PubMed

In [31]:
gat_gcn_pub_med_config = {
    "model_name": "GAT_GCN",
    "dataset_name": "PubMed",
    "input_dimension": dataset_pub_med.num_features,
    "hidden_dimension": 16,
    "output_dimension": dataset_pub_med.num_classes,
    "num_heads": 8,
    "loss": torch.nn.CrossEntropyLoss(),
    "optimizer": torch.optim.Adam,
    "learning_rate": 0.01,
    "weight_decay": 5e-4,
    "num_epochs": 200,
    "report_freq": 10,
    "req_edge_index": True,
    "req_adj_matrix": False
}

experiment_gat_gcn_pub_med = Experiment(data_pub_med, GAT_GCN, gat_gcn_pub_med_config)

accuracy, training_time, testing_time = experiment_gat_gcn_pub_med.run()

Finished experiment initialization (model: GAT_GCN, dataset: PubMed).
GAT_GCN(
  (gat1): GATConv(500, 16, heads=8)
  (gcn2): GCNConv(128, 3)
)
Started model training.
Epoch   0 | Train Loss: 1.100 | Train Acc:  33.33% | Val Loss: 1.10 | Val Acc: 40.20%
Epoch  10 | Train Loss: 0.518 | Train Acc:  96.67% | Val Loss: 0.75 | Val Acc: 75.00%
Epoch  20 | Train Loss: 0.178 | Train Acc:  96.67% | Val Loss: 0.59 | Val Acc: 77.80%
Epoch  30 | Train Loss: 0.085 | Train Acc: 100.00% | Val Loss: 0.58 | Val Acc: 78.20%
Epoch  40 | Train Loss: 0.069 | Train Acc: 100.00% | Val Loss: 0.58 | Val Acc: 78.60%
Epoch  50 | Train Loss: 0.064 | Train Acc: 100.00% | Val Loss: 0.59 | Val Acc: 78.20%
Epoch  60 | Train Loss: 0.060 | Train Acc: 100.00% | Val Loss: 0.59 | Val Acc: 78.60%
Epoch  70 | Train Loss: 0.050 | Train Acc: 100.00% | Val Loss: 0.61 | Val Acc: 78.00%
Epoch  80 | Train Loss: 0.044 | Train Acc: 100.00% | Val Loss: 0.58 | Val Acc: 79.40%
Epoch  90 | Train Loss: 0.044 | Train Acc: 100.00% | Val Lo