In [19]:
#Check the PyTorch and Cuda version
!python -c "import torch; print(torch.__version__)"
!python -c "import torch; print(torch.version.cuda)"

1.10.0+cu111
11.1


In [20]:
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu111.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cu111.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!pip install torch-cluster -f https://data.pyg.org/whl/torch-1.10.0+cu111.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html

Looking in links: https://data.pyg.org/whl/torch-1.10.0+cu111.html
Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html


In [None]:
# Helper function for visualization.
%matplotlib inline
import torch
import networkx as nx
import matplotlib.pyplot as plt


def visualize(h, color, epoch=None, loss=None):
    plt.figure(figsize=(7,7))
    plt.xticks([])
    plt.yticks([])

    if torch.is_tensor(h):
        h = h.detach().cpu().numpy()
        plt.scatter(h[:, 0], h[:, 1], s=140, c=color, cmap="Set2")
        if epoch is not None and loss is not None:
            plt.xlabel(f'Epoch: {epoch}, Loss: {loss.item():.4f}', fontsize=16)
    else:
        nx.draw_networkx(G, pos=nx.spring_layout(G, seed=42), with_labels=False,
                         node_color=color, cmap="Set2")
    plt.show()

In [21]:
#First data characteristics
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

dataset = Planetoid(root='/tmp/CiteSeer', name='CiteSeer',transform=NormalizeFeatures())

data = dataset[0]
print(data)

print(f'Number of nodes: {data.num_nodes}')
print(f'Nodes features: {data.num_node_features}')
print(f'Number of classes: {dataset.num_classes}')
print(f'Number of edges: {data.num_edges}')
print(f'Avarage degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Training nodes: {data.train_mask.sum()}')
print(f'Validation nodes: {data.val_mask.sum()}')
print(f'Test nodes: {data.test_mask.sum()}')
print(f'Isolated nodes: {data.has_isolated_nodes()}')
print(f'Loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])
Number of nodes: 3327
Nodes features: 3703
Number of classes: 6
Number of edges: 9104
Avarage degree: 2.74
Training nodes: 120
Validation nodes: 500
Test nodes: 1000
Isolated nodes: True
Loops: False
Is undirected: True


In [None]:
#Converts a torch_geometric.data.Data instance to a networkx.Graph
from torch_geometric.utils import to_networkx

G = to_networkx(data, to_undirected=True)
h=data.x, data.edge_index
#visualize(h, color=data.y)

#Function to calculate the average degree. Change the number of nodes
def average_degree(G):
  degree=nx.degree(G)
  sum=0
  for i in range(3327):
    sum=sum+degree[i]

  ad=sum/nx.number_of_nodes(G)
  return ad

#Function to calculate the shortest path. Change the number of nodes. 

def average_path(G):

  count=torch.empty(3328)

  for j in range(2):
    sh=nx.shortest_path_length(G,source=j)
    count0=0
    contador=0
    for i in sh:
      count0=count0+sh[i]
      contador=contador+1

    count[j]=count0/contador
    

  shortestpath=torch.mean(count)
  return shortestpath

shpath=average_path(G)

Dataset description: Citation network extracted from the CiteSeer digital library. Nodes are publications and the edges denote citations. https://networkrepository.com/citeseer.php

In [None]:
#Data characteristics
#networkx functions and algorithms: degree, shortest paths, clustering, centrality, ...
print(f'Is weighted: {nx.is_weighted(G)}')
print(f'Is directed: {nx.is_directed(G)}')
print(f'Number of nodes: {nx.number_of_nodes(G)}')
print(f'Nodes features: {data.num_node_features}')
print(f'Number of classes: {dataset.num_classes}')
print(f'Number of edges: {nx.number_of_edges(G)}')
print(f'Density(num of edges vs maximal num of edges): {nx.density(G)}')
print(f'Average degree: {average_degree(G)}')
print(f'Is connected(every pair of nodes is connected): {nx.is_connected(G)}')
print(f'Average clustering coefficient: {nx.average_clustering(G)}')
print(f'Average shortest path: {shpath}')

Is weighted: False
Is directed: False
Number of nodes: 3327
Nodes features: 3703
Number of classes: 6
Number of edges: 4552
Density(num of edges vs maximal num of edges): 0.0008227297529768376
Average degree: 2.7363991584009617
Is connected(every pair of nodes is connected): False
Average clustering coefficient: 0.14147102442629086
Average shortest path: 31.58768653869629


**Model with only neural networks**

In [22]:
import torch
from torch.nn import Linear
import torch.nn.functional as F


class MLP(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(MLP, self).__init__()
        torch.manual_seed(12345)
        self.lin1 = Linear(dataset.num_features, hidden_channels)
        self.lin2 = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x):
        x = self.lin1(x)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)
        return x

model = MLP(hidden_channels=16)
def model_summary(model):
    
    model_params_list = list(model.named_parameters())
    print("----------------------------------------------------------------")
    line_new = "{:>20}  {:>25} {:>15}".format("Layer.Parameter", "Param Tensor Shape", "Param #")
    print(line_new)
    print("----------------------------------------------------------------")
    for elem in model_params_list:
        p_name = elem[0] 
        p_shape = list(elem[1].size())
        p_count = torch.tensor(elem[1].size()).prod().item()
        line_new = "{:>20}  {:>25} {:>15}".format(p_name, str(p_shape), str(p_count))
        print(line_new)
    print("----------------------------------------------------------------")
    total_params = sum([param.nelement() for param in model.parameters()])
    print("Total params:", total_params)
    num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Trainable params:", num_trainable_params)
    print("Non-trainable params:", total_params - num_trainable_params)

model_summary(model)

----------------------------------------------------------------
     Layer.Parameter         Param Tensor Shape         Param #
----------------------------------------------------------------
         lin1.weight                 [16, 3703]           59248
           lin1.bias                       [16]              16
         lin2.weight                    [6, 16]              96
           lin2.bias                        [6]               6
----------------------------------------------------------------
Total params: 59366
Trainable params: 59366
Non-trainable params: 0


In [None]:
#For training, the cross entropy loss combines LogSoftmax and NLLLoss in one single class
#input is expected to contain raw, unnormalized scores for each class
#target a class index in the range for each value of a 1D tensor of size minibatch
#For testing, compares the class with highest probability with the labels and counts the correct predictions
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = MLP(hidden_channels=16)
criterion = torch.nn.CrossEntropyLoss()  # Define loss criterion.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)  # Define optimizer.

def train():
      model.train()
      optimizer.zero_grad()  # Clear gradients.
      out = model(data.x)  # Perform a single forward pass.
      loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
      loss.backward()  # Derive gradients.
      optimizer.step()  # Update parameters based on gradients.
      return loss

def test():
      model.eval()
      out = model(data.x)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
      return test_acc

for epoch in range(1, 201):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

<IPython.core.display.Javascript object>

Epoch: 001, Loss: 1.8032
Epoch: 002, Loss: 1.7984
Epoch: 003, Loss: 1.7924
Epoch: 004, Loss: 1.7849
Epoch: 005, Loss: 1.7753
Epoch: 006, Loss: 1.7695
Epoch: 007, Loss: 1.7571
Epoch: 008, Loss: 1.7482
Epoch: 009, Loss: 1.7426
Epoch: 010, Loss: 1.7279
Epoch: 011, Loss: 1.7184
Epoch: 012, Loss: 1.6971
Epoch: 013, Loss: 1.6868
Epoch: 014, Loss: 1.6798
Epoch: 015, Loss: 1.6684
Epoch: 016, Loss: 1.6549
Epoch: 017, Loss: 1.6277
Epoch: 018, Loss: 1.6293
Epoch: 019, Loss: 1.6114
Epoch: 020, Loss: 1.5793
Epoch: 021, Loss: 1.5792
Epoch: 022, Loss: 1.5556
Epoch: 023, Loss: 1.5405
Epoch: 024, Loss: 1.5248
Epoch: 025, Loss: 1.4971
Epoch: 026, Loss: 1.4818
Epoch: 027, Loss: 1.4754
Epoch: 028, Loss: 1.4292
Epoch: 029, Loss: 1.4271
Epoch: 030, Loss: 1.4126
Epoch: 031, Loss: 1.3647
Epoch: 032, Loss: 1.3686
Epoch: 033, Loss: 1.3339
Epoch: 034, Loss: 1.3454
Epoch: 035, Loss: 1.2883
Epoch: 036, Loss: 1.3052
Epoch: 037, Loss: 1.2462
Epoch: 038, Loss: 1.2758
Epoch: 039, Loss: 1.1842
Epoch: 040, Loss: 1.1490


In [None]:
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.5820


MLP performs with only about 58% test accuracy. This model suffers from heavy overfitting due to only a small amount of training nodes, and therefore generalizes poorly to unseen node representations.

It also fails to incorporate an important bias into the model: Cited papers are very likely related to the category of a document. 


**Model with neural networks, more layers and intermediate dimension**

In [None]:
import torch
from torch.nn import Linear
import torch.nn.functional as F


class MLP(torch.nn.Module):
    def __init__(self, hidden_channels1,hidden_channels2):
        super(MLP, self).__init__()
        torch.manual_seed(12345)
        self.lin1 = Linear(dataset.num_features, hidden_channels1)
        self.lin2 = Linear(hidden_channels1, hidden_channels2)
        self.lin3 = Linear(hidden_channels2, dataset.num_classes)

    def forward(self, x):
        x = self.lin1(x)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)
        x = x.relu()
        x = self.lin3(x)
        return x

model = MLP(hidden_channels1=512,hidden_channels2=256)

def model_summary(model):
    
    model_params_list = list(model.named_parameters())
    print("----------------------------------------------------------------")
    line_new = "{:>20}  {:>25} {:>15}".format("Layer.Parameter", "Param Tensor Shape", "Param #")
    print(line_new)
    print("----------------------------------------------------------------")
    for elem in model_params_list:
        p_name = elem[0] 
        p_shape = list(elem[1].size())
        p_count = torch.tensor(elem[1].size()).prod().item()
        line_new = "{:>20}  {:>25} {:>15}".format(p_name, str(p_shape), str(p_count))
        print(line_new)
    print("----------------------------------------------------------------")
    total_params = sum([param.nelement() for param in model.parameters()])
    print("Total params:", total_params)
    num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Trainable params:", num_trainable_params)
    print("Non-trainable params:", total_params - num_trainable_params)

model_summary(model)

----------------------------------------------------------------
     Layer.Parameter         Param Tensor Shape         Param #
----------------------------------------------------------------
         lin1.weight                [512, 3703]         1895936
           lin1.bias                      [512]             512
         lin2.weight                 [256, 512]          131072
           lin2.bias                      [256]             256
         lin3.weight                   [6, 256]            1536
           lin3.bias                        [6]               6
----------------------------------------------------------------
Total params: 2029318
Trainable params: 2029318
Non-trainable params: 0


In [None]:
#For training, the cross entropy loss combines LogSoftmax and NLLLoss in one single class
#input is expected to contain raw, unnormalized scores for each class
#target a class index in the range for each value of a 1D tensor of size minibatch
#For testing, compares the class with highest probability with the labels and counts the correct predictions
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = MLP(hidden_channels1=512,hidden_channels2=256)
criterion = torch.nn.CrossEntropyLoss()  # Define loss criterion.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)  # Define optimizer.

def train():
      model.train()
      optimizer.zero_grad()  # Clear gradients.
      out = model(data.x)  # Perform a single forward pass.
      loss = criterion(out[data.val_mask], data.y[data.val_mask])  # Compute the loss solely based on the training nodes.
      loss.backward()  # Derive gradients.
      optimizer.step()  # Update parameters based on gradients.
      return loss

def test():
      model.eval()
      out = model(data.x)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
      return test_acc

for epoch in range(1, 201):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

<IPython.core.display.Javascript object>

Epoch: 001, Loss: 1.7972
Epoch: 002, Loss: 1.7675
Epoch: 003, Loss: 1.7334
Epoch: 004, Loss: 1.7187
Epoch: 005, Loss: 1.6949
Epoch: 006, Loss: 1.6758
Epoch: 007, Loss: 1.6509
Epoch: 008, Loss: 1.6078
Epoch: 009, Loss: 1.5508
Epoch: 010, Loss: 1.4814
Epoch: 011, Loss: 1.3886
Epoch: 012, Loss: 1.2802
Epoch: 013, Loss: 1.1697
Epoch: 014, Loss: 1.0366
Epoch: 015, Loss: 0.8982
Epoch: 016, Loss: 0.7681
Epoch: 017, Loss: 0.6300
Epoch: 018, Loss: 0.5032
Epoch: 019, Loss: 0.4057
Epoch: 020, Loss: 0.3202
Epoch: 021, Loss: 0.2661
Epoch: 022, Loss: 0.1994
Epoch: 023, Loss: 0.1682
Epoch: 024, Loss: 0.1379
Epoch: 025, Loss: 0.1252
Epoch: 026, Loss: 0.0940
Epoch: 027, Loss: 0.0890
Epoch: 028, Loss: 0.0736
Epoch: 029, Loss: 0.0732
Epoch: 030, Loss: 0.0657
Epoch: 031, Loss: 0.0620
Epoch: 032, Loss: 0.0612
Epoch: 033, Loss: 0.0693
Epoch: 034, Loss: 0.0694
Epoch: 035, Loss: 0.0590
Epoch: 036, Loss: 0.0595
Epoch: 037, Loss: 0.0656
Epoch: 038, Loss: 0.0536
Epoch: 039, Loss: 0.0525
Epoch: 040, Loss: 0.0462


In [None]:
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.6770


With a MLP, with two intermediate layers, intermediate dimensions 512 and 256, 2,029,318 of parameters and training with the training mask, we get test accuracy of 59%, slightly above the 58% of the previous configuration. Probably, the model is overfitting the training data. Using the validation mask to train the model, the loss decrease more slowly and gets 68% of test accuracy.

**Model with GCN**

In [None]:
from torch_geometric.nn import GCNConv


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(1234567)
        self.conv1 = GCNConv(dataset.num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x

model = GCN(hidden_channels=16)

def model_summary(model):
    
    model_params_list = list(model.named_parameters())
    print("----------------------------------------------------------------")
    line_new = "{:>20}  {:>25} {:>15}".format("Layer.Parameter", "Param Tensor Shape", "Param #")
    print(line_new)
    print("----------------------------------------------------------------")
    for elem in model_params_list:
        p_name = elem[0] 
        p_shape = list(elem[1].size())
        p_count = torch.tensor(elem[1].size()).prod().item()
        line_new = "{:>20}  {:>25} {:>15}".format(p_name, str(p_shape), str(p_count))
        print(line_new)
    print("----------------------------------------------------------------")
    total_params = sum([param.nelement() for param in model.parameters()])
    print("Total params:", total_params)
    num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Trainable params:", num_trainable_params)
    print("Non-trainable params:", total_params - num_trainable_params)

model_summary(model)

----------------------------------------------------------------
     Layer.Parameter         Param Tensor Shape         Param #
----------------------------------------------------------------
          conv1.bias                       [16]              16
    conv1.lin.weight                 [16, 3703]           59248
          conv2.bias                        [6]               6
    conv2.lin.weight                    [6, 16]              96
----------------------------------------------------------------
Total params: 59366
Trainable params: 59366
Non-trainable params: 0


In [None]:
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = GCN(hidden_channels=16)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
      model.train()
      optimizer.zero_grad()  # Clear gradients.
      out = model(data.x, data.edge_index)  # Perform a single forward pass.
      loss = criterion(out[data.val_mask], data.y[data.val_mask])  # Compute the loss solely based on the training nodes.
      loss.backward()  # Derive gradients.
      optimizer.step()  # Update parameters based on gradients.
      return loss

def test():
      model.eval()
      out = model(data.x, data.edge_index)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
      return test_acc


for epoch in range(1, 201):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

<IPython.core.display.Javascript object>

Epoch: 001, Loss: 1.7916
Epoch: 002, Loss: 1.7839
Epoch: 003, Loss: 1.7756
Epoch: 004, Loss: 1.7676
Epoch: 005, Loss: 1.7593
Epoch: 006, Loss: 1.7479
Epoch: 007, Loss: 1.7415
Epoch: 008, Loss: 1.7316
Epoch: 009, Loss: 1.7228
Epoch: 010, Loss: 1.7151
Epoch: 011, Loss: 1.7045
Epoch: 012, Loss: 1.6994
Epoch: 013, Loss: 1.6930
Epoch: 014, Loss: 1.6847
Epoch: 015, Loss: 1.6747
Epoch: 016, Loss: 1.6705
Epoch: 017, Loss: 1.6618
Epoch: 018, Loss: 1.6522
Epoch: 019, Loss: 1.6474
Epoch: 020, Loss: 1.6411
Epoch: 021, Loss: 1.6394
Epoch: 022, Loss: 1.6350
Epoch: 023, Loss: 1.6225
Epoch: 024, Loss: 1.6085
Epoch: 025, Loss: 1.6051
Epoch: 026, Loss: 1.6024
Epoch: 027, Loss: 1.5851
Epoch: 028, Loss: 1.5862
Epoch: 029, Loss: 1.5702
Epoch: 030, Loss: 1.5571
Epoch: 031, Loss: 1.5520
Epoch: 032, Loss: 1.5399
Epoch: 033, Loss: 1.5422
Epoch: 034, Loss: 1.5373
Epoch: 035, Loss: 1.5141
Epoch: 036, Loss: 1.5011
Epoch: 037, Loss: 1.4898
Epoch: 038, Loss: 1.4844
Epoch: 039, Loss: 1.4731
Epoch: 040, Loss: 1.4689


In [None]:
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.7620


Using a GCN with a GCN layer and 59,366 parameters, we get a test accuracy of 71.4% (the training loss is 0.44). If we use the validation mask to train, we get a test accuracy of 76.2% (the training loss is 0.62).

**Model with GCN with more layers and more feature dimensions**

In [None]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels1,hidden_channels2):
        super(GCN, self).__init__()
        torch.manual_seed(1234567)
        self.conv1 = GCNConv(dataset.num_features, hidden_channels1)
        self.conv2 = GCNConv(hidden_channels1, hidden_channels2)
        self.conv3 = GCNConv(hidden_channels2, dataset.num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv3(x, edge_index)
        return x

model = GCN(hidden_channels1=256,hidden_channels2=256)

def model_summary(model):
    
    model_params_list = list(model.named_parameters())
    print("----------------------------------------------------------------")
    line_new = "{:>20}  {:>25} {:>15}".format("Layer.Parameter", "Param Tensor Shape", "Param #")
    print(line_new)
    print("----------------------------------------------------------------")
    for elem in model_params_list:
        p_name = elem[0] 
        p_shape = list(elem[1].size())
        p_count = torch.tensor(elem[1].size()).prod().item()
        line_new = "{:>20}  {:>25} {:>15}".format(p_name, str(p_shape), str(p_count))
        print(line_new)
    print("----------------------------------------------------------------")
    total_params = sum([param.nelement() for param in model.parameters()])
    print("Total params:", total_params)
    num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Trainable params:", num_trainable_params)
    print("Non-trainable params:", total_params - num_trainable_params)

model_summary(model)

----------------------------------------------------------------
     Layer.Parameter         Param Tensor Shape         Param #
----------------------------------------------------------------
          conv1.bias                      [256]             256
    conv1.lin.weight                [256, 3703]          947968
          conv2.bias                      [256]             256
    conv2.lin.weight                 [256, 256]           65536
          conv3.bias                        [6]               6
    conv3.lin.weight                   [6, 256]            1536
----------------------------------------------------------------
Total params: 1015558
Trainable params: 1015558
Non-trainable params: 0


In [None]:
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = GCN(hidden_channels1=256,hidden_channels2=256)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
      model.train()
      optimizer.zero_grad()  # Clear gradients.
      out = model(data.x, data.edge_index)  # Perform a single forward pass.
      loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
      loss.backward()  # Derive gradients.
      optimizer.step()  # Update parameters based on gradients.
      return loss

def test():
      model.eval()
      out = model(data.x, data.edge_index)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
      return test_acc


for epoch in range(1, 201):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

<IPython.core.display.Javascript object>

Epoch: 001, Loss: 1.7921
Epoch: 002, Loss: 1.7875
Epoch: 003, Loss: 1.7766
Epoch: 004, Loss: 1.7621
Epoch: 005, Loss: 1.7386
Epoch: 006, Loss: 1.6855
Epoch: 007, Loss: 1.6204
Epoch: 008, Loss: 1.5380
Epoch: 009, Loss: 1.4197
Epoch: 010, Loss: 1.2689
Epoch: 011, Loss: 1.1122
Epoch: 012, Loss: 0.9326
Epoch: 013, Loss: 0.7945
Epoch: 014, Loss: 0.6366
Epoch: 015, Loss: 0.5016
Epoch: 016, Loss: 0.4165
Epoch: 017, Loss: 0.3318
Epoch: 018, Loss: 0.2676
Epoch: 019, Loss: 0.2258
Epoch: 020, Loss: 0.2002
Epoch: 021, Loss: 0.1869
Epoch: 022, Loss: 0.1450
Epoch: 023, Loss: 0.1290
Epoch: 024, Loss: 0.1234
Epoch: 025, Loss: 0.1273
Epoch: 026, Loss: 0.1103
Epoch: 027, Loss: 0.0990
Epoch: 028, Loss: 0.0921
Epoch: 029, Loss: 0.1393
Epoch: 030, Loss: 0.0999
Epoch: 031, Loss: 0.1175
Epoch: 032, Loss: 0.0982
Epoch: 033, Loss: 0.1006
Epoch: 034, Loss: 0.0959
Epoch: 035, Loss: 0.0896
Epoch: 036, Loss: 0.0968
Epoch: 037, Loss: 0.0810
Epoch: 038, Loss: 0.0743
Epoch: 039, Loss: 0.0799
Epoch: 040, Loss: 0.0600


In [None]:
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.7330


Using a GNN with two intermediate layer we see that while increasing the feature dimensions the test accuracy decreases to 62%, probably because the model is overfitting the training data (the training loss is 0.04 aprox). If we lower the feature dimensions to 16, then the test accuracy returns to 67.4%. If we use the validation mask to train the model, we get a test accuracy of 71.5% or 73.3% with feature dimensions of 256 (the training loss is 0.07).