In [1]:
#Check the PyTorch and Cuda version
!python -c "import torch; print(torch.__version__)"
!python -c "import torch; print(torch.version.cuda)"

1.10.0+cu111
11.1


In [None]:
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu111.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cu111.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!pip install torch-cluster -f https://data.pyg.org/whl/torch-1.10.0+cu111.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html

In [3]:
#First data characteristics
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

dataset = Planetoid(root='/tmp/CiteSeer', name='CiteSeer',transform=NormalizeFeatures())

data = dataset[0]
print(data)

print(f'Number of nodes: {data.num_nodes}')
print(f'Nodes features: {data.num_node_features}')
print(f'Number of classes: {dataset.num_classes}')
print(f'Number of edges: {data.num_edges}')
print(f'Avarage degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Training nodes: {data.train_mask.sum()}')
print(f'Validation nodes: {data.val_mask.sum()}')
print(f'Test nodes: {data.test_mask.sum()}')
print(f'Isolated nodes: {data.has_isolated_nodes()}')
print(f'Loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...


Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])
Number of nodes: 3327
Nodes features: 3703
Number of classes: 6
Number of edges: 9104
Avarage degree: 2.74
Training nodes: 120
Validation nodes: 500
Test nodes: 1000
Isolated nodes: True
Loops: False
Is undirected: True


Done!


In [4]:
#We create the same number of edges but in a random way between the [0..3326] nodes
#Citeseer network is undirected, then the edges appear twice (i,j) (j,i) in the original edge matrix
#In this random edge matrix, the edges are directed
import torch

edge_index1=torch.randint(0,3327,(2,9104))
print(edge_index1)
edge_index1[:,1]

tensor([[1176, 3153, 1276,  ..., 2158, 2853,  444],
        [   7,  366, 1837,  ..., 1510, 2961,  428]])


tensor([3153,  366])

In [5]:
#Checkin boolean tensors to select tensor items
check=torch.rand(6)
print(check)

true=torch.tensor([True, False, True, False, True, False])
check[true]

tensor([0.5217, 0.6586, 0.2691, 0.7446, 0.2061, 0.1023])


tensor([0.5217, 0.2691, 0.2061])

**Model with GCN modifying the edges information (random edges)**

In [6]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv



class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(1234567)
        self.conv1 = GCNConv(dataset.num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x

model = GCN(hidden_channels=16)

def model_summary(model):
    
    model_params_list = list(model.named_parameters())
    print("----------------------------------------------------------------")
    line_new = "{:>20}  {:>25} {:>15}".format("Layer.Parameter", "Param Tensor Shape", "Param #")
    print(line_new)
    print("----------------------------------------------------------------")
    for elem in model_params_list:
        p_name = elem[0] 
        p_shape = list(elem[1].size())
        p_count = torch.tensor(elem[1].size()).prod().item()
        line_new = "{:>20}  {:>25} {:>15}".format(p_name, str(p_shape), str(p_count))
        print(line_new)
    print("----------------------------------------------------------------")
    total_params = sum([param.nelement() for param in model.parameters()])
    print("Total params:", total_params)
    num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Trainable params:", num_trainable_params)
    print("Non-trainable params:", total_params - num_trainable_params)

model_summary(model)

----------------------------------------------------------------
     Layer.Parameter         Param Tensor Shape         Param #
----------------------------------------------------------------
          conv1.bias                       [16]              16
    conv1.lin.weight                 [16, 3703]           59248
          conv2.bias                        [6]               6
    conv2.lin.weight                    [6, 16]              96
----------------------------------------------------------------
Total params: 59366
Trainable params: 59366
Non-trainable params: 0


In [7]:
#Passing edge_index1 instead data.edge_index as the edge information
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = GCN(hidden_channels=16)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
      model.train()
      optimizer.zero_grad()  # Clear gradients.
      out = model(data.x, edge_index1)  # Perform a single forward pass.
      loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
      loss.backward()  # Derive gradients.
      optimizer.step()  # Update parameters based on gradients.
      return loss

def test():
      model.eval()
      out = model(data.x, edge_index1)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
      return test_acc


for epoch in range(1, 201):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

<IPython.core.display.Javascript object>

Epoch: 001, Loss: 1.7914
Epoch: 002, Loss: 1.7884
Epoch: 003, Loss: 1.7837
Epoch: 004, Loss: 1.7783
Epoch: 005, Loss: 1.7764
Epoch: 006, Loss: 1.7673
Epoch: 007, Loss: 1.7617
Epoch: 008, Loss: 1.7516
Epoch: 009, Loss: 1.7522
Epoch: 010, Loss: 1.7461
Epoch: 011, Loss: 1.7350
Epoch: 012, Loss: 1.7354
Epoch: 013, Loss: 1.7192
Epoch: 014, Loss: 1.7225
Epoch: 015, Loss: 1.7126
Epoch: 016, Loss: 1.7002
Epoch: 017, Loss: 1.6942
Epoch: 018, Loss: 1.6840
Epoch: 019, Loss: 1.6893
Epoch: 020, Loss: 1.6766
Epoch: 021, Loss: 1.6592
Epoch: 022, Loss: 1.6697
Epoch: 023, Loss: 1.6567
Epoch: 024, Loss: 1.6493
Epoch: 025, Loss: 1.6343
Epoch: 026, Loss: 1.6184
Epoch: 027, Loss: 1.6189
Epoch: 028, Loss: 1.6067
Epoch: 029, Loss: 1.6144
Epoch: 030, Loss: 1.5952
Epoch: 031, Loss: 1.5788
Epoch: 032, Loss: 1.5850
Epoch: 033, Loss: 1.5873
Epoch: 034, Loss: 1.5728
Epoch: 035, Loss: 1.5586
Epoch: 036, Loss: 1.5412
Epoch: 037, Loss: 1.5307
Epoch: 038, Loss: 1.5492
Epoch: 039, Loss: 1.4830
Epoch: 040, Loss: 1.5279


In [8]:
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.1720


Using the training mask with a GCN with two graph convolutional layers and random edges between the nodes, the test accuracy is 16.5% versus the 71.4% obtained with the correct edges.

**Model with GCN modifying the edges information (edge pruning)**

In [9]:
#Apply torch.idex_select to the original edge_index matrix to remove certain links. 
#Citeseer network is undirected, then the edges appear twice (i,j) (j,i) in the original edge matrix
#In this random edge matrix, the edges that are removed are directed

import torch

indices = torch.tensor([1])
for i in range(2,9104):

  a=torch.tensor([i])
  if i % 5 == 0: #Change the equal
     indices=torch.cat((indices,a),0)


edge_index2=torch.index_select(data.edge_index, 1, indices)
edge_index2.shape

torch.Size([2, 1821])

In [10]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv



class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(1234567)
        self.conv1 = GCNConv(dataset.num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x

model = GCN(hidden_channels=16)

def model_summary(model):
    
    model_params_list = list(model.named_parameters())
    print("----------------------------------------------------------------")
    line_new = "{:>20}  {:>25} {:>15}".format("Layer.Parameter", "Param Tensor Shape", "Param #")
    print(line_new)
    print("----------------------------------------------------------------")
    for elem in model_params_list:
        p_name = elem[0] 
        p_shape = list(elem[1].size())
        p_count = torch.tensor(elem[1].size()).prod().item()
        line_new = "{:>20}  {:>25} {:>15}".format(p_name, str(p_shape), str(p_count))
        print(line_new)
    print("----------------------------------------------------------------")
    total_params = sum([param.nelement() for param in model.parameters()])
    print("Total params:", total_params)
    num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Trainable params:", num_trainable_params)
    print("Non-trainable params:", total_params - num_trainable_params)

model_summary(model)

----------------------------------------------------------------
     Layer.Parameter         Param Tensor Shape         Param #
----------------------------------------------------------------
          conv1.bias                       [16]              16
    conv1.lin.weight                 [16, 3703]           59248
          conv2.bias                        [6]               6
    conv2.lin.weight                    [6, 16]              96
----------------------------------------------------------------
Total params: 59366
Trainable params: 59366
Non-trainable params: 0


In [11]:
#Passing edge_index2 instead data.edge_index as the edge information
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = GCN(hidden_channels=16)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
      model.train()
      optimizer.zero_grad()  # Clear gradients.
      out = model(data.x, edge_index2)  # Perform a single forward pass.
      loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
      loss.backward()  # Derive gradients.
      optimizer.step()  # Update parameters based on gradients.
      return loss

def test():
      model.eval()
      out = model(data.x, edge_index2)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
      return test_acc


for epoch in range(1, 201):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

<IPython.core.display.Javascript object>

Epoch: 001, Loss: 1.7915
Epoch: 002, Loss: 1.7853
Epoch: 003, Loss: 1.7777
Epoch: 004, Loss: 1.7656
Epoch: 005, Loss: 1.7606
Epoch: 006, Loss: 1.7410
Epoch: 007, Loss: 1.7268
Epoch: 008, Loss: 1.7167
Epoch: 009, Loss: 1.7015
Epoch: 010, Loss: 1.6918
Epoch: 011, Loss: 1.6771
Epoch: 012, Loss: 1.6528
Epoch: 013, Loss: 1.6261
Epoch: 014, Loss: 1.6118
Epoch: 015, Loss: 1.5992
Epoch: 016, Loss: 1.5798
Epoch: 017, Loss: 1.5503
Epoch: 018, Loss: 1.5218
Epoch: 019, Loss: 1.5065
Epoch: 020, Loss: 1.4946
Epoch: 021, Loss: 1.4532
Epoch: 022, Loss: 1.4516
Epoch: 023, Loss: 1.4396
Epoch: 024, Loss: 1.4068
Epoch: 025, Loss: 1.3721
Epoch: 026, Loss: 1.3558
Epoch: 027, Loss: 1.3307
Epoch: 028, Loss: 1.3468
Epoch: 029, Loss: 1.3008
Epoch: 030, Loss: 1.2704
Epoch: 031, Loss: 1.2093
Epoch: 032, Loss: 1.2031
Epoch: 033, Loss: 1.1897
Epoch: 034, Loss: 1.1788
Epoch: 035, Loss: 1.1419
Epoch: 036, Loss: 1.0673
Epoch: 037, Loss: 1.1066
Epoch: 038, Loss: 1.0952
Epoch: 039, Loss: 1.0542
Epoch: 040, Loss: 1.0819


In [12]:
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.5920


Using the training mask with an GCN intermediate layer and removing a % of the edges gives a test accuracy (20% removed, 67.7%)(25% removed, 68.7%)(33% removed, 67.1%)(50% removed, 68.1 %)(66% removed, 62%)(75% removed, 61.1%)(80% removed, 59.2%) versus the 71.4% obtained with the correct edges and the 58% of the one intermediate layer neural network.