In [1]:
# Install required packages.
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

# Helper function for visualization.
%matplotlib inline
import matplotlib.pyplot as plt

1.12.1+cu113
[K     |████████████████████████████████| 7.9 MB 35.4 MB/s 
[K     |████████████████████████████████| 3.5 MB 11.8 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [2]:
import numpy as np
import random

In [3]:
def set_seed(seed: int = 42) -> None:
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")


In [4]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('===========================================================================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!



Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Has isolated nodes: False
Has self-loops: False
Is undirected: True


In [5]:
from torch_geometric.nn import GATConv
import torch.nn.functional as F

class GAT(torch.nn.Module):
    def __init__(self, hidden_channels=8,p1=0.6,p2=0.6,activation='relu',h1=8,h2=1):
        super().__init__()
        self.p1=p1
        self.p2=p2
        self.conv1 = GATConv(dataset.num_features, hidden_channels,num_heads=h1,dropout=0.6)
        self.conv2 = GATConv(hidden_channels, dataset.num_classes,num_heads=h2)
        self.activation=activation

    def forward(self, x, edge_index):
        x = F.dropout(x, p=self.p1, training=self.training)
        x = self.conv1(x, edge_index)
        if self.activation=='relu':
          x = x.relu()
        else:
          x=F.elu(x)
        x = F.dropout(x, p=self.p2, training=self.training)
        x = self.conv2(x, edge_index)
        return x

In [6]:
def train(model,optimizer,criterion):
      model.train()
      optimizer.zero_grad()  # Clear gradients.
      out = model(data.x, data.edge_index)  # Perform a single forward pass.
      loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
      loss.backward()  # Derive gradients.
      optimizer.step()  # Update parameters based on gradients.
      return loss

def validate(model,criterion):
      model.eval()
      out = model(data.x, data.edge_index)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      loss=criterion(out[data.val_mask], data.y[data.val_mask])
      return loss.item()

def test(model,criterion):
      out = model(data.x, data.edge_index)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
      return test_acc

In [7]:
import time

In [8]:
test_acc=[]
train_time=[]
for i in [1,12, 123, 1234, 12345, 123456, 1234567, 12345678, 123456789, 1234567890]:
  set_seed(i)
  start=time.process_time()
  model = GAT(hidden_channels=8,p1=0.6,p2=0.6,activation='elu')
  optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=5e-4)
  criterion = torch.nn.CrossEntropyLoss()
  train_loss = train(model,optimizer,criterion)
  best_loss=validate(model,criterion)
  torch.save(model, 'best-model.pt') 
  for epoch in range(1,100):
      train_loss = train(model,optimizer,criterion)
      validate_loss=validate(model,criterion)
      if abs(validate_loss)<abs(best_loss):
          torch.save(model, 'best-model.pt') 
  train_time.append(time.process_time()-start)
  model = torch.load('best-model.pt')
  model.eval()
  print(test(model,criterion))
  test_acc.append(test(model,criterion))

Random seed set as 1
0.828
Random seed set as 12
0.798
Random seed set as 123
0.727
Random seed set as 1234
0.827
Random seed set as 12345
0.827
Random seed set as 123456
0.814
Random seed set as 1234567
0.787
Random seed set as 12345678
0.797
Random seed set as 123456789
0.834
Random seed set as 1234567890
0.799


In [9]:
print("average acuracy: ", sum(test_acc)/len(test_acc))

average acuracy:  0.8037999999999998


In [10]:
print("train time: ", train_time)
print("average train time: ",sum(train_time)/len(train_time))

train time:  [9.936439482, 9.903210871000002, 9.923749109000003, 9.901288727999997, 9.963781994000001, 10.417248029, 9.924607131999991, 9.918483828000006, 9.888212772000003, 9.896193561000004]
average train time:  9.967321550600001
