In [None]:
# Install torch geometric
#This cell will take almost 10-15 mins
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip install -q torch-geometric

In [12]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')

print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('===========================================================================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.x.dtype}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')


Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: torch.float32
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Has isolated nodes: False
Has self-loops: False
Is undirected: True


In [9]:
import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

class GCN(torch.nn.Module):
  def __init__(self, hidden_channels):
    super().__init__()
    torch.manual_seed(1234567)
    self.conv1 = GCNConv(dataset.num_features, hidden_channels)
    self.conv2 = GCNConv(hidden_channels, dataset.num_classes)
    

  def forward(self, x, edge_index):
    x = self.conv1(x, edge_index)
    x = x.relu()
    x = F.dropout(x, p=0.5, training=self.training)
    x = self.conv2(x, edge_index)
    
    return x

model = GCN(hidden_channels=128)
print(model)

GCN(
  (conv1): GCNConv(1433, 128)
  (conv2): GCNConv(128, 7)
)


In [10]:
criterion = torch.nn.CrossEntropyLoss()  # Define loss criterion.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # Define optimizer.
model.train()
for epoch in range(50):
  out = model(data.x, data.edge_index)  # Perform a single forward pass.
  loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
  pred = out.argmax(dim=1)  # Use the class with highest probability.
  train_correct = pred[data.train_mask] == data.y[data.train_mask]  # Check against ground-truth labels.
  train_acc = int(train_correct.sum()) / int(data.train_mask.sum())  # Derive ratio of correct predictions.

  loss.backward()  # Derive gradients.
  optimizer.step()  # Update parameters based on gradients.

    
  print(f'Epoch: {epoch:03d}, Train Loss: {loss:.4f}, Accuracy: {train_acc:.4f}')

Epoch: 000, Train Loss: 1.9459, Accuracy: 0.1286
Epoch: 001, Train Loss: 1.9253, Accuracy: 0.5071
Epoch: 002, Train Loss: 1.9056, Accuracy: 0.6071
Epoch: 003, Train Loss: 1.8736, Accuracy: 0.8071
Epoch: 004, Train Loss: 1.8391, Accuracy: 0.8071
Epoch: 005, Train Loss: 1.8024, Accuracy: 0.7929
Epoch: 006, Train Loss: 1.7534, Accuracy: 0.8571
Epoch: 007, Train Loss: 1.7116, Accuracy: 0.8143
Epoch: 008, Train Loss: 1.6537, Accuracy: 0.8286
Epoch: 009, Train Loss: 1.5866, Accuracy: 0.8643
Epoch: 010, Train Loss: 1.5399, Accuracy: 0.8500
Epoch: 011, Train Loss: 1.4716, Accuracy: 0.8643
Epoch: 012, Train Loss: 1.3925, Accuracy: 0.9143
Epoch: 013, Train Loss: 1.3135, Accuracy: 0.9000
Epoch: 014, Train Loss: 1.2689, Accuracy: 0.9000
Epoch: 015, Train Loss: 1.1770, Accuracy: 0.9286
Epoch: 016, Train Loss: 1.0833, Accuracy: 0.9643
Epoch: 017, Train Loss: 1.0208, Accuracy: 0.9643
Epoch: 018, Train Loss: 0.9130, Accuracy: 0.9857
Epoch: 019, Train Loss: 0.8550, Accuracy: 0.9643
Epoch: 020, Train Lo

In [11]:
model.eval()
out = model(data.x, data.edge_index)
pred = out.argmax(dim=1)  # Use the class with highest probability.
test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
print(f'Test accuracy: {test_acc:.4f}')

Test accuracy: 0.7660
