In [2]:
!pip install torch_geometric
!pip install torch

In [3]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures #Scale and center the data features

dataset = Planetoid('data/Planetoid', name = 'Cora', transform = NormalizeFeatures())

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [4]:
# Get some basic info about the dataset
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')
print(50*'=')

# There is only one graph in the dataset, use it as new data object
data = dataset[0]

# Gather some statistics about the graph.
print(data)
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {int(data.num_edges/2)}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Is undirected: {data.is_undirected()}')

Number of graphs: 1
Number of features: 1433
Number of classes: 7
Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: 2708
Number of edges: 5278
Number of training nodes: 140
Training node label rate: 0.05
Is undirected: True


#SingleLayer GCN

Traditional GCN

Z = f(X,A) = softmax(Ahat  ReLu(Ahat X W0) W1)

In [5]:
import torch
from torch.nn import Linear # Applies a linear transformation to the incoming data y = xA + b
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
  def __init__(self, in_channels, hidden_channels, out_channels):
    super(GCN, self).__init__()
    torch.manual_seed(42)

    self.conv1 = GCNConv(in_channels, hidden_channels)
    self.conv2 = GCNConv(hidden_channels, hidden_channels)
    self.out = Linear(hidden_channels, out_channels)

  def forward(self, x, edge_index):
    x = self.conv1(x, edge_index)
    x = torch.relu(x)
    x = self.conv2(x, edge_index)
    x = F.softmax(self.out(x), dim = 1)
    return x

GCN with dropout function

In [6]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
  def __init__(self, in_channels, hidden_channels, out_channels):
    super(GCN, self).__init__()
    torch.manual_seed(42)

    self.conv1 = GCNConv(in_channels, hidden_channels)
    self.conv2 = GCNConv(hidden_channels, hidden_channels)
    self.out = Linear(hidden_channels, out_channels)

  def forward(self, x, edge_index):
    x = self.conv1(x, edge_index)
    x = torch.relu(x)
    x = F.dropout(x, p = 0.5, training=self.training)

    x = self.conv2(x, edge_index)
    x = torch.relu(x)
    x = F.dropout(x, p = 0.5, training = self.training)

    x = F.softmax(self.out(x), dim = 1)
    return x

In [7]:
x = dataset.num_node_features
y = dataset.num_classes

model = GCN(x, 16, y)
print(model)

GCN(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 16)
  (out): Linear(in_features=16, out_features=7, bias=True)
)


In [8]:
device = ('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
data.to(device)

lr = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

criterion = torch.nn.CrossEntropyLoss()

def train():
  model.train()
  optimizer.zero_grad()
  out = model(data.x, data.edge_index)
  loss = criterion(out[data.train_mask], data.y[data.train_mask])
  loss.backward()
  optimizer.step()
  return loss

def test():
  model.eval()
  out = model(data.x, data.edge_index)
  pred = out.argmax(dim = 1)
  test_correct = pred[data.test_mask] == data.y[data.test_mask]
  test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
  return test_acc

losses = []
for epoch in range(0, 1001):
  loss = train()
  losses.append(loss)
  if epoch % 100 == 0:
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')


Epoch: 000, Loss: 1.9461
Epoch: 100, Loss: 1.4218
Epoch: 200, Loss: 1.3214
Epoch: 300, Loss: 1.2743
Epoch: 400, Loss: 1.2951
Epoch: 500, Loss: 1.2492
Epoch: 600, Loss: 1.2465
Epoch: 700, Loss: 1.2404
Epoch: 800, Loss: 1.2136
Epoch: 900, Loss: 1.2309
Epoch: 1000, Loss: 1.2074


In [9]:
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.7240


#MultiLayer GCN

In [13]:
import torch
from torch.nn import Linear, ModuleList
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN_improved(torch.nn.Module):
  def __init__(self, in_channels, hidden_channels, out_channels, num_layers):
    super(GCN_improved, self).__init__()


    self.conv_layers = ModuleList(
        [GCNConv(in_channels, hidden_channels)] +
        [GCNConv(hidden_channels, hidden_channels) for i in range(num_layers - 2)]
    )

    self.out = Linear(hidden_channels, out_channels)

  def forward(self, x, edge_index):
    for conv in self.conv_layers:
      x = conv(x, edge_index)
      x = torch.relu(x)
      x = F.dropout(x, p = 0.5, training = self.training)

    x = F.softmax(self.out(x), dim = 1)
    return x

In [14]:
x = dataset.num_node_features
y = dataset.num_classes

model = GCN_improved(in_channels= x, hidden_channels= 16, out_channels=7 , num_layers=5)
print(model)

GCN_improved(
  (conv_layers): ModuleList(
    (0): GCNConv(1433, 16)
    (1-3): 3 x GCNConv(16, 16)
  )
  (out): Linear(in_features=16, out_features=7, bias=True)
)


In [15]:
model = GCN_improved(in_channels=dataset.num_features, hidden_channels=16, out_channels=dataset.num_classes, num_layers=5)

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Train the model
model.train()
for epoch in range(1001):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
      print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')



Epoch: 000, Loss: 1.9460
Epoch: 100, Loss: 1.5775
Epoch: 200, Loss: 1.3739
Epoch: 300, Loss: 1.3270
Epoch: 400, Loss: 1.2823
Epoch: 500, Loss: 1.3030
Epoch: 600, Loss: 1.2692
Epoch: 700, Loss: 1.3021
Epoch: 800, Loss: 1.2689
Epoch: 900, Loss: 1.2515
Epoch: 1000, Loss: 1.2743


In [16]:
# Evaluate the model
model.eval()
with torch.no_grad():
    pred = model(data.x, data.edge_index).argmax(dim=1)
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.6680
