<a href="https://colab.research.google.com/github/brunoneri/colab_notebooks/blob/main/GCN_Pytorch_Lightning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!pip install -q git+https://github.com/PyTorchLightning/pytorch-lightning

# Helper function for visualization.
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

import pytorch_lightning as pl
print(pl.__version__)


# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

def visualize(h, color):
    """Show a 2-D t-SNE scatter plot of the rows of ``h``.

    Args:
        h: Tensor-like object supporting ``detach().cpu().numpy()``; it is
            converted to a NumPy array and projected to two components.
        color: Per-point values forwarded to ``plt.scatter`` as ``c`` and
            mapped through the ``"Set2"`` colormap.
    """
    features = h.detach().cpu().numpy()
    projected = TSNE(n_components=2).fit_transform(features)

    # Square canvas without axis ticks.
    plt.figure(figsize=(10, 10))
    plt.xticks([])
    plt.yticks([])

    xs, ys = projected[:, 0], projected[:, 1]
    plt.scatter(xs, ys, s=70, c=color, cmap="Set2")
    plt.show()

In [None]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
import torch_geometric.data as geom_data

# Seed through Lightning before anything else is created.
pl.seed_everything(1234567)

# Cora from the Planetoid collection, with the NormalizeFeatures transform.
dataset = Planetoid(
    root='data/Planetoid',
    name='Cora',
    transform=NormalizeFeatures(),
)

# Dataset-level summary (leading '' reproduces the blank line).
print(
    '',
    f'Dataset: {dataset}:',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

data = dataset[0]  # The first graph object of the dataset.

print(
    '',
    data,
    '===========================================================================================================',
    sep='\n',
)

# Graph-level statistics.
print(
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Number of training nodes: {data.train_mask.sum()}',
    f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)

In [None]:
from torch_geometric.nn import GCNConv


class GCN(pl.LightningModule):
    """Two-layer GCN for node classification, wrapped as a LightningModule.

    The forward pass is ``GCNConv -> ReLU -> dropout(p=0.5) -> GCNConv`` and
    returns unnormalised class scores, one row per node.

    Each step receives a batch exposing ``x``, ``edge_index``, ``y`` and the
    boolean node masks ``train_mask``, ``val_mask`` and ``test_mask``.

    Args:
        num_features: Size of each input node feature vector.
        num_classes: Number of output classes (width of the second layer).
        hidden_channels: Width of the hidden representation.
    """

    def __init__(self, num_features, num_classes, hidden_channels):
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        """Return per-node class scores for features ``x`` on ``edge_index``."""
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        return self.conv2(x, edge_index)

    def configure_optimizers(self):
        """Return the Adam optimizer used for training."""
        return torch.optim.Adam(self.parameters(), lr=0.1, weight_decay=5e-4)

    @staticmethod
    def _masked_accuracy(logits, labels, mask):
        """Return the fraction of nodes selected by ``mask`` predicted correctly."""
        pred = logits.argmax(dim=1)
        correct = pred[mask] == labels[mask]
        return int(correct.sum()) / int(mask.sum())

    def training_step(self, batch, idx):
        """Log and return the cross-entropy loss on the training nodes."""
        out = self(batch.x, batch.edge_index)
        loss = F.cross_entropy(out[batch.train_mask], batch.y[batch.train_mask])
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, idx):
        """Log loss and accuracy on the validation nodes; return the accuracy.

        Both metrics use ``batch.val_mask`` so that validation neither
        repeats the training loss nor looks at the held-out test nodes.
        Assumes the batch carries a ``val_mask`` (Planetoid graphs do).
        """
        out = self(batch.x, batch.edge_index)
        loss = F.cross_entropy(out[batch.val_mask], batch.y[batch.val_mask])
        self.log('val_loss', loss)
        val_acc = self._masked_accuracy(out, batch.y, batch.val_mask)
        self.log('val_acc', val_acc)
        return val_acc

    def test_step(self, batch, idx):
        """Log accuracy on the test nodes and return it as ``{'test_acc': ...}``."""
        out = self(batch.x, batch.edge_index)
        test_acc = self._masked_accuracy(out, batch.y, batch.test_mask)
        self.log('test_acc', test_acc)
        return {'test_acc': test_acc}

In [None]:
# model
model = GCN(dataset.num_features, dataset.num_classes, hidden_channels=16)
model.eval()

# Plot the embeddings of the untrained model. This is inference only, so
# skip building the autograd graph.
with torch.no_grad():
    out = model(data.x, data.edge_index)
visualize(out, color=data.y)

In [None]:
# data
# torch_geometric.data.DataLoader is deprecated; the loader lives in
# torch_geometric.loader. Aliased so it does not shadow torch's DataLoader.
from torch_geometric.loader import DataLoader as GeometricDataLoader

graphdata = GeometricDataLoader(dataset)

# training
# `gpus=` was removed from recent Lightning releases; select the hardware
# through `accelerator` / `devices` instead.
trainer = pl.Trainer(
    accelerator="gpu" if device.type == "cuda" else "cpu",
    devices=1,
    max_epochs=101,
)

# The same single-graph loader serves as train and validation loader; the
# node masks inside the graph decide which nodes each step uses.
trainer.fit(model, graphdata, graphdata)
test = trainer.test(model, graphdata)


print(f'Accuracy: {test[0]["test_acc"]:.4f}')

In [None]:
model.eval()

# Plot the embeddings of the trained model. This is inference only, so skip
# autograd, and move the inputs to whichever device the model is on now.
with torch.no_grad():
    out = model(data.x.to(model.device), data.edge_index.to(model.device))
visualize(out, color=data.y)