<a href="https://colab.research.google.com/github/giordamaug/BIONETdatasets/blob/main/TUD/notebooks/GNN pyGeo example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/giordamaug/BIONETdatasets/main?filepath=TUD%2Fnotebooks%2FGNN+pyGeo+example.ipynb)
<a href="https://kaggle.com/kernels/welcome?src=https://github.com/giordamaug/BIONETdatasets/blob/main/TUD/notebooks/GNN pyGeo example.ipynb" target="_parent"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"/></a>

# Install PyTorch Geometric and its dependencies (if required)

In [3]:
!pip install torch-scatter
!pip install torch-sparse
!pip install torch-cluster
!pip install torch-spline-conv
!pip install torch-geometric

[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m
Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0.html
Collecting torch-scatter==2.0.5
  Downloading torch_scatter-2.0.5.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: torch-scatter
  Building wheel for torch-scatter (setup.py) ... [?25ldone
[?25h  Created wheel for torch-scatter: filename=torch_scatter-2.0.5-cp39-cp39-macosx_10_13_x86_64.whl size=364028 sha256=b78c9da9623d9889ce4ac722bdc41863cd0a599e5ed867e892b43561bcc4e2ac
  Stored in directory: /Users/maurizio/Library/Caches/pip/wheels/a1/3b/1a/b1a8b1b9e2ef93724eef0abaf9ce0160673331cb28e4de6672
Successfully built torch-scatter
Installing collected packages: torch-scatter
[33mDEPRECATION: Con

# Load the dataset

In [1]:
dataname = 'KIDNEY'
import sys, os
# Make the notebook's working directory importable so the local `wrappers`
# package can be found.
sys.path.append(os.path.abspath('.'))
from wrappers.pygeo_wrapper import MyTUDataset
import shutil

# Extract into the current user's Downloads folder instead of a hard-coded
# absolute path that only exists on one machine.
extract_dir = os.path.join(os.path.expanduser('~'), 'Downloads')
shutil.unpack_archive(f'datasets/{dataname}.zip', extract_dir)
# Presumably `path` is the root folder of the unpacked dataset — TODO confirm
# against MyTUDataset.
dataset = MyTUDataset(dataname, path=os.path.join(extract_dir, dataname), use_node_attr=True, use_edge_attr=True)
print(dataset)

KIDNEY(299)


# Create model

In [2]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    """Graph-level classifier: three GCNConv layers, mean-pool readout, linear head.

    The input feature size and the number of output classes are read from the
    notebook-level ``dataset`` object at construction time.

    Args:
        hidden_channels: Width of every graph-convolution layer.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # Seed the RNG before any layer is created.
        torch.manual_seed(12345)
        in_channels = dataset.num_node_features
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Compute one row of class scores per graph in the batch.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity passed to each convolution.
            batch: Assignment of each node to its graph, used by the readout.

        Returns:
            The output of the final linear layer (unnormalised class scores).
        """
        # Node embeddings: ReLU follows the first two convolutions only.
        h = F.relu(self.conv1(x, edge_index))
        h = F.relu(self.conv2(h, edge_index))
        h = self.conv3(h, edge_index)

        # Readout: pool node embeddings into [batch_size, hidden_channels].
        pooled = global_mean_pool(h, batch)

        # Classifier head; dropout is only active while self.training is True.
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)

# Build a model with 64 hidden channels and print its layer summary.
# Note: `model` is constructed again in the evaluation cell before training.
model = GCN(hidden_channels=64)
print(model)

GCN(
  (conv1): GCNConv(2887, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=3, bias=True)
)


# Train and evaluate the model

In [3]:
from sklearn.model_selection import train_test_split
from torch_geometric.loader import DataLoader
import numpy as np
# One graph per mini-batch.
batch_size = 1
# Shuffle the dataset before splitting (presumably a random permutation of
# the graphs — TODO confirm against MyTUDataset).
dataset = dataset.shuffle()
# Hold out 10% of the graphs for testing; random_state fixes the split itself.
train_dataset, test_dataset = train_test_split(dataset, test_size=0.10, random_state=42)
# Training batches are reshuffled on every pass; test order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Start from a freshly constructed model (replacing any earlier `model`),
# optimised with Adam at lr=0.01 against a cross-entropy loss.
model = GCN(hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

def train():
    """Run one pass over ``train_loader``, updating the model once per batch.

    Uses the notebook-level ``model``, ``criterion`` and ``optimizer``.
    """
    model.train()

    for batch in train_loader:
        # Forward pass on the current mini-batch.
        logits = model(batch.x, batch.edge_index, batch.batch)
        # Back-propagate the loss, apply the update, then clear the gradients
        # so they do not accumulate into the next batch.
        criterion(logits, batch.y).backward()
        optimizer.step()
        optimizer.zero_grad()

def test(loader):
    """Return the fraction of samples in ``loader`` that the model classifies correctly.

    Uses the notebook-level ``model``, switched to evaluation mode.

    Args:
        loader: Data loader to evaluate on; its ``dataset`` length is the
            denominator of the returned ratio.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()

    correct = 0
    # Gradients are not needed for evaluation; disabling autograd avoids
    # building a computation graph for every batch.
    with torch.no_grad():
        for data in loader:
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Index of the highest-scoring class.
            correct += int((pred == data.y).sum())  # Count matches with the labels.
    return correct / len(loader.dataset)

# Test accuracy recorded after each epoch.
results = []
# NOTE(review): range(1, 100) runs 99 epochs, not 100 — confirm this is intended.
for epoch in range(1, 100):
    train()
    # Measure accuracy on both splits after every training pass.
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    results.append(test_acc)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
# NOTE(review): the value reported below is the mean test accuracy over all
# epochs, not the accuracy of the final model — confirm this is the intended metric.
print("Done. Test acc: {}".format(np.mean(results)))

Epoch: 001, Train Acc: 0.5353, Test Acc: 0.5000
Epoch: 002, Train Acc: 0.5353, Test Acc: 0.5000
Epoch: 003, Train Acc: 0.6357, Test Acc: 0.5000
Epoch: 004, Train Acc: 0.8699, Test Acc: 0.7333
Epoch: 005, Train Acc: 0.9033, Test Acc: 0.7000
Epoch: 006, Train Acc: 0.9926, Test Acc: 0.7667
Epoch: 007, Train Acc: 1.0000, Test Acc: 0.6667
Epoch: 008, Train Acc: 1.0000, Test Acc: 0.7667
Epoch: 009, Train Acc: 1.0000, Test Acc: 0.7333
Epoch: 010, Train Acc: 1.0000, Test Acc: 0.7333
Epoch: 011, Train Acc: 1.0000, Test Acc: 0.7667
Epoch: 012, Train Acc: 1.0000, Test Acc: 0.7667
Epoch: 013, Train Acc: 1.0000, Test Acc: 0.7667
Epoch: 014, Train Acc: 1.0000, Test Acc: 0.7333
Epoch: 015, Train Acc: 1.0000, Test Acc: 0.7333
Epoch: 016, Train Acc: 1.0000, Test Acc: 0.7000
Epoch: 017, Train Acc: 1.0000, Test Acc: 0.7000
Epoch: 018, Train Acc: 1.0000, Test Acc: 0.7000
Epoch: 019, Train Acc: 1.0000, Test Acc: 0.7333
Epoch: 020, Train Acc: 1.0000, Test Acc: 0.7000
Epoch: 021, Train Acc: 1.0000, Test Acc: