In [None]:
# Mount Google Drive at /gdrive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [None]:
# Change the working directory to the GNN/packages folder on the mounted Drive.
%cd /gdrive/MyDrive/GNN/packages

/gdrive/MyDrive/GNN/packages


In [None]:
# Show the current working directory to confirm the %cd above took effect.
%pwd

'/gdrive/MyDrive/GNN/packages'

In [None]:
!pip install --upgrade torch-scatter
!pip install --upgrade torch-sparse
!pip install --upgrade torch-cluster
!pip install --upgrade torch-spline-conv 
!pip install torch-geometric



In [None]:
import torch
from torch_geometric.data import Data

# Node feature matrix: three nodes, one scalar feature each.
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# Connections written as (source, target) pairs; each link is listed in both directions.
edge_index = torch.tensor(
    [[0, 1], [1, 0], [1, 2], [2, 1]],
    dtype=torch.long,
)

# The pair list is transposed (and made contiguous) before it is handed to Data,
# giving the [2, num_edges] layout shown in the output.
data = Data(x=x, edge_index=edge_index.t().contiguous())
data

Data(x=[3, 1], edge_index=[2, 4])

- Useful utility functions and attributes

In [None]:
# `Data.keys` is a property in older PyTorch Geometric releases and a method in
# newer ones; resolve either form so the attribute names are printed (and not
# a bound-method repr) regardless of the installed version.
graph_keys = data.keys() if callable(data.keys) else data.keys
print(f'keys of graph attributes: {graph_keys}')  # names of the stored node/edge attributes

print(f"node features: {data['x']}")

# Iterating over a Data object yields (attribute name, value) pairs.
for key, _ in data:
  print(f'{key} found in the data')

# Membership test on the attribute names.
print('edge_attr' in data)

print(f'Number of nodes: {data.num_nodes}')

print(f"Number of edges: {data.num_edges}")

print(f"number of node features: {data.num_node_features}")

print(f"is node isolaged? : {data.has_isolated_nodes()}")

print(f"has graph self loops? : {data.has_self_loops()}")

print(f"is graph directed? : {data.is_directed()}")

keys of graph attributes: ['edge_index', 'x']
node features: tensor([[-1.],
        [ 0.],
        [ 1.]])
x found in the data
edge_index found in the data
False
Number of nodes: 3
Number of edges: 4
number of node features: 1
is node isolaged? : False
has graph self loops? : False
is graph directed? : False


In [None]:
from torch_geometric.datasets import TUDataset

# Load the ENZYMES graph-classification benchmark; files are kept under /tmp/ENZYMES.
dataset = TUDataset(root='/tmp/ENZYMES', name="ENZYMES")

# The dataset repr includes the number of graphs it contains.
print(f"dataset information e,g. Number of graphs in the dataset: {dataset}")

print(f"Number of classes in the dataset: {dataset.num_classes}")

print(f"Number of node features: {dataset.num_node_features}")

dataset information e,g. Number of graphs in the dataset: ENZYMES(600)
Number of classes in the dataset: 6
Number of node features: 3


In [None]:
# Pick the first graph of the dataset and check whether it is undirected.
data = dataset[0]
print(data.is_undirected())

# Summary of the graph, e.g. 168 stored edge entries (168 / 2 = 84 undirected
# edges), 37 nodes, and 3 features per node.
data

True


Data(edge_index=[2, 168], x=[37, 3], y=[1])

- Splitting the graph dataset into training and test sets (90/10)

In [None]:
# Shuffle BEFORE slicing: a plain positional split puts whatever graphs are
# stored last into the test set, which is only a fair sample if the stored
# order is already random (not guaranteed for this dataset).
shuffled_dataset = dataset.shuffle()

# 90 % of the graphs go to training (540 of the 600 ENZYMES graphs); integer
# arithmetic keeps the cut-off exact for any dataset size.
num_train = len(shuffled_dataset) * 9 // 10

train_dataset = shuffled_dataset[:num_train]

print(train_dataset)

test_dataset = shuffled_dataset[num_train:]

print(test_dataset)

ENZYMES(540)
ENZYMES(60)


In [None]:
# Randomly permute the training graphs; shuffle() returns the permuted dataset,
# which replaces the previous train_dataset.
train_dataset = train_dataset.shuffle()


- Let's download another dataset: 'Cora'.

In [None]:
from torch_geometric.datasets import Planetoid

# Keep the download under /tmp like the other datasets in this notebook; the
# previous relative 'tmp/Cora' resolved against the current working directory
# (the mounted Drive folder) instead.
dataset = Planetoid(root='/tmp/Cora', name='Cora')

# Cora holds a single graph; fetch it once rather than re-indexing the dataset
# for every line printed below.
cora_graph = dataset[0]

print(f'dataset length: {len(dataset)}')

print(f'Number of classes in Cora dataset: {dataset.num_classes}')

print(f'Number of node features in the dataset: {dataset.num_node_features}')

print(f'Information about the graph: {cora_graph}')

print(f'is data undirected?: {cora_graph.is_undirected()}')

# Each mask is a per-node boolean vector, so its sum is the size of that split.
print(f"sum of the elements of the train masks: {cora_graph.train_mask.sum().item()}")

print(f"sum of the elements of the validation masks: {cora_graph.val_mask.sum().item()}")

print(f"sum of the elements of the test masks: {cora_graph.test_mask.sum().item()}")

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index


dataset length: 1
Number of classes in Cora dataset: 7
Number of node features in the dataset: 1433
Information about the graph: Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
is data undirected?: True
sum of the elements of the train masks: 140
sum of the elements of the validation masks: 500
sum of the elements of the test masks: 1000


Processing...
Done!


- Mini-batches 

In [None]:
from torch_geometric.datasets import TUDataset
from torch_scatter import scatter_mean
from torch_geometric.loader import DataLoader

# Reload ENZYMES with use_node_attr=True; the batches printed below then carry
# 21 features per node.
dataset = TUDataset(
    root="/tmp/ENZYMES",
    name="ENZYMES",
    use_node_attr=True,
)

# Serve the graphs in shuffled mini-batches of 32.
loader = DataLoader(dataset, shuffle=True, batch_size=32)

# Print the summary of every mini-batch.
for batch in loader:
  print(batch)

# After the loop `batch` still refers to the last mini-batch that was produced.
print(batch.num_graphs)

DataBatch(edge_index=[2, 3956], x=[1092, 21], y=[32], batch=[1092], ptr=[33])
DataBatch(edge_index=[2, 3974], x=[1008, 21], y=[32], batch=[1008], ptr=[33])
DataBatch(edge_index=[2, 3816], x=[1022, 21], y=[32], batch=[1022], ptr=[33])
DataBatch(edge_index=[2, 3882], x=[1044, 21], y=[32], batch=[1044], ptr=[33])
DataBatch(edge_index=[2, 4154], x=[1074, 21], y=[32], batch=[1074], ptr=[33])
DataBatch(edge_index=[2, 3674], x=[937, 21], y=[32], batch=[937], ptr=[33])
DataBatch(edge_index=[2, 4166], x=[1110, 21], y=[32], batch=[1110], ptr=[33])
DataBatch(edge_index=[2, 4794], x=[1261, 21], y=[32], batch=[1261], ptr=[33])
DataBatch(edge_index=[2, 3488], x=[901, 21], y=[32], batch=[901], ptr=[33])
DataBatch(edge_index=[2, 3726], x=[1011, 21], y=[32], batch=[1011], ptr=[33])
DataBatch(edge_index=[2, 4010], x=[1036, 21], y=[32], batch=[1036], ptr=[33])
DataBatch(edge_index=[2, 3894], x=[1054, 21], y=[32], batch=[1054], ptr=[33])
DataBatch(edge_index=[2, 4038], x=[1074, 21], y=[32], batch=[1074], 

Data transforms

In [None]:
from torch_geometric.datasets import ShapeNet

# Load only the 'Airplane' category of ShapeNet; files are kept under /tmp/ShapeNet.
dataset = ShapeNet(
    root='/tmp/ShapeNet',
    categories=['Airplane'],
)

# Inspect the first sample.
dataset[0]


Data(x=[2518, 3], y=[2518], pos=[2518, 3], category=[1])

In [None]:
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet

# `pre_transform` is only applied while a dataset is being processed for the
# first time.  '/tmp/ShapeNet' was already processed WITHOUT a pre_transform by
# the earlier cell, so reusing that root just reloads the cached files, emits
# the "`pre_transform` argument differs" warning, and the sample has no
# edge_index.  A dedicated root forces processing with KNNGraph.
# NOTE: the first run with the new root downloads and processes the data again.
dataset = ShapeNet(root='/tmp/ShapeNet_knn', categories=['Airplane'],
                    pre_transform=T.KNNGraph(k=6))

# The sample should now also contain an `edge_index` built from the 6 nearest neighbours.
dataset[0]

  f"The `pre_transform` argument differs from the one used in "


Data(x=[2518, 3], y=[2518], pos=[2518, 3], category=[1])

In [None]:
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet

# `pre_transform` is only applied while a dataset is being processed for the
# first time.  '/tmp/ShapeNet' was already processed WITHOUT a pre_transform,
# so reusing that root reloads the cached files, emits the "`pre_transform`
# argument differs" warning, and no KNN graph is built.  A dedicated root makes
# KNNGraph take effect.
# NOTE: the first run with the new root downloads and processes the data again.
dataset = ShapeNet(root='/tmp/ShapeNet_knn', categories=['Airplane'],
                    pre_transform=T.KNNGraph(k=6),           # run once, result cached on disk
                    transform=T.RandomTranslate(0.01))       # run on every access

dataset[0]

  f"The `pre_transform` argument differs from the one used in "


Data(x=[2518, 3], y=[2518], pos=[2518, 3], category=[1])

# Learning methods on graphs

- Load the Cora dataset

In [None]:
from torch_geometric.datasets import Planetoid

# Load Cora again; the files are stored in a 'Cora' folder relative to the
# current working directory.
dataset = Planetoid(
    root='Cora',
    name='Cora',
)
dataset

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


Cora()

In [None]:
index = 0

# Fetch the graph once instead of re-indexing the dataset for every line printed.
graph = dataset[index]

print(f'dataset size: {len(dataset)} and number of classes: {dataset.num_classes}')
print(f'first graph in the dataset: {graph}')
print(f"Nodes in the {index}th graph: {graph.num_nodes}")
print(f"features in the {index}th graph nodes: {dataset.num_node_features}")
# The graph is undirected, so every edge is stored once per direction; integer
# division reports the number of undirected edges as a whole number.
print(f"edges in the {index}th graph: {graph.num_edges // 2}")


# Train GNN


In [None]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
  """Two-layer graph convolutional network that returns per-node log-probabilities.

  Args:
    in_channels: Number of input features per node. When omitted, falls back
      to ``dataset.num_node_features`` of the notebook-level ``dataset``.
    hidden_channels: Width of the hidden layer (16 by default).
    out_channels: Number of output classes. When omitted, falls back to
      ``dataset.num_classes`` of the notebook-level ``dataset``.
  """

  def __init__(self, in_channels=None, hidden_channels=16, out_channels=None):
    super().__init__()
    # Explicit sizes make the model independent of whichever `dataset` is
    # currently bound in the notebook; the fallbacks keep `GCN()` working.
    if in_channels is None:
      in_channels = dataset.num_node_features
    if out_channels is None:
      out_channels = dataset.num_classes

    self.conv1 = GCNConv(in_channels, hidden_channels)
    self.conv2 = GCNConv(hidden_channels, out_channels)

  def forward(self, data):
    """Run both convolutions on ``data.x`` / ``data.edge_index``.

    Returns:
      Log-softmax scores over the class dimension (dim=1), one row per node.
    """
    x, edge_index = data.x, data.edge_index

    x = self.conv1(x, edge_index)
    x = F.relu(x)

    # Dropout is only active while the module is in training mode.
    x = F.dropout(x, training = self.training)
    x = self.conv2(x, edge_index)

    return F.log_softmax(x, dim=1)

In [None]:
# Labels of the nodes selected by the training mask.
first_graph = dataset[0]
first_graph.y[first_graph.train_mask]

tensor([3, 4, 4, 0, 3, 2, 0, 3, 3, 2, 0, 0, 4, 3, 3, 3, 2, 3, 1, 3, 5, 3, 4, 6,
        3, 3, 6, 3, 2, 4, 3, 6, 0, 4, 2, 0, 1, 5, 4, 4, 3, 6, 6, 4, 3, 3, 2, 5,
        3, 4, 5, 3, 0, 2, 1, 4, 6, 3, 2, 2, 0, 0, 0, 4, 2, 0, 4, 5, 2, 6, 5, 2,
        2, 2, 0, 4, 5, 6, 4, 0, 0, 0, 4, 2, 4, 1, 4, 6, 0, 4, 2, 4, 6, 6, 0, 0,
        6, 5, 0, 6, 0, 2, 1, 1, 1, 2, 6, 5, 6, 1, 2, 2, 1, 5, 5, 5, 6, 5, 6, 5,
        5, 1, 6, 6, 1, 5, 1, 6, 5, 5, 5, 1, 5, 1, 1, 1, 1, 1, 1, 1])

In [None]:
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01, weight_decay=5e-4)

model.train()
loss_metric = []   # training loss per epoch, stored as plain Python floats
steps = []         # epoch index matching each entry of loss_metric
num_epochs = 500
for epoch in range(num_epochs):
  optimizer.zero_grad()
  out = model(data)
  # Only the nodes selected by the training mask contribute to the loss.
  loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
  # Record a detached float: storing `loss.cpu()` would keep a tensor that is
  # still attached to the autograd graph for every epoch, and such tensors
  # cannot be converted to NumPy for plotting.
  loss_metric.append(loss.item())
  steps.append(epoch)
  loss.backward()
  optimizer.step()


In [None]:
# Switch to evaluation mode so dropout is disabled during inference.
model.eval()

# No gradients are needed for evaluation; skipping autograd bookkeeping saves
# memory and time without changing the predictions.
with torch.no_grad():
  pred = model(data).argmax(dim=1)
  correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()

# Accuracy = correctly classified test nodes / total number of test nodes.
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.8010
