## PyTorch Geometric (PyG) tests

In [None]:
import torch

## Datasets
Basic graph data definition in PyG

### Data points
Example of graph implementation in PyG as data points (`Data` class), i.e. elements of a graph dataset.

In [None]:
from torch_geometric.data import Data

# Graph connectivity is stored as `edge_index`, a [2, num_edges] tensor: the first row
# holds the starting node index and the second row the ending node index of each edge.
# Here the edges are written as (source, target) pairs and then transposed into that
# layout; they describe a directed graph on three nodes (0, 1, 2) with four edges
# (0->1, 1->0, 1->2, 2->1).
edge_index = torch.tensor([[0, 1],
                           [1, 0],
                           [1, 2],
                           [2, 1]], dtype=torch.long).t().contiguous()

# `x` is the node feature matrix, one row per node. Each node gets a 1-dimensional
# feature vector here (x0 = -1, x1 = 0, x2 = 1), hence the final shape [3, 1].
x = torch.tensor([-1, 0, 1], dtype=torch.float).unsqueeze(1)

# The feature matrix and the connectivity together define a PyG data point (i.e. a graph).
data = Data(x=x, edge_index=edge_index)

# NOTE: the default string representation shows only the sizes of x and edge_index.
print(data)

### Implemented Datasets
How to import one of the graph datasets already implemented in PyG, using the `Dataset` class. 

In [None]:
from torch_geometric.datasets import BAShapes

# Calling a dataset constructor from the library returns an instance of that dataset.
# In this example, BAShapes() returns a Barabasi-Albert (BA) graph enhanced with some motifs
# (300 nodes and a set of 80 "house"-structured graphs connected to it), generated following
# the "GNNExplainer: Generating Explanations for Graph Neural Networks" paper.
dataset = BAShapes(connection_distribution="random")
print(f"[dataset]> ...loading dataset '{dataset}' from PyG")

# A Dataset object exposes some summary attributes about the data it holds.
dataset_summary = (
    ("\t#entries:      ", len(dataset)),
    ("\t#classes:      ", dataset.num_classes),
    ("\t#node_features:", dataset.num_node_features),
    ("\t#edge_features:", dataset.num_edge_features),
)
for label, value in dataset_summary:
    print(label, value)

# Indexing the dataset retrieves one entry (i.e. a graph) as a Data object (a data point).
graph = dataset[0]
print(f"\n[dataset]> {dataset} dataset graph...")
print("\t->", graph)
graph_summary = (
    ("\t#nodes:", graph.num_nodes),
    ("\t#edges:", graph.num_edges),
)
for label, value in graph_summary:
    print(label, value)

#### sparse COO Tensors

In [None]:
import torch

# Shape of the node feature matrix of the graph loaded in the previous cell.
print("node_features:", graph.x.size())

edge_idx = graph.edge_index
# `size` is a method: without the call parentheses the bound method object is printed
# instead of the tensor shape.
print(edge_idx.size())

# Densify the sparse COO connectivity with torch: `i` holds the (row, col) coordinates,
# `v` one value (1.0) per coordinate, `s` the shape of the resulting square matrix.
i = edge_idx
v = torch.ones(edge_idx.size(1))
s = (graph.num_nodes, graph.num_nodes)
print(v.size())
dense = torch.sparse_coo_tensor(i, v, s).to_dense()
print(dense.size())

### Extract node neighborhood
How to use `k_hop_subgraph()` to extract the k-hop neighborhood of a node.

In [1]:
import torch
from torch_geometric.utils import k_hop_subgraph

In [2]:
# Toy directed graph in [2, num_edges] layout (first row: source nodes, second row:
# target nodes): 0->2, 1->2, 2->4, 3->4, 4->6, 5->6.
edge_index = torch.tensor([
    [0, 1, 2, 3, 4, 5],
    [2, 2, 4, 4, 6, 6],
])

# Extract the 2-hop neighborhood of node 6. With relabel_nodes=True the returned
# edge_index is re-indexed; note that it replaces the full-graph `edge_index` above.
subset, edge_index, mapping, edge_mask = k_hop_subgraph(
    node_idx=6,
    num_hops=2,
    edge_index=edge_index,
    relabel_nodes=True,
)

In [8]:
# Show the results of the k-hop extraction: the subgraph connectivity, the nodes that
# belong to the subgraph, and the mapping returned for the queried node.
print("edge_index:\n", edge_index)
# This line prints `subset`, so it is labelled as such (it was mislabelled "edge_index").
print("subset:\t\t", subset)
print("mapping:\t", mapping)

edge_index:
 tensor([[0, 1, 2, 3],
        [2, 2, 4, 4]])
edge_index:	 tensor([2, 3, 4, 5, 6])
mapping:	 tensor([4])


## Learning

In [1]:
from torch_geometric.datasets import Planetoid
# Load the Cora dataset through the Planetoid class; its files are stored under `root`.
# NOTE: this rebinds the notebook-level `dataset` name.
dataset = Planetoid(
    root='/tmp/Cora',
    name='Cora',
)

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network built from two `GCNConv` layers.

    Args:
        in_channels: Size of the input node features. When ``None`` (default) it is
            read from the notebook-level ``dataset.num_node_features``.
        hidden_channels: Output size of the first convolution (default ``16``).
        out_channels: Output size of the second convolution. When ``None`` (default)
            it is read from the notebook-level ``dataset.num_classes``.

    Calling ``GCN()`` with no arguments builds exactly the same layers as before;
    passing explicit sizes makes the model independent of whatever the global
    ``dataset`` name is bound to when the model is constructed.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None):
        super().__init__()
        # Fall back to the global dataset only for sizes that were not given explicitly.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes

        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both convolutions on a graph and return log-softmax scores.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``
                (graph connectivity), e.g. a PyG ``Data`` instance.

        Returns:
            The output of the second convolution after ``log_softmax`` along ``dim=1``.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is active only while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [3]:
# Hyper-parameters of the training run.
NUM_EPOCHS = 200
LEARNING_RATE = 0.01
WEIGHT_DECAY = 5e-4

# Use the GPU when CUDA is available, otherwise stay on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Move both the model and the (single) graph of the dataset to the chosen device.
model = GCN().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# Training mode (the model's dropout depends on `self.training`).
model.train()
for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()
    out = model(data)
    # The loss is computed only on the nodes selected by the training mask.
    train_mask = data.train_mask
    loss = F.nll_loss(out[train_mask], data.y[train_mask])
    print(f"epoch {epoch}, loss:{loss}")
    loss.backward()
    optimizer.step()

epoch 0, loss:1.9529213905334473
epoch 1, loss:1.8417927026748657
epoch 2, loss:1.7148265838623047
epoch 3, loss:1.5779656171798706
epoch 4, loss:1.439921259880066
epoch 5, loss:1.280748963356018
epoch 6, loss:1.1596226692199707
epoch 7, loss:1.0392756462097168
epoch 8, loss:0.9414781928062439
epoch 9, loss:0.8248002529144287
epoch 10, loss:0.7415125370025635
epoch 11, loss:0.6432251930236816
epoch 12, loss:0.5623167157173157
epoch 13, loss:0.4970880150794983
epoch 14, loss:0.4246888756752014
epoch 15, loss:0.3953545093536377
epoch 16, loss:0.32873061299324036
epoch 17, loss:0.27703437209129333
epoch 18, loss:0.2684898376464844
epoch 19, loss:0.2486361414194107
epoch 20, loss:0.18370674550533295
epoch 21, loss:0.20511119067668915
epoch 22, loss:0.16694317758083344
epoch 23, loss:0.16266244649887085
epoch 24, loss:0.13111881911754608
epoch 25, loss:0.14079374074935913
epoch 26, loss:0.11301562935113907
epoch 27, loss:0.10305289179086685
epoch 28, loss:0.09408984333276749
epoch 29, loss: