In [2]:
import numpy as np
import torch
import torch.optim as optim

from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader

import torch_geometric.nn as pyg_nn

import models
import utils

### PyTorch Geometric Basics

In [3]:
from torch_geometric.data import Data

# A single graph in PyTorch Geometric is described by an instance of torch_geometric.data.Data
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)
Data(x=x, edge_index=edge_index)

Data(edge_index=[2, 4], x=[3, 1])

In [4]:
# If edge_index are tuples, transpose it
edge_index = torch.tensor([[0, 1],
                           [1, 0],
                           [1, 2],
                           [2, 1]], dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)
data = Data(x=x, edge_index=edge_index.t().contiguous())
data

Data(edge_index=[2, 4], x=[3, 1])

In [5]:
print(data.num_nodes)
print(data.num_edges)
print(data.is_directed())

3
4
False


In [6]:
# The first graph in the ENZYMES dataset contains 37 nodes, each one having 3 features. There are 168/2 = 84 
# undirected edges and the graph is assigned to exactly one class
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
print(dataset[0])

600 6 3
Data(edge_index=[2, 168], x=[37, 3], y=[1])


In [7]:
dataset = dataset.shuffle()

In [8]:
from torch_geometric.datasets import Planetoid
dataset2 = Planetoid(root='/tmp/Cora', name='Cora')
print(len(dataset2), dataset2.num_classes, dataset2.num_node_features)
print(dataset2[0])

1 7 1433
Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])


In [9]:
from torch_scatter import scatter_mean
loader = DataLoader(dataset, batch_size=32, shuffle=True)
for data in loader:
    print(data)
    print(data.num_graphs)
    print(scatter_mean(data.x, data.batch, dim=0).size())

Batch(batch=[1051], edge_index=[2, 4036], x=[1051, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1094], edge_index=[2, 4228], x=[1094, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1047], edge_index=[2, 3616], x=[1047, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1044], edge_index=[2, 4170], x=[1044, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[948], edge_index=[2, 3666], x=[948, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1040], edge_index=[2, 4030], x=[1040, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[953], edge_index=[2, 3610], x=[953, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[985], edge_index=[2, 3746], x=[985, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1017], edge_index=[2, 3834], x=[1017, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1092], edge_index=[2, 4106], x=[1092, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[990], edge_index=[2, 3792], x=[990, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1183], edge_index=[2, 4306], x=[1183, 3], y=[32])

In [10]:
# Transform
from torch_geometric.datasets import ShapeNet
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'])
dataset[0]

Data(category=[1], pos=[2518, 3], x=[2518, 3], y=[2518])

In [11]:
# Convert the point cloud dataset into a graph dataset by generating nearest neighbor graphs from the point clouds via transforms
import torch_geometric.transforms as T
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'],
                    pre_transform=T.KNNGraph(k=6))
dataset[0]



Data(category=[1], pos=[2518, 3], x=[2518, 3], y=[2518])

In [12]:
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'],
                    pre_transform=T.KNNGraph(k=6),
                    transform=T.RandomTranslate(0.01))
dataset[0]



Data(category=[1], pos=[2518, 3], x=[2518, 3], y=[2518])

In [13]:
dataset = Planetoid(root='/tmp/Cora', name='Cora')

### A Simple 2-layer GCN

In [14]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

In [17]:
model.eval()
_, pred = model(data).max(dim=1)
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())
print('Accuracy: {:.4f}'.format(acc))

Accuracy: 0.8030


### Steps of Message Passing

In [18]:
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

class GCNConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(GCNConv, self).__init__(aggr='add')  # "Add" aggregation (Step 5).
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        # x has shape [N, in_channels]
        # edge_index has shape [2, E]

        # Step 1: Add self-loops to the adjacency matrix.
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Step 2: Linearly transform node feature matrix.
        x = self.lin(x)

        # Step 3: Compute normalization.
        row, col = edge_index
        deg = degree(col, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Step 4-5: Start propagating messages.
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        # x_j has shape [E, out_channels]

        # Step 4: Normalize node features.
        return norm.view(-1, 1) * x_j

In [21]:
conv = GCNConv(16, 32)

### Edge Conv

In [22]:
from torch.nn import Sequential as Seq, Linear, ReLU
from torch_geometric.nn import MessagePassing

class EdgeConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(EdgeConv, self).__init__(aggr='max') #  "Max" aggregation.
        self.mlp = Seq(Linear(2 * in_channels, out_channels),
                       ReLU(),
                       Linear(out_channels, out_channels))

    def forward(self, x, edge_index):
        # x has shape [N, in_channels]
        # edge_index has shape [2, E]

        return self.propagate(edge_index, x=x)

    def message(self, x_i, x_j):
        # x_i has shape [E, in_channels]
        # x_j has shape [E, in_channels]

        tmp = torch.cat([x_i, x_j - x_i], dim=1)  # tmp has shape [E, 2 * in_channels]
        return self.mlp(tmp)

In [23]:
from torch_geometric.nn import knn_graph

class DynamicEdgeConv(EdgeConv):
    def __init__(self, in_channels, out_channels, k=6):
        super(DynamicEdgeConv, self).__init__(in_channels, out_channels)
        self.k = k

    def forward(self, x, batch=None):
        edge_index = knn_graph(x, self.k, batch, loop=False, flow=self.flow)
        return super(DynamicEdgeConv, self).forward(x, edge_index)

In [25]:
from torch_geometric.data import InMemoryDataset

class MyOwnDataset(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(MyOwnDataset, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        return ['some_file_1', 'some_file_2', ...]

    @property
    def processed_file_names(self):
        return ['data.pt']

    def download(self):
        # Download to `self.raw_dir`.
        pass

    def process(self):
        # Read data into huge `Data` list.
        data_list = [...]

        if self.pre_filter is not None:
            data_list = [data for data in data_list if self.pre_filter(data)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(data) for data in data_list]

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [26]:
from torch_geometric.data import Dataset

class MyOwnDataset(Dataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(MyOwnDataset, self).__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        return ['some_file_1', 'some_file_2', ...]

    @property
    def processed_file_names(self):
        return ['data_1.pt', 'data_2.pt', ...]

    def download(self):
        # Download to `self.raw_dir`.

    def process(self):
        i = 0
        for raw_path in self.raw_paths:
            # Read data from `raw_path`.
            data = Data(...)

            if self.pre_filter is not None and not self.pre_filter(data):
                continue

            if self.pre_transform is not None:
                data = self.pre_transform(data)

            torch.save(data, osp.join(self.processed_dir, 'data_{}.pt'.format(i)))
            i += 1

    def len(self):
        return len(self.processed_file_names)

    def get(self, idx):
        data = torch.load(osp.join(self.processed_dir, 'data_{}.pt'.format(idx)))
        return data

IndentationError: expected an indented block (<ipython-input-26-128d225f5d8d>, line 20)