In [2]:
from torch_geometric.datasets import TUDataset

dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Extracting /tmp/ENZYMES/ENZYMES/ENZYMES.zip
Processing...
Done!


In [5]:
len(dataset)

600

In [6]:
dataset.num_classes

6

In [7]:
dataset.num_node_features

3

In [9]:
data = dataset[0]
print(data)

Data(edge_index=[2, 168], x=[37, 3], y=[1])


In [10]:
data.is_undirected()

True

In [12]:
# Making training data, testing data
train_dataset = dataset[:540]
test_dataset = dataset[540:]

In [14]:
# Shuffling data before split
dataset = dataset.shuffle()

# This is equivalent of doing:
import torch
perm  = torch.randperm(len(dataset))
dataset = dataset[perm]

In [15]:
# Cora dataset
# for semi-supervised graph node classification

from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [17]:
print(len(dataset))

1


In [18]:
dataset.num_classes

7

In [19]:
dataset.num_node_features

1433

In [21]:
# Single, undirected citation graph
data = dataset[0]
print(data)

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


In [24]:
# train_mask -> denotes which nodes to train
# val_mask -> denotes which nodes to use for validation
# test_mask -> denotes against which nodes to test

print(data.is_undirected())
print(data.train_mask.sum().item())
print(data.test_mask.sum().item())

True
140
1000


In [27]:
# Mini batches
from torch_geometric.loader import DataLoader

dataset = TUDataset(root='/tmp/ENZYMES', name="ENZYMES", use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for batch in loader:
    print(batch)
    print(batch.num_graphs)

DataBatch(edge_index=[2, 4046], x=[1071, 21], y=[32], batch=[1071], ptr=[33])
32
DataBatch(edge_index=[2, 3904], x=[967, 21], y=[32], batch=[967], ptr=[33])
32
DataBatch(edge_index=[2, 3878], x=[1011, 21], y=[32], batch=[1011], ptr=[33])
32
DataBatch(edge_index=[2, 4308], x=[1139, 21], y=[32], batch=[1139], ptr=[33])
32
DataBatch(edge_index=[2, 3800], x=[1001, 21], y=[32], batch=[1001], ptr=[33])
32
DataBatch(edge_index=[2, 3544], x=[984, 21], y=[32], batch=[984], ptr=[33])
32
DataBatch(edge_index=[2, 4632], x=[1210, 21], y=[32], batch=[1210], ptr=[33])
32
DataBatch(edge_index=[2, 4262], x=[1145, 21], y=[32], batch=[1145], ptr=[33])
32
DataBatch(edge_index=[2, 3760], x=[1042, 21], y=[32], batch=[1042], ptr=[33])
32
DataBatch(edge_index=[2, 4222], x=[1170, 21], y=[32], batch=[1170], ptr=[33])
32
DataBatch(edge_index=[2, 4054], x=[1060, 21], y=[32], batch=[1060], ptr=[33])
32
DataBatch(edge_index=[2, 4018], x=[1014, 21], y=[32], batch=[1014], ptr=[33])
32
DataBatch(edge_index=[2, 3934], 

Learning Methods on Graphs

In [31]:
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name="Cora")

In [32]:
# Implementing two-layer GCN

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)


In [33]:
# Training the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

In [34]:
#Evaluate the model
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.8130


In [41]:
dataset[0]

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [40]:
print(dataset.num_classes)

7
