In [1]:
# This notebook follows the PyTorch Geometric introduction tutorial: https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html
# 

In [2]:
import torch
from torch_geometric.data import Data


In [3]:
# Part 1 - a minimal graph: 3 nodes, 2 undirected edges.
# edge_index has shape [2, num_edges]; it is NOT a list of (source, target) tuples.
edge_index = torch.tensor(
    [
        [0, 1, 1, 2],  # source node of every directed edge
        [1, 0, 2, 1],  # target node of every directed edge
    ],
    dtype=torch.long,
)
# One scalar feature per node.
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)
data = Data(x=x, edge_index=edge_index)

# Each undirected edge is stored once per direction, so the two edges 0-1 and 1-2
# take up four columns of edge_index. No labels were passed in, hence data.y is None.
print(data, data.x, data.edge_index, data.y, sep="\n")

Data(edge_index=[2, 4], x=[3, 1])
tensor([[-1.],
        [ 0.],
        [ 1.]])
tensor([[0, 1, 1, 2],
        [1, 0, 2, 1]])
None


In [4]:
# sw: inspect the `data` object built in the previous cell.
# Every result except the last one is followed by a blank line (end="\n\n").
print(data.keys, end="\n\n")  # names of the attributes stored on the object
print(data['x'], end="\n\n")  # attributes can also be read dict-style

for key, item in data:  # iterating yields (attribute name, value) pairs
    print("{} found in data".format(key))
print()

print('edge_attr' in data, end="\n\n")  # membership test on attribute names
print(data.num_nodes, end="\n\n")
print(data.num_edges, end="\n\n")  # counts directed entries, i.e. columns of edge_index
# NOTE(review): newer PyG releases appear to rename the two contains_* helpers to
# has_isolated_nodes() / has_self_loops() - confirm against the installed version.
print(data.contains_isolated_nodes(), end="\n\n")
print(data.contains_self_loops(), end="\n\n")
print(data.is_directed())

['x', 'edge_index']

tensor([[-1.],
        [ 0.],
        [ 1.]])

edge_index found in data
x found in data

False

3

4

False

False

False


In [5]:
# common benchmark data sets
from torch_geometric.datasets import TUDataset

# ENZYMES data set: 600 small graphs, 6 classes, 3 node features
# (the three numbers printed below).
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
print(len(dataset))
print(dataset.num_classes)
print(dataset.num_node_features)

# Look at the first graph of the unshuffled dataset so this printout stays deterministic.
data = dataset[0]
print(data)

# Shuffle before cutting contiguous slices.
# NOTE(review): benchmark files like this are often stored grouped by class label, in
# which case slicing the raw order would give a test split with a skewed class mix.
# The PyG introduction recommends dataset.shuffle() when the order is not known to be random.
shuffled_dataset = dataset.shuffle()
train_dataset = shuffled_dataset[:540]  # 540 of the 600 graphs (90%)
test_dataset = shuffled_dataset[540:]   # remaining 60 graphs

600
6
3
Data(edge_index=[2, 168], x=[37, 3], y=[1])


In [6]:
from torch_geometric.datasets import Planetoid

dataset = Planetoid(name='Cora', root='/tmp/Cora')
# Printed one per line: number of graphs, number of classes, node feature dimension.
# sw: one graph only! Nice.
print(
    len(dataset),
    dataset.num_classes,
    dataset.num_node_features,
    sep="\n",
)

1
7
1433


In [9]:
data = dataset[0]  # first (and only) graph of the Cora dataset
print(data)

print(data.is_undirected())
# Number of nodes selected by each boolean mask: train, validation, test.
print(
    data.train_mask.sum().item(),
    data.val_mask.sum().item(),
    data.test_mask.sum().item(),
    sep="\n",
)
# Node labels and the tensor type they are stored in.
print(dataset[0].y, dataset[0].y.type(), sep="\n")

# Q: the train, val, and test masks do not add up to 2708, why?
# sw: 140 + 500 + 1000 = 1640, so 1068 nodes belong to none of the three masks.
#     My guess is that this is the semi-supervised setup: a small labelled training
#     set, but a much larger test set.

Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])
True
140
500
1000
tensor([3, 4, 4,  ..., 3, 3, 3])
torch.LongTensor


In [41]:
# Feature row of node 0. As a non-final bare expression it is evaluated but not displayed.
data.x[0]
# The last expression of a cell is what the notebook echoes: the boolean training mask.
data.train_mask

tensor([ True,  True,  True,  ..., False, False, False])

In [42]:
# Mini-batches 
# sw: I think mini-batches fit the case where the dataset consists of many small graphs,
# sw: i.e. graph-level tasks (graph embedding) rather than node-level tasks (node embedding).
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

# ENZYMES again, now loaded with use_node_attr=True.
dataset = TUDataset(
    name='ENZYMES',
    root='/tmp/ENZYMES',
    use_node_attr=True,
)
# Loader that reshuffles the graphs and groups 32 of them into every mini-batch.
loader = DataLoader(
    dataset,
    shuffle=True,
    batch_size=32,
)


In [45]:
# Grab a single mini-batch to inspect. next(iter(...)) states that intent directly
# instead of starting a for-loop only to break out of it after the first item.
b = next(iter(loader))

print(b)
print(b.num_graphs)  # number of graphs merged into this batch
print(b.batch)       # for every node, the index of the graph it belongs to
print(b.edge_index)
print(b.x)
print(b.y)           # one label per graph in the batch


Batch(batch=[1203], edge_index=[2, 4200], x=[1203, 21], y=[32])
32
tensor([ 0,  0,  0,  ..., 31, 31, 31])
tensor([[   0,    0,    0,  ..., 1202, 1202, 1202],
        [   1,    2,   16,  ..., 1185, 1200, 1201]])
tensor([[ 3.0000,  5.4656, 12.6300,  ...,  1.0000,  0.0000,  0.0000],
        [ 3.0000,  5.6784,  8.4900,  ...,  1.0000,  0.0000,  0.0000],
        [14.0000, 19.9349, 43.9900,  ...,  1.0000,  0.0000,  0.0000],
        ...,
        [ 5.0000,  0.0000, 18.9000,  ...,  0.0000,  1.0000,  0.0000],
        [ 5.0000,  0.0000, 14.3300,  ...,  0.0000,  1.0000,  0.0000],
        [ 4.0000,  0.0000, 16.9100,  ...,  0.0000,  1.0000,  0.0000]])
tensor([1, 0, 5, 5, 0, 4, 1, 5, 2, 4, 3, 4, 0, 4, 0, 4, 5, 1, 0, 4, 1, 5, 1, 4,
        3, 0, 0, 5, 3, 4, 1, 5])


In [46]:
# Data transform 
# sw: don't run it. It takes some time...
# sw: a lot of pre-transformations we can do - but I skip them here.

from torch_geometric.datasets import ShapeNet

# Only the 'Airplane' category is requested. Creating the dataset starts a large download;
# the recorded run was interrupted during it, so the lookup below was never reached.
# NOTE(review): consider guarding this cell behind a flag so "Run All" stays feasible.
dataset = ShapeNet(
    categories=['Airplane'],
    root='/tmp/ShapeNet',
)
dataset[0]


Downloading https://shapenet.cs.stanford.edu/media/shapenetcore_partanno_segmentation_benchmark_v0_normal.zip


KeyboardInterrupt: 

In [10]:
# Learning methods on graphs.
from torch_geometric.datasets import Planetoid

# Rebind `dataset` to Cora; the model cells that follow read their sizes from it.
dataset = Planetoid(name='Cora', root='/tmp/Cora')
# Show the dataset summary, then its first graph, one per line.
print(dataset, dataset[0], sep="\n")

Cora()
Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])


In [48]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    """Two-layer graph convolutional network that outputs per-node log-probabilities.

    Args:
        in_channels: Size of each input node feature vector. When omitted, falls back
            to ``dataset.num_node_features`` of the notebook-level ``dataset``.
        hidden_channels: Width of the hidden representation (default 16).
        out_channels: Number of output classes. When omitted, falls back to
            ``dataset.num_classes`` of the notebook-level ``dataset``.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None):
        super().__init__()
        # Keep the original zero-argument call `Net()` working: sizes that are not
        # given explicitly are still read from the global `dataset`.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes

        self.conv1 = GCNConv(in_channels, hidden_channels)
        # sw: I think some coding practice to open up this GCNConv layer also helps...
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both convolutions on ``data.x`` / ``data.edge_index``.

        Returns:
            Log-softmax over dim 1 of the second convolution's output.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Q: why this self.training?
        # A: F.dropout is a plain function whose `training` argument defaults to True, so it
        #    cannot see the module's mode. Passing self.training makes dropout follow
        #    model.train() / model.eval(), i.e. it is switched off during evaluation.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [52]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# The model and the graph it is trained on are moved to the same device.
model = Net().to(device)
data = dataset[0].to(device)

# Adam with a learning rate of 0.01 and weight decay of 5e-4 over all model parameters.
optimizer = torch.optim.Adam(
    model.parameters(),
    weight_decay=5e-4,
    lr=0.01,
)

In [53]:
# Full-batch training: every epoch is one optimisation step over the whole graph.
model.train()  # training mode, so the dropout in Net.forward (training=self.training) is active
for epoch in range(200):
    optimizer.zero_grad()  # clear the gradients accumulated by the previous step
    out = model(data)  # forward pass over all nodes; Net.forward returns log-probabilities
    # Negative log-likelihood restricted to the nodes selected by train_mask.
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()  # back-propagate the loss
    optimizer.step()  # apply the parameter update

In [54]:
model.eval()  # evaluation mode: the dropout in Net.forward is switched off
# Evaluation needs no gradients; no_grad() avoids building the autograd graph.
with torch.no_grad():
    # argmax yields the predicted class index directly, so nothing has to be bound to
    # `_` (which IPython reserves for the previous cell output).
    pred = model(data).argmax(dim=1)

# Accuracy = correctly classified test nodes / number of test nodes.
correct = int((pred[data.test_mask] == data.y[data.test_mask]).sum())
acc = correct / int(data.test_mask.sum())
print('Accuracy: {:.4f}'.format(acc))

Accuracy: 0.8170


In [50]:
# Feature dimension as reported by the first graph itself (recorded output: 1433).
dataset[0].num_node_features

1433

In [51]:
# Echo the first graph of `dataset` to check which attributes it carries and their shapes.
dataset[0]

Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])