### Introduction by Example

### Data handling of graphs 

In [1]:
# pytorch tensor data types: https://pytorch.org/docs/stable/tensors.html

# torch.float32: torch.float (default)
# torch.float64: torch.double 
# torch.float16: torch.half 

# torch.int16: torch.short 
# torch.int32: torch.int 
# torch.int64: torch.long (default)

In [2]:
# unweighted, undirected graph

import torch
from torch_geometric.data import Data

# Edge list in COO layout, shape [2, num_edges]. Each undirected edge appears
# as two directed edges: (0, 1), (1, 0), (1, 2), (2, 1).
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)

# Node feature matrix, shape [num_nodes, num_node_features] = [3, 1].
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# A bare expression in the middle of a cell is never displayed, so the dtype
# check is written as an explicit assertion instead.
assert x.dtype == torch.float and edge_index.dtype == torch.long

data = Data(x=x, edge_index=edge_index)

In [3]:
# Validate the toy graph; the recorded output for this cell is True.
data.validate()

True

In [4]:
# Stored attribute names, node count, edge count and per-node feature width.
(
    data.keys(),
    data.num_nodes,
    data.num_edges,
    data.num_node_features,
)

(['x', 'edge_index'], 3, 4, 1)

In [5]:
# Compact repr of the graph shown next to its tensors as a plain dict.
(
    data,
    data.to_dict(),
)

(Data(x=[3, 1], edge_index=[2, 4]),
 {'x': tensor([[-1.],
          [ 0.],
          [ 1.]]),
  'edge_index': tensor([[0, 1, 1, 2],
          [1, 0, 2, 1]])})

### Common benchmark datasets

1. Planetoid Dataset: Cora, PubMed, Citeseer
2. Graph classification: TUDatasets
3. Chemistry/molecules: QM7 & QM9 datasets
4. 3D mesh/point cloud datasets: FAUST, ModelNet10/40, ShapeNet

In [6]:
from torch_geometric.datasets import TUDataset

# ENZYMES from the TUDataset collection, using ./datasets/ENZYMES as its root.
dataset = TUDataset(name='ENZYMES', root='./datasets/ENZYMES')

# (number of graphs, number of classes, node feature width)
(
    len(dataset),
    dataset.num_classes,
    dataset.num_node_features,
)

(600, 6, 3)

In [7]:
# Indexing the dataset yields a single graph as a Data object.
data = dataset[0]

# Graph repr, directedness flags, and the concrete dataset class.
(
    data,
    data.is_directed(),
    data.is_undirected(),
    type(dataset),
)

(Data(edge_index=[2, 168], x=[37, 3], y=[1]),
 False,
 True,
 torch_geometric.datasets.tu_dataset.TUDataset)

In [8]:
# 540 / 60 split of the 600 ENZYMES graphs by plain slicing.
# NOTE(review): the dataset is not shuffled before slicing; if the graphs are
# stored grouped by class, the test split will be unrepresentative. Consider
# slicing `dataset.shuffle()` instead - confirm against the dataset ordering.
NUM_TRAIN_GRAPHS = 540
train_dataset = dataset[:NUM_TRAIN_GRAPHS]
test_dataset = dataset[NUM_TRAIN_GRAPHS:]

# The first element of the training slice is the first element of the dataset.
(train_dataset[0], dataset[0])

(Data(edge_index=[2, 168], x=[37, 3], y=[1]),
 Data(edge_index=[2, 168], x=[37, 3], y=[1]))

In [9]:
from torch_geometric.datasets import Planetoid

# Cora from the Planetoid collection, using ./datasets/Cora as its root.
dataset = Planetoid(name='Cora', root='./datasets/Cora')

# (number of graphs, number of classes, node feature width)
(
    len(dataset),
    dataset.num_classes,
    dataset.num_node_features,
)

(1, 7, 1433)

In [10]:
# Cora consists of one graph; the train/val/test splits are boolean node masks.
data = dataset[0]

# Graph repr, directedness, and the number of nodes selected by each mask.
(
    data,
    data.is_directed(),
    data.train_mask.sum().item(),
    data.val_mask.sum().item(),
    data.test_mask.sum().item(),
)

(Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708]),
 False,
 140,
 500,
 1000)

### Mini-batches

In [11]:
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

# Single source of truth for the batch size: it is used both to build the
# loader and to compute the ratio printed below.
BATCH_SIZE = 32

dataset = TUDataset(root='./datasets/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Number of graphs, number of mini-batches, and the exact graphs/batch-size
# ratio (the recorded output shows 19 batches for a ratio of 18.75).
print(len(dataset), len(loader), len(dataset)/BATCH_SIZE)

# Only the first mini-batch is inspected.
for batch in loader:
    print(batch)
    print(batch.num_graphs)
    break

600 19 18.75
DataBatch(edge_index=[2, 4034], x=[1057, 21], y=[32], batch=[1057], ptr=[33])
32


In [12]:
from torch_geometric.utils import scatter
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

dataset = TUDataset(root='./datasets/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

print(dataset[0])

# The loop variable and the pooled result get their own names so that this
# cell does not overwrite the notebook-level `data` and `x` from earlier cells.
for batch in loader:
    print(batch)
    print(batch.num_graphs)
    # `batch.batch` holds one entry per node of the mini-batch.
    print(batch.batch.shape)

    # Average node features per graph, grouping rows of `batch.x` by the
    # index stored in `batch.batch`.
    graph_means = scatter(batch.x, batch.batch, dim=0, reduce='mean')
    print(graph_means.size())
    break



Data(edge_index=[2, 168], x=[37, 21], y=[1])
DataBatch(edge_index=[2, 4458], x=[1182, 21], y=[32], batch=[1182], ptr=[33])
32
torch.Size([1182])
torch.Size([32, 21])


### Data transforms

In [13]:
from torch_geometric.datasets import ShapeNet, ModelNet

# TODO: no transform is applied in this section yet. The dataset loads below
# are commented out, so this cell currently only performs the import.
# NOTE(review): presumably disabled to avoid a large download - confirm.
# dataset = ShapeNet(root='./datasets/ShapeNet', categories=['Airplane'])
# dataset = ModelNet(root='./datasets/ModelNet', name='10')
# dataset[0]

### Learning methods on graphs

In [14]:
from torch_geometric.datasets import Planetoid

# Rebind `dataset` to Cora; the GCN cells that follow read it.
dataset = Planetoid(name='Cora', root='./datasets/Cora')

# (number of graphs, the single Cora graph, node feature width)
(
    len(dataset),
    dataset[0],
    dataset.num_node_features,
)
# dir(dataset[0])

(1,
 Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708]),
 1433)

In [15]:
import torch 
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

# torch.nn.ReLU: module (class-based) interface, used in model definitions, e.g. self.relu = nn.ReLU(); it has no learnable parameters
# torch.relu / F.relu: functional interface, applied directly to a tensor, e.g. torch.relu(x); stateless operation

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Args:
        in_channels: Width of the input node features. When ``None``, falls
            back to ``dataset.num_node_features`` of the notebook-level
            ``dataset``, so ``GCN()`` keeps working as before.
        hidden_channels: Width of the hidden representation (16 by default).
        out_channels: Number of output classes. When ``None``, falls back to
            ``dataset.num_classes`` of the notebook-level ``dataset``.
        dropout: Dropout probability applied between the two layers; 0.5 is
            the default of ``F.dropout``, which the original code relied on.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None, dropout=0.5):
        super().__init__()
        # Resolve the sizes explicitly so the dependency on the global
        # `dataset` is visible and can be overridden by the caller.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes

        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return per-node log-probabilities over the classes.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            Tensor of log-softmax scores taken over dimension 1.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active in training mode (self.training is True).
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)


In [17]:
# Seed the RNG before the model is created so that weight initialisation and
# dropout masks repeat across "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility on CUDA is required.
SEED = 42
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

NUM_EPOCHS = 300

# Full-batch training: every step runs the whole graph through the model, but
# the loss only uses the nodes selected by `train_mask`.
model.train()
for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()
    out = model(data)
    # The model returns log-probabilities, hence the negative log-likelihood loss.
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

In [19]:
# Evaluation mode disables dropout; no_grad() avoids building an autograd
# graph for a forward pass that is never back-propagated.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)

# Accuracy over the nodes selected by `test_mask` only.
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct)/int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.8010
