# GNN basic

## torch_geometric.data.Data

`Data` 对象主要包括以下 5 个属性，每一个属性都不是必须的，可以为空（`None`）。

* x: 用于存储每个节点的特征，形状是[num_nodes, num_node_features]。
* edge_index: 用于存储节点之间的边，形状是 [2, num_edges]。
* pos: 存储节点的坐标，形状是[num_nodes, num_dimensions]。
* y: 存储样本标签。如果是每个节点都有标签，那么形状是[num_nodes, *]；如果是整张图只有一个标签，那么形状是[1, *]。
* edge_attr: 存储边的特征。形状是[num_edges, num_edge_features]。

In [None]:
import os
# Point both proxy environment variables at the internal proxy in one step.
# Presumably needed so that later dataset downloads can reach the internet.
os.environ.update({
    'http_proxy': "http://proxy.intern.yuansuan.cn:30890",
    'https_proxy': "http://proxy.intern.yuansuan.cn:30890",
})

In [None]:
import torch
from torch_geometric.data import Data

In [None]:
import torch
from torch_geometric.data import Data
# The graph is undirected, so every connection is stored in both directions,
# giving 4 directed edges: (0 -> 1), (1 -> 0), (1 -> 2), (2 -> 1).
# edge_index holds two rows: the first lists the source node of each edge,
# the second lists the matching target node.
source_nodes = [0, 1, 1, 2]
target_nodes = [1, 0, 2, 1]
edge_index = torch.tensor([source_nodes, target_nodes], dtype=torch.long)

# One scalar feature per node (3 nodes in total).
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index)
data

In [None]:
import torch
from torch_geometric.data import Data

# Here the edges are written as one (source, target) pair per row, i.e. a
# [num_edges, 2] tensor, which is the transpose of what Data expects.
edge_index = torch.tensor(
    [[0, 1], [1, 0], [1, 2], [2, 1]],
    dtype=torch.long,
)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# Transpose to [2, num_edges] and make the result contiguous in memory
# before handing it to Data.
coo_edges = edge_index.t().contiguous()
data = Data(x=x, edge_index=coo_edges)
data

## DataLoader

In [None]:
from torch_geometric.datasets import TUDataset
# Load the ENZYMES dataset through TUDataset, keeping its files under /tmp/ENZYMES.
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
# Take the first graph of the dataset and display it as the cell output.
data = dataset[0]
data

### DataLoader.batching 流程

In [None]:
from torch_geometric.datasets import TUDataset
# DataLoader lives in torch_geometric.loader since PyG 2.0; importing it from
# torch_geometric.data is deprecated.
from torch_geometric.loader import DataLoader

# use_node_attr=True is what yields the 21-column x shown in the sample
# output below.
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
# Each iteration yields one Batch that merges up to 32 graphs; data.batch maps
# every node to the index of the graph it came from.
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for data in loader:
    print(data)
    # Example output:
    #   Batch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32])

    # To average the node features of each graph (requires torch_scatter):
    #   from torch_scatter import scatter_mean
    #   x = scatter_mean(data.x, data.batch, dim=0)
    #   x.size()  # torch.Size([32, 21])

## Transforms 数据增强

In [None]:
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet

# KNNGraph(k=6) turns each point cloud into a graph with 6 edges per point
# (2518 points -> 15108 edges in the sample output below).
# Per the PyG docs, a pre_transform is applied once, before the processed
# dataset is written to disk.
knn_graph = T.KNNGraph(k=6)
dataset = ShapeNet(
    root='/tmp/ShapeNet',
    categories=['Airplane'],
    pre_transform=knn_graph,
)
dataset[0]
# dataset[0]: Data(edge_index=[2, 15108], pos=[2518, 3], y=[2518])

In [None]:
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet

# pre_transform: build the 6-nearest-neighbour graph for every point cloud.
knn_graph = T.KNNGraph(k=6)
# transform: random translation of node positions with magnitude 0.01, used
# as data augmentation (per the PyG docs, a transform runs on every access).
random_shift = T.RandomTranslate(0.01)
dataset = ShapeNet(
    root='/tmp/ShapeNet',
    categories=['Airplane'],
    pre_transform=knn_graph,
    transform=random_shift,
)
# dataset[0]: Data(edge_index=[2, 15108], pos=[2518, 3], y=[2518])

## Mini example

In [3]:
import os
# Point both proxy environment variables at the internal proxy in one step.
# Presumably needed so that the dataset downloads below can reach the internet.
os.environ.update({
    'http_proxy': "http://proxy.intern.yuansuan.cn:30890",
    'https_proxy': "http://proxy.intern.yuansuan.cn:30890",
})

In [5]:
from torch_geometric.datasets import Planetoid, Taobao

# Taobao is constructed with `root` only; the recorded TypeError below shows
# that it rejects a `name` keyword (unlike Planetoid in the next cell).
# NOTE: `dataset` is reassigned by the Planetoid cell that follows.
dataset = Taobao(root='./data',)

TypeError: Taobao.__init__() got an unexpected keyword argument 'name'

In [4]:
from torch_geometric.datasets import Planetoid

# Load the Cora dataset through Planetoid, keeping its files under ./Cora.
# NOTE: the recorded run timed out while downloading (FSTimeoutError); the
# model and training cells below need this `dataset` to load successfully.
dataset = Planetoid(root='./Cora', name='Cora')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x


FSTimeoutError: 

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    """Two-layer GCN that outputs per-node log-probabilities.

    Args:
        in_channels: Number of input features per node. When omitted, falls
            back to ``dataset.num_node_features`` of the notebook-level
            ``dataset``.
        hidden_channels: Width of the hidden layer.
        out_channels: Number of output classes. When omitted, falls back to
            ``dataset.num_classes`` of the notebook-level ``dataset``.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None):
        super().__init__()
        # Keep ``Net()`` working exactly as before by reading the sizes from
        # the global ``dataset`` only when they are not given explicitly.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both graph convolutions on ``data``.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            Log-softmax of the second convolution's output, taken over dim 1.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout follows the module's mode, so it is disabled after
        # ``model.eval()``.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Move both the model and the first graph of the dataset onto that device.
model = Net().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Full-batch training: every epoch runs the whole graph through the model,
# but the loss only looks at the nodes selected by train_mask.
model.train()
num_epochs = 200
for epoch in range(num_epochs):
    optimizer.zero_grad()
    out = model(data)
    train_mask = data.train_mask
    loss = F.nll_loss(out[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()

In [None]:
# Switch to evaluation mode (this disables the dropout in Net.forward).
model.eval()
# Inference needs no autograd graph; skipping it saves memory and time.
with torch.no_grad():
    # Predicted class = index of the largest log-probability for each node.
    pred = model(data).argmax(dim=1)

# Accuracy is measured only on the nodes selected by test_mask.
test_mask = data.test_mask
correct = int((pred[test_mask] == data.y[test_mask]).sum().item())
acc = correct / test_mask.sum().item()
print(f'Accuracy: {acc:.4f}')

## Resources

* https://zhuanlan.zhihu.com/p/120311352
* https://pytorch.zhangxiann.com/9-qi-ta/tu-shen-jing-wang-luo-pytorch-geometric-ru-men-jiao-cheng