1.   安装pytorch geometric

In [None]:
# Environment setup (Colab-style). Uncomment the next line to remove any
# pre-installed, mismatching builds before installing the pinned ones.
# !pip uninstall torch torch-scatter torch-sparse torch-spline-conv torch-geometric
# PyTorch 1.7.1 built for CUDA 11.0, plus the matching torchvision/torchaudio/torchtext releases.
!pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110  torchaudio===0.7.2 torchtext==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
# PyTorch Geometric extension wheels; the -f index is the one for torch 1.7.1 + cu110.
# NOTE(review): these extension packages are not version-pinned — confirm the resolved
# versions still provide wheels for torch 1.7.1.
!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.1+cu110.html
!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.7.1+cu110.html
!pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.7.1+cu110.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.7.1+cu110.html
# NOTE(review): torch-geometric and tensorboardX are unpinned, so a fresh install may pull
# a release that no longer supports torch 1.7.1 — consider pinning for reproducibility.
!pip install torch-geometric 
!pip install tensorboardX
# ngrok binary, used further down to expose the TensorBoard port.
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip

2. 导入需要的包

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric.nn as pyg_nn
import torch_geometric.utils as pyg_utils

import time
from datetime import datetime

import networkx as nx
import numpy as np
import torch
import torch.optim as optim

from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid

# 增加实验所使用的数据集
from torch_geometric.datasets import Flickr
from torch_geometric.datasets import Reddit2

from torch_geometric.data import DataLoader

import torch_geometric.transforms as T

from tensorboardX import SummaryWriter
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt


3. 定义三层GNN模型 

In [None]:
class GNNStack(nn.Module):
    """Stack of message-passing layers for node- or graph-level classification.

    Computation graph:
        pre-MLP -> num_layers x (conv -> BatchNorm -> dropout -> LeakyReLU
        [-> LayerNorm on every layer but the last]) -> global mean pool
        (graph task only) -> post-MLP -> log-softmax.

    Args:
        input_dim: Size of the raw node feature vectors.
        hidden_dim: Width used by every hidden layer.
        output_dim: Number of classes.
        task: 'node' for node classification, 'graph' for graph classification.
        num_layers: Number of message-passing layers (default 3).
        dropout: Feature dropout probability used in the MLPs and after every
            layer (default 0.25).
        edge_dropout: Probability of randomly dropping edges for each layer
            during training. Defaults to 0.0 (no edge dropout), which matches
            the previous behaviour where the result of ``dropout_adj`` was
            computed but never used.

    Raises:
        RuntimeError: If ``task`` is neither 'node' nor 'graph'.
        ValueError: If ``num_layers`` is smaller than 1.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, task='node',
                 num_layers=3, dropout=0.25, edge_dropout=0.0):
        super(GNNStack, self).__init__()
        # Validate before building any layer so an invalid task fails fast.
        if task not in ('node', 'graph'):
            raise RuntimeError('Unknown task.')
        if num_layers < 1:
            raise ValueError('num_layers must be at least 1.')

        self.task = task
        self.num_layers = num_layers
        self.dropout = dropout
        self.edge_dropout = edge_dropout

        # Pre-message-passing: project the raw features from input_dim to hidden_dim.
        self.pre_mlp = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim),
        )

        # All convolutions operate on hidden_dim because of the pre-MLP above.
        self.convs = nn.ModuleList(
            [self.build_conv_model(hidden_dim, hidden_dim) for _ in range(num_layers)]
        )
        # LayerNorm follows every layer except the last one.
        self.lns = nn.ModuleList(
            [nn.LayerNorm(hidden_dim) for _ in range(num_layers - 1)]
        )
        # One BatchNorm per message-passing layer.
        self.bns = nn.ModuleList(
            [pyg_nn.BatchNorm(hidden_dim) for _ in range(num_layers)]
        )

        # Post-message-passing: MLP head applied after the last convolution.
        self.post_mp = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, output_dim),
        )

    def build_conv_model(self, input_dim, hidden_dim):
        """Return one message-passing layer suited to ``self.task``.

        Node tasks use the custom convolution defined in this notebook; graph
        tasks use a GIN convolution wrapping a two-layer MLP.
        """
        if self.task == 'node':
            return CustomConv(input_dim, hidden_dim)
        return pyg_nn.GINConv(nn.Sequential(nn.Linear(input_dim, hidden_dim),
                              nn.ReLU(), nn.Linear(hidden_dim, hidden_dim)))

    def forward(self, data):
        """Run the full network on a (batched) graph.

        Args:
            data: Object exposing ``x``, ``edge_index``, ``batch``,
                ``num_node_features`` and ``num_nodes``.

        Returns:
            Tuple ``(emb, log_probs)``: ``emb`` is the node-level output of the
            last convolution (before BatchNorm), ``log_probs`` holds the
            log-softmax class scores (per node, or per graph for task='graph').
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # Featureless graphs get a constant feature of 1 per node. This must
        # happen BEFORE the pre-MLP, otherwise the MLP would be fed ``None``.
        if x is None or data.num_node_features == 0:
            x = torch.ones(data.num_nodes, 1, device=edge_index.device)
        x = self.pre_mlp(x)

        for i in range(self.num_layers):
            layer_edge_index = edge_index
            if self.edge_dropout > 0:
                # Sample an independent edge subset for each layer (training only).
                layer_edge_index, _ = pyg_utils.dropout_adj(
                    edge_index, p=self.edge_dropout, training=self.training)
            x = self.convs[i](x, layer_edge_index)
            emb = x  # embedding exposed to callers: raw output of the current conv
            x = self.bns[i](x)
            x = F.dropout(x, p=self.dropout, training=self.training)
            x = F.leaky_relu(x)
            if i != self.num_layers - 1:
                x = self.lns[i](x)

        if self.task == 'graph':
            # ``batch`` maps each node to its graph, so pooling yields one row per graph.
            x = pyg_nn.global_mean_pool(x, batch)

        x = self.post_mp(x)

        return emb, F.log_softmax(x, dim=1)

    def loss(self, pred, label):
        """Negative log-likelihood loss; ``pred`` must contain log-probabilities."""
        return F.nll_loss(pred, label)

4. 自定义图卷积层（单层实现）

4.1 增加self_loop从零实现

In [None]:
# From-scratch version of pyg_utils.add_self_loops, kept deliberately simple.
def add_self_loop(edge_index, num_nodes=None):
    """Append one self-loop edge (i, i) for every node to ``edge_index``.

    Args:
        edge_index: Long tensor of shape [2, E] holding the edges.
        num_nodes: Number of nodes. When ``None`` it is inferred as
            ``edge_index.max() + 1`` (0 for an empty edge list).

    Returns:
        Tensor of shape [2, E + num_nodes]: the original edges followed by the
        self-loops ``[[0, ..., N-1], [0, ..., N-1]]``.
    """
    if num_nodes is None:
        # torch.arange(0, None) would raise, so derive the node count from the edges.
        num_nodes = int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0

    node_ids = torch.arange(0, num_nodes, dtype=torch.long, device=edge_index.device)
    # Stack the id vector twice: row 0 = sources, row 1 = targets.
    loop_index = torch.stack([node_ids, node_ids], dim=0)
    # Concatenate along the edge dimension.
    return torch.cat([edge_index, loop_index], dim=1)

4.2 单层图卷积网络定义

In [None]:
class CustomConv(pyg_nn.MessagePassing):
    """Single message-passing layer with sum aggregation and a skip connection.

    The layer adds self-loops, linearly transforms the node features, scales
    each message by ``deg[row]^-1/2 * deg[col]^-1/2`` and sums the messages per
    target node; the transformed features are then added back to the result.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
        debug: When True, print the intermediate tensors (useful with toy
            inputs to inspect shapes). Off by default so training loops are
            not flooded with output.
    """

    def __init__(self, in_channels, out_channels, debug=False):
        super(CustomConv, self).__init__(aggr='add')  # sum aggregation
        # NOTE: ``lin`` is not used in forward(); it is kept so the parameter
        # set (and therefore saved state dicts) stays unchanged.
        self.lin = nn.Linear(in_channels, out_channels)
        self.lin_self = nn.Linear(in_channels, out_channels)
        self.debug = debug

    def forward(self, x, edge_index):
        """Compute the layer output.

        Args:
            x: Node features of shape [N, in_channels].
            edge_index: Edge list of shape [2, E].

        Returns:
            Tensor of shape [N, out_channels].
        """
        # A' = A + I: append one self-loop per node.
        edge_index = add_self_loop(edge_index, x.size(dim=0))

        # Turn the input features into messages.
        self_x = self.lin_self(x)
        if self.debug:
            print("self_x: ", self_x)
        # Skip connection: transformed features plus the aggregated messages.
        return self_x + self.propagate(edge_index, x=self_x)

    def message(self, x, x_i, x_j, edge_index):
        """Scale every neighbour message by the symmetric degree normalisation.

        ``x_j`` holds the neighbour (source) features per edge, shape
        [E, out_channels]; ``x_i`` holds the features of the receiving node.
        """
        row, col = edge_index  # first / second row of the [2, E] edge list
        deg = pyg_utils.degree(col, x.size(0), dtype=x_j.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        # Zero-degree nodes yield inf; reset them so inf does not propagate.
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        out = norm.view(-1, 1) * x_j
        if self.debug:
            print("x_i: ", x_i)
            print("degree: ", deg)
            print("norm: ", norm)
            print("x_j: ", x_j)
            print("norm*x_j: ", out)
        return out

    def update(self, aggr_out):
        """Return the aggregated messages unchanged (shape [N, out_channels]).

        An MLP could be applied here to post-process the aggregation.
        """
        return aggr_out

4.3 简单测试CustomConv层的行为，主要是为了用简单的数据确定输出形状

In [None]:
# Toy check of CustomConv's output shape; disable this cell for real experiments.
# Four nodes with a constant scalar feature, and edges 1->0, 2->0, 3->0.
x = torch.ones((4, 1), dtype=torch.float)
edge_index = torch.tensor(
    [[1, 2, 3],
     [0, 0, 0]],
    dtype=torch.long,
)
conv = CustomConv(1, 2)
y = conv(x, edge_index)
print("y: ", y)

5. 训练

In [None]:
def train(dataset, task, writer, epochs=200, lr=0.01, hidden_dim=32,
          batch_size=64, eval_every=10):
    """Train a GNNStack on ``dataset`` and return the trained model.

    Args:
        dataset: PyG dataset exposing ``num_node_features`` and ``num_classes``.
        task: 'graph' (first 80% of the graphs train, last 20% test) or 'node'
            (the same loader is used for training and testing; masks select
            the nodes).
        writer: Summary writer receiving the "loss" and "test accuracy"
            scalars; pass ``None`` to disable logging.
        epochs: Number of training epochs.
        lr: Adam learning rate.
        hidden_dim: Hidden width of the model.
        batch_size: Mini-batch size for the loaders.
        eval_every: Evaluate and print every this many epochs.

    Returns:
        The trained model.
    """
    if task == 'graph':
        split = int(len(dataset) * 0.8)
        loader = DataLoader(dataset[:split], batch_size=batch_size, shuffle=True)
        # Evaluation does not depend on sample order, so no shuffling is needed.
        test_loader = DataLoader(dataset[split:], batch_size=batch_size, shuffle=False)
    else:
        test_loader = loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Featureless datasets are given a constant feature of size 1 by the model.
    model = GNNStack(max(dataset.num_node_features, 1), hidden_dim,
                     dataset.num_classes, task=task)
    opt = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        total_loss = 0
        model.train()  # test() switches to eval mode, so re-enable training each epoch
        for batch in loader:
            opt.zero_grad()
            _, pred = model(batch)
            label = batch.y
            if task == 'node':
                # Only the training nodes contribute to the loss.
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        if writer is not None:
            writer.add_scalar("loss", total_loss, epoch)

        if epoch % eval_every == 0:
            test_acc = test(test_loader, model)
            print("Epoch {}. Loss: {:.4f}. Test accuracy: {:.4f}".format(
                epoch, total_loss, test_acc))
            if writer is not None:
                writer.add_scalar("test accuracy", test_acc, epoch)

    return model


6. 测试

In [None]:
def test(loader, model, is_validation=False):
    model.eval()

    correct = 0
    for data in loader:
        with torch.no_grad():
            emb, pred = model(data)
            pred = pred.argmax(dim=1)
            label = data.y

        if model.task == 'node':
            mask = data.val_mask if is_validation else data.test_mask
            # node classification: only evaluate on nodes in test set
            pred = pred[mask]
            label = data.y[mask]
            
        correct += pred.eq(label).sum().item()
    
    if model.task == 'graph':
        total = len(loader.dataset) 
    else:
        total = 0
        for data in loader.dataset:
            total += torch.sum(data.test_mask).item()
    return correct / total

7. 训练模型，并使用tensorboardX可视化loss和accuracy

In [None]:
# Start TensorBoard and an ngrok tunnel in the background, then print the
# public URL of the tunnel.
import json
import urllib.request

LOG_DIR = './log'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)
get_ipython().system_raw('./ngrok http 6006 &')

# ngrok needs a moment to come up; querying its local API immediately returns
# an empty tunnel list (the cause of "IndexError: list index out of range").
# Poll for a few seconds instead of assuming the tunnel is ready.
public_url = None
for _ in range(10):
    try:
        with urllib.request.urlopen('http://localhost:4040/api/tunnels') as resp:
            tunnels = json.load(resp)['tunnels']
    except OSError:  # API not reachable yet (URLError is an OSError)
        tunnels = []
    if tunnels:
        public_url = tunnels[0]['public_url']
        break
    time.sleep(1)

if public_url is not None:
    print(public_url)
else:
    print('ngrok tunnel is not available; check that ./ngrok is running.')

Traceback (most recent call last):
  File "<string>", line 1, in <module>
IndexError: list index out of range


In [None]:
# One TensorBoard run directory per launch, named after the current timestamp.
writer = SummaryWriter(f"./log/{datetime.now():%Y%m%d-%H%M%S}")

# ENZYMES is a graph-classification dataset, so it only fits task = 'graph'.
# Alternative datasets:
# dataset = Flickr(root='/tmp/FLICKR')
# dataset = Reddit2(root='/tmp/Reddit2')
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES').shuffle()
task = 'graph'
model = train(dataset, task, writer)

8. 学习出的embeddings可视化

In [None]:
# t-SNE projection of the learned embeddings in 2-D. This cell is only meant
# for graph-level datasets (task='graph').
color_list = ["red", "orange", "green", "blue", "purple", "brown"]
loader = DataLoader(dataset, batch_size=64, shuffle=False)
embs = []
colors = []
model.eval()  # disable dropout / use BatchNorm running statistics
with torch.no_grad():
    for batch in loader:
        emb, pred = model(batch)
        # The model returns one embedding per NODE while the labels are per
        # GRAPH; mean-pool the nodes of each graph so both line up one-to-one.
        embs.append(pyg_nn.global_mean_pool(emb, batch.batch))
        colors += [color_list[y] for y in batch.y.tolist()]
embs = torch.cat(embs, dim=0)

xs, ys = zip(*TSNE().fit_transform(embs.numpy()))
plt.scatter(xs, ys, color=colors)

In [None]:
# (Re)load the TensorBoard notebook extension and display the runs logged under ./log inline.
%reload_ext tensorboard
%tensorboard --logdir './log'

9. 在不同数据集上测试模型

In [None]:
# Keep the run inside ./log (note the trailing slash) so the TensorBoard
# instance started with --logdir ./log can see it.
writer = SummaryWriter("./log/" + datetime.now().strftime("%Y%m%d-%H%M%S"))

dataset = Planetoid(root='/tmp/cora', name='cora')
task = 'node'

model = train(dataset, task, writer)


# t-SNE projection of the node embeddings in 2-D, one colour per class.
color_list = ["red", "orange", "green", "blue", "purple", "brown", "pink"]
assert dataset.num_classes <= len(color_list), "add more colours to color_list"
loader = DataLoader(dataset, batch_size=64, shuffle=False)
embs = []
colors = []
model.eval()  # train() leaves the model in training mode; disable dropout here
with torch.no_grad():
    for batch in loader:
        emb, pred = model(batch)
        embs.append(emb)
        colors += [color_list[y] for y in batch.y.tolist()]
embs = torch.cat(embs, dim=0)

xs, ys = zip(*TSNE().fit_transform(embs.numpy()))
plt.scatter(xs, ys, color=colors)