# 图分类

流程：每个图进行消息传递，更新节点和边特征 -> 每张图都进行边和节点的聚合，形成一个表示 -> 放入分类层（需根据任务自己设计）

图分类通常按批次进行训练

# 图读出

图读出（readout）的目标是让每个图的表示尽可能多地汇聚图中的信息，常用方法：对所有节点/边特征求和、求平均值、取最大值、取最小值……  

$$
h_g = \frac{1}{n} \sum_{i=1}^{n} h_i
$$

DGL内置了常见的图读出函数，例如 dgl.mean_nodes() 就实现了上述的平均值读出计算。在得到图的特征 $h_g$ 后，用户可将其传给一个多层感知机(MLP)来获得分类输出。下面的示例使用通用接口 dgl.readout_nodes()，它默认按求和方式读出，因此得到的是各节点特征之和。

In [18]:
import dgl
import torch

# First toy graph: edges 0 -> 1 and 1 -> 0, with a scalar node feature 'h'.
src_a, dst_a = [0, 1], [1, 0]
g1 = dgl.graph((src_a, dst_a))
g1.ndata['h'] = torch.tensor([1., 2.])

# Second toy graph: edges 0 -> 1 and 1 -> 2, with a scalar node feature 'h'.
src_b, dst_b = [0, 1], [1, 2]
g2 = dgl.graph((src_b, dst_b))
g2.ndata['h'] = torch.tensor([1., 2., 3.])

# Sum readout over a single graph ('sum' is the default op, spelled out here).
dgl.readout_nodes(g1, 'h', op='sum')
# tensor([3.])  # 1 + 2

# On a batched graph the readout produces one value per member graph.
bg = dgl.batch([g1, g2])
dgl.readout_nodes(bg, 'h', op='sum')
# tensor([3., 6.])  # [1 + 2, 1 + 2 + 3]

tensor([3., 6.])

# 同构图完整过程



In [19]:
import dgl.nn.pytorch as dglnn
import torch.nn as nn

class Classifier(nn.Module):
    """Graph classifier: two graph convolutions, mean-node readout, linear head.

    Args:
        in_dim: size of the input node features.
        hidden_dim: output size of both graph-convolution layers.
        n_classes: number of logits produced for each graph.
    """

    def __init__(self, in_dim, hidden_dim, n_classes):
        super().__init__()
        self.conv1 = dglnn.GraphConv(in_dim, hidden_dim)
        self.conv2 = dglnn.GraphConv(hidden_dim, hidden_dim)
        self.classify = nn.Linear(hidden_dim, n_classes)

    def forward(self, g, h):
        """Compute classification logits for the graph(s) in ``g``.

        Args:
            g: the (possibly batched) graph to classify.
            h: input node features of ``g``.

        Returns:
            The output of the linear classification layer applied to the
            mean-node readout of the convolved node features.
        """
        # ReLU is reached through ``nn`` so the class does not rely on a
        # separately imported ``F`` alias being bound before forward() runs.
        relu = nn.functional.relu

        # Apply graph convolution followed by the activation, twice.
        h = relu(self.conv1(g, h))
        h = relu(self.conv2(g, h))

        # local_scope() keeps the temporary 'h' node feature from persisting
        # on the caller's graph once the readout has been computed.
        with g.local_scope():
            g.ndata['h'] = h
            # Mean readout: average the node features to get the graph
            # representation.
            hg = dgl.mean_nodes(g, 'h')
        return self.classify(hg)

In [20]:
import dgl.data
# Load the MUTAG dataset through GINDataset with self-loops disabled.
dataset = dgl.data.GINDataset('MUTAG', self_loop=False)

from dgl.dataloading import GraphDataLoader

# Mini-batch settings: shuffle the graphs and keep the final partial batch.
loader_options = dict(batch_size=1024, shuffle=True, drop_last=False)
dataloader = GraphDataLoader(dataset, **loader_options)

In [21]:
import torch.nn.functional as F

# Example only: the node feature size is 7.
model = Classifier(in_dim=7, hidden_dim=20, n_classes=5)
opt = torch.optim.Adam(params=model.parameters())

# Progress line: the epoch is printed with one decimal place, the loss with four.
log_template = "{:.1f}:  损失{:.4f}"

for epoch in range(20):
    for batched_graph, labels in dataloader:
        # Clear the gradients of the previous step before the new pass.
        opt.zero_grad()

        # Forward pass on the batched graph using its 'attr' node features.
        feats = batched_graph.ndata['attr']
        logits = model(batched_graph, feats)
        loss = F.cross_entropy(logits, labels)

        # Backward pass and parameter update.
        loss.backward()
        opt.step()
        print(log_template.format(epoch, loss.item()))

0.0:  损失1.7097
1.0:  损失1.7047
2.0:  损失1.6999
3.0:  损失1.6950
4.0:  损失1.6902
5.0:  损失1.6855
6.0:  损失1.6808
7.0:  损失1.6762
8.0:  损失1.6716
9.0:  损失1.6670
10.0:  损失1.6625
11.0:  损失1.6582
12.0:  损失1.6538
13.0:  损失1.6495
14.0:  损失1.6452
15.0:  损失1.6410
16.0:  损失1.6368
17.0:  损失1.6327
18.0:  损失1.6284
19.0:  损失1.6241


# 异构图:在一张图上需要把不同类别的节点都聚合

与同构图的区别：  
（1）使用的GNN层的类型不太一样  
（2）在分类模型的 forward() 中，需要对每种节点类型分别做平均读出，再把各类型的读出结果相加

In [None]:
class RGCN(nn.Module):
    """Two stacked heterogeneous graph-convolution layers.

    Each layer holds one ``GraphConv`` per relation name and combines the
    per-relation results with the 'sum' aggregate.

    Args:
        in_feats: input feature size of the first layer.
        hid_feats: output size of the first layer / input size of the second.
        out_feats: output feature size of the second layer.
        rel_names: iterable of relation names; one ``GraphConv`` is created
            per name in each layer.
    """

    def __init__(self, in_feats, hid_feats, out_feats, rel_names):
        super().__init__()

        def hetero_layer(n_in, n_out):
            # One GraphConv per relation, wrapped in a sum-aggregating layer.
            per_relation = {}
            for rel in rel_names:
                per_relation[rel] = dglnn.GraphConv(n_in, n_out)
            return dglnn.HeteroGraphConv(per_relation, aggregate='sum')

        self.conv1 = hetero_layer(in_feats, hid_feats)
        self.conv2 = hetero_layer(hid_feats, out_feats)

    def forward(self, graph, inputs):
        """Run both layers, with ReLU applied to every first-layer output.

        Args:
            graph: the heterogeneous graph to convolve over.
            inputs: the node features.

        Returns:
            The mapping returned by the second layer.
        """
        hidden = self.conv1(graph, inputs)
        activated = {}
        for key, value in hidden.items():
            activated[key] = F.relu(value)
        return self.conv2(graph, activated)

class HeteroClassifier(nn.Module):
    """Heterogeneous-graph classifier: RGCN encoder, summed per-type mean readout, linear head.

    Args:
        in_dim: input node feature size passed to the RGCN.
        hidden_dim: hidden and output size of the RGCN, and input size of
            the classification layer.
        n_classes: number of logits produced by the classification layer.
        rel_names: relation names forwarded to the RGCN.
    """

    def __init__(self, in_dim, hidden_dim, n_classes, rel_names):
        super().__init__()
        self.rgcn = RGCN(in_dim, hidden_dim, hidden_dim, rel_names)
        self.classify = nn.Linear(hidden_dim, n_classes)

    def forward(self, g):
        """Compute classification logits from the 'feat' node data of ``g``.

        Args:
            g: heterogeneous graph carrying a 'feat' node feature.

        Returns:
            The output of the linear classification layer applied to the sum,
            over all node types of ``g``, of the mean-node readout of 'h'.
        """
        node_repr = self.rgcn(g, g.ndata['feat'])
        with g.local_scope():
            g.ndata['h'] = node_repr
            # Graph representation: add up the mean readout of every node type.
            # NOTE(review): presumably every type in g.ntypes ends up with an
            # 'h' entry after the RGCN - confirm for the graphs being used.
            hg = sum(
                dgl.mean_nodes(g, 'h', ntype=ntype) for ntype in g.ntypes
            )
        return self.classify(hg)