`class Data`：图的表示基类，用来描述一张图（节点、边及其属性）

x (Tensor, optional): 节点属性矩阵，大小为`[num_nodes, num_node_features]`

edge_index (LongTensor, optional): 边索引矩阵，大小为`[2, num_edges]`

edge_attr (Tensor, optional): 边属性矩阵，大小为`[num_edges, num_edge_features]`

y (Tensor, optional): 节点或边或图的标签，任意大小

`**kwargs`：可以接收任意数量的关键字参数，也就是还可以给图添加其他属性。其中 `num_nodes` 会被单独保存到 `self.__num_nodes__`，其余键值通过 `self[key] = item` 存为图的属性：

```
for key, item in kwargs.items():
    if key == 'num_nodes':
        self.__num_nodes__ = item
    else:
        self[key] = item
```

ctor:  
def __init__(self, x=None, edge_index=None, edge_attr=None, y=None, **kwargs):

In [4]:
import torch
from torch_geometric.data import Data

# Edge list with shape [2, num_edges] = [2, 4]: every column holds the two
# endpoint indices of one edge. The pairs 0-1 and 1-2 each appear in both
# directions, i.e. (0,1), (1,0), (1,2), (2,1).
edge_index = torch.tensor([[0, 1, 1, 2],
                            [1, 0, 2, 1]], dtype=torch.long)

# Node feature matrix with shape [num_nodes, num_node_features] = [3, 1].
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# Build the graph object from the node features and the edges.
data = Data(x=x, edge_index=edge_index)

# Printing a Data object shows the shape of every stored tensor.
print(data) 

Data(x=[3, 1], edge_index=[2, 4])


In [15]:
import torch
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

from torch_geometric.datasets import Planetoid

class GCNConv(MessagePassing):
    """Minimal GCN layer built on top of PyG's ``MessagePassing``.

    Node features are first passed through a linear layer. Each edge
    (self-loops included) then carries the transformed feature ``x_j`` scaled
    by ``deg[source] ** -0.5 * deg[target] ** -0.5``, and the incoming
    messages of a node are summed (``aggr='add'``).
    """

    def __init__(self, in_channels, out_channels):
        """Configure sum aggregation and create the linear transform.

        :param in_channels: number of input features per node
        :param out_channels: number of output features per node, i.e. the
            number of units of the linear layer
        """
        super().__init__(aggr='add', flow='source_to_target')
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        """Apply the layer to a graph.

        :param x: node feature matrix, [N, in_channels] (N nodes)
        :param edge_index: edge list of the graph
        :return: whatever ``self.propagate`` produces for the normalised
            messages; [N, out_channels] in the Cora demo of this notebook
        """
        num_nodes = x.size(0)

        # Step 1: add a self-loop to every node. The second return value is
        # the edge weight, which is discarded because the graph is unweighted.
        edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)

        # Step 2: linear transform of the feature matrix -> [N, out_channels].
        x = self.lin(x)

        # Step 3: normalisation coefficient of every edge. ``degree`` counts
        # how often each node occurs as an edge target. On Cora the author's
        # debug output showed values like [4., 4., 6., ..., 2., 5., 5.] with
        # 2708 entries.
        src, dst = edge_index[0], edge_index[1]
        deg = degree(dst, num_nodes, dtype=x.dtype)
        inv_sqrt_deg = deg.pow(-0.5)
        norm = inv_sqrt_deg[src] * inv_sqrt_deg[dst]   # [E]
        print("norm:")
        print(len(norm))

        # Steps 4-5: start message passing; ``x`` and ``norm`` are forwarded
        # to ``message`` by name.
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        """Scale the per-edge features by their normalisation coefficient.

        :param x_j: per-edge node features, [E, out_channels]
        :param norm: per-edge normalisation coefficients, [E]
        :return: scaled messages, [E, out_channels]
        """
        # Reshape to [E, 1] so the coefficient broadcasts over the channels.
        edge_scale = norm.view(-1, 1)
        return edge_scale * x_j

In [16]:
if __name__ == '__main__':
    # Load Cora through PyG's Planetoid loader (root directory /tmp/Cora) and
    # take the dataset's first entry as the graph to work on.
    dataset = Planetoid(root='/tmp/Cora', name='Cora')
    data = dataset[0]

    # One hand-written GCN layer mapping the input features to 64 channels.
    net = GCNConv(data.num_features, 64)
    h_nodes = net(data.x, data.edge_index)   
    # data.x is [2708, 1433], data.edge_index is [2, 10556]
    print(h_nodes.shape)   # [2708, 64]

norm:
13264
torch.Size([2708, 64])


# 快速检查图的性质

In [7]:
# Quick structural checks offered by the PyG Data object.
# Does the graph contain isolated nodes?
print(data.has_isolated_nodes())

# Does the graph contain self-loops?
print(data.has_self_loops())

# Is the graph directed?
print(data.is_directed())

False
False
False


# 访问数据集

In [8]:
from torch_geometric.datasets import TUDataset

# Load the ENZYMES dataset through PyG's TUDataset loader (root /tmp/ENZYMES).
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')

# Indexing the dataset with an integer yields a single graph (a Data object).
data = dataset[0]
print(data)


Data(edge_index=[2, 168], x=[37, 3], y=[1])


# 切片

In [9]:
# Slicing a dataset returns another dataset: the first 540 graphs for training...
train_dataset = dataset[:540]
print(train_dataset)

# ...and everything from index 540 onwards for testing.
test_dataset = dataset[540:]

ENZYMES(540)


# Cora数据集

In [10]:
from torch_geometric.datasets import Planetoid
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

# Load Cora through PyG's Planetoid loader (root directory /tmp/Cora).
dataset = Planetoid(root='/tmp/Cora', name='Cora')

# Number of input features per node (prints 1433 for Cora).
print(dataset.num_node_features)

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Layer sizes are read from the module-level ``dataset``. The ``GCNConv``
    in scope here is the one imported from ``torch_geometric.nn`` in this cell.
    """

    def __init__(self):
        """Create the two graph convolutions (input -> 16 -> num_classes)."""
        super(GCN, self).__init__()
        hidden_channels = 16
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, dataset.num_classes)

    def forward(self, data):
        """Return per-node log-probabilities over the classes.

        :param data: graph object providing ``x`` and ``edge_index``
        :return: log-softmax over dim 1 of the second layer's output
        """
        features, edge_index = data.x, data.edge_index

        # First convolution, then ReLU; dropout is only active while the
        # module is in training mode.
        hidden = F.relu(self.conv1(features, edge_index))
        hidden = F.dropout(hidden, training=self.training)

        # Second convolution produces one score per class.
        logits = self.conv2(hidden, edge_index)
        return F.log_softmax(logits, dim=1)

1433


In [11]:
# Everything runs on the CPU; model and graph are moved to the same device.
device = torch.device('cpu')
model = GCN().to(device)
data = dataset[0].to(device)
# Adam with learning rate 0.01 and weight decay 5e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Training mode: makes the dropout inside GCN.forward active.
model.train()
# 200 optimisation steps, each one a forward pass over the whole graph.
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    # Negative log-likelihood on the log-softmax outputs, restricted to the
    # nodes selected by train_mask.
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

In [12]:
# Evaluation mode: dropout inside GCN.forward is switched off.
model.eval()
# Predicted class per node = index of the largest log-probability in each row.
pred = model(data).argmax(dim=1)

# Accuracy over the nodes selected by test_mask.
# NOTE(review): assumes test_mask is a boolean mask, so its sum is the number
# of test nodes - confirm.
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.7890
