In [96]:
import dgl
import dgl.function as fn
import networkx as nx
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from dgl.data import MiniGCDataset
from dgl.nn.pytorch import *
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import pickle


import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "true"

if torch.cuda.is_available():
    device = torch.device("cuda")
# print(device)

GAT层流程：
1. 参数
in_dim：输入的特征维度
out_dim：输出的特征维度
num_heads：多头注意力机制的头的数量
feat_drop：特征丢弃概率（用于特征的丢弃, 也就是数据在传递过程中按照概率把一部分特征信息丢弃掉，让模型更加稳定而不容易过拟合）
attn_drop：注意力丢弃概率（不同的是它是在正则化注意力机制的时候使用的。通过随机丢弃掉一些注意力信息，正则化模型，避免过拟合）
alpha：激活函数的斜率
edge_softmax：节点到边的 Softmax 函数
agg_activation：节点到节点的聚合激活函数：elu
【feat_drop：一般是设置为 0.6，也就是丢弃掉 60% 的特征信息；attn_drop：一般设置为 0.2；alpha：一般设置为 0.2.】

In [97]:
class GATLayer(nn.Module):
    def __init__(self,
                 in_dim,
                 out_dim,
                 num_heads,
                 feat_drop=0.,
                 attn_drop=0.,
                 alpha=0.2,
                 agg_activation=F.elu):
        super(GATLayer, self).__init__()

        self.num_heads = num_heads
        self.feat_drop = nn.Dropout(feat_drop)
        self.fc = nn.Linear(in_dim, num_heads * out_dim, bias=False)
        print(self.fc)
        self.attn_l = nn.Parameter(torch.Tensor(size=(num_heads, out_dim * 900, 1)))
        self.attn_r = nn.Parameter(torch.Tensor(size=(num_heads, out_dim * 900, 1)))
        self.attn_drop = nn.Dropout(attn_drop)
        self.activation = nn.LeakyReLU(alpha)
        self.softmax = edge_softmax
        self.agg_activation=agg_activation

    def clean_data(self):
        ndata_names = ['h', 'a1', 'a2']
        edata_names = ['a_drop']
        for name in ndata_names:
            self.g.ndata.pop(name)
        for name in edata_names:
            self.g.edata.pop(name)
#         print(ndata_names)

    def edge_attention(self, edges):
        # an edge UDF to compute un-normalized attention values from src and dst
        a = self.activation(edges.src['a1'] + edges.dst['a2'])
        return {'a' : a}

    def edge_softmax(self):
        attention = self.softmax(self.g, self.g.edata.pop('a'))
        # Dropout attention scores and save them
        self.g.edata['a_drop'] = self.attn_drop(attention)
        
        
    def forward(self, feat, bg):
        # prepare, inputs are of shape V x F, V the number of nodes, F the dim of input features
        self.g = bg
        h = self.feat_drop(feat)
        print("h")
        print(h.shape)
        # V x K x F', K number of heads, F' dim of transformed features
        ft = self.fc(h).reshape((h.shape[0], self.num_heads, -1))
        print("self.fc")
        print(self.fc(h).shape)
        print("fc")
        print(ft.shape)
        head_ft = ft.transpose(0, 1)                              # K x V x F'
#         print("222")
        print("head_ft")
        print(head_ft.shape)
        print("attn_l")
        print(self.attn_l.shape)
        a1 = torch.bmm(head_ft, self.attn_l).transpose(0, 1)      # V x K x 1
        print(a1.shape)
        a2 = torch.bmm(head_ft, self.attn_r).transpose(0, 1)      # V x K x 1
        print(a2.shape)
        self.g.ndata.update({'h' : ft, 'a1' : a1, 'a2' : a2})
        print(self.g)
        # 1. compute edge attention
        self.g.apply_edges(self.edge_attention)
        # 2. compute softmax in two parts: exp(x - max(x)) and sum(exp(x - max(x)))
        self.edge_softmax()
        # 2. compute the aggregated node features scaled by the dropped,
        # unnormalized attention values.
        self.g.update_all(fn.u_mul_e('h', 'a_drop', 'h'), fn.sum('h', 'h'))
#         print(self.g)
        # 3. apply normalizer
        ret = self.g.ndata['h']                                  # V x K x F'
#         print(ret.shape)
        ret = ret.flatten(1)

        if self.agg_activation is not None:
            ret = self.agg_activation(ret)

        # Clean ndata and edata
        self.clean_data()
#         print(ret)
        return ret



In [98]:
class GATClassifier(nn.Module):
    def __init__(self, in_dim, hidden_dim, num_heads, n_classes):
        super(GATClassifier, self).__init__()

        self.layers = nn.ModuleList([
#              初始化第一层 GATLayer
            GATLayer(in_dim, hidden_dim, num_heads),
#              初始化第二层 GATLayer
            GATLayer(hidden_dim * num_heads, hidden_dim, num_heads)
        ])
#         分类
        self.classify = nn.Linear(hidden_dim * num_heads, n_classes)

    def forward(self, bg):
        # For undirected graphs, in_degree is the same as out_degree.
#          以 bg 中节点的入度为特征
        h = bg.ndata["h"].float()
#         print(h.shape)
#     两层 GATLayer遍历
        for i, gnn in enumerate(self.layers):
#         将特征的值当作 GATLayer 层的输入
            h = gnn(h, bg)
            print(h)
#     把 GATLayer 层的输出作为新的特征
        bg.ndata['h'] = h
#         print(bg.ndata['h'])
#     通过求均值，得到新特征的图表示
        hg = dgl.mean_nodes(bg, 'h')
        return self.classify(hg)

In [99]:
from dgl.data import DGLDataset
class NTU(DGLDataset):
    def __init__(self, raw_dir=None, force_reload=False, verbose=False):
        super(NTU, self).__init__(name='NTU',
                                          raw_dir=raw_dir,
                                          force_reload=force_reload,
                                          verbose=verbose)

    def process(self):
        src = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24]
        dst = [1,20,20,2,20,4,5,6,20,8,9,10,0,12,13,14,0,16,17,18,22,7,24,11]
        windows = 5
        for i in range(windows-1):
        #     print(i)
            src_tensor = torch.tensor(src[-24:])
            dst_tensor = torch.tensor(dst[-24:])
            flg = np.random.randint((i+1)*25+2,(i+1)*25+3,25)
            dst+=src[-24:]
            src+=flg.tolist()
        #     print((src_tensor+25).tolist())
            src+=(src_tensor+25).tolist()
            dst+=(dst_tensor+25).tolist()
        
        data_path = r"C:/Users/YU TAO/Desktop/STGAT-main/prepare/ntu_60/ntu_60_new/xsub/val_data_joint.npy"
        label_path = "C:/Users/YU TAO/Desktop/STGAT-main/prepare/ntu_60/ntu_60_new/xsub/val_label.pkl"
        # 处理标签
        with open(label_path, 'rb') as f:
            sample_name, label = pickle.load(f)
        self.label = label
    #     print(label.shape)
        # 处理特征
        ndata = np.load(data_path)
        graphlist = []
        for index, X in enumerate(ndata):
            # 图创建
            g = dgl.graph((src,dst))
            g = dgl.to_bidirected(g)
    #         print(index)
    #         print(X.transpose(2,0,1,3).shape)
            torch_X = torch.from_numpy(X.transpose(2,0,1,3))
            g.ndata['h'] = torch_X
            graphlist.append(g)
        print("done")
#         print(dgl.batch（graphlist）)
        self.graphs = graphlist
#         print(self.graphs)
#         print(self.graphs)
       

#     def save():
#         """save the graph list and the labels"""
#         graphlist, label = load_graph()
#         graph_path = os.path.join("C:/Users/YU TAO/Desktop/STGAT-main/prepare/ntu_60/ntu_60_new/", 'dgl_graph.bin')
#         save_graphs(str(graph_path), graphlist, {'labels': label})

#     def has_cache():
#         graph_path = os.path.join(self.save_path, 'dgl_graph.bin')
#         return os.path.exists(graph_path)

#     def load():
#         graphs, label_dict = load_graph()
#         self.graphs = graphs
#         self.label = label_dict['labels']


    @property
    def num_labels():
        """Number of labels for each graph, i.e. number of prediction tasks."""
        return 60

    def __getitem__(self, idx):
        r""" Get graph and label by index

        Parameters
        ----------
        idx : int
            Item index

        Returns
        -------
        (:class:`dgl.DGLGraph`, Tensor)
        """
        return self.graphs[idx], self.label[idx]


    def __len__(self):
        r"""Number of graphs in the dataset.

        Return
        -------
        int
        """
        return len(self.graphs)

In [100]:
def collate(samples):
    # The input `samples` is a list of pairs
    #  (graph, label). 
#     print(samples)
    graphs, labels = map(list, zip(*samples))
#     print(graphs)
    batched_graph = dgl.batch(graphs)
    return batched_graph, torch.tensor(labels)

In [101]:
from dgl.dataloading import GraphDataLoader
dataset = NTU()

# data_loader = DataLoader(trainset, batch_size=32, shuffle=True, collate_fn=collate)
dataloader = GraphDataLoader(dataset, batch_size=1, shuffle=True, collate_fn=collate)
# for iter, (bg, label) in enumerate(dataloader):
#     print(bg)
#     print(label)
model = GATClassifier(2, 16, 8, 60)
# print(model)
model.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model.train()

epoch_losses = []
for epoch in range(10):
    epoch_loss = 0
    correct = 0
    for iter, (bg, label) in enumerate(dataloader):
       
        bg = bg.to(device)
        label = label.to(device)
#         print(bg)
        prediction = model(bg)
#         print(prediction)
        prediction = prediction.to(device)
#         print(label)
        loss = loss_func(prediction, label)
#         print(loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.detach().item()
        
        prediction = torch.softmax(model(bg), 1)
        prediction = torch.multinomial(prediction, 1)
#         print((prediction,label))
        correct += (prediction == label.view(-1, 1)).sum().item()
#         print(correct)
#     print(iter)
    acc = correct / ((iter + 1)*32)
    epoch_loss /= (iter + 1)
    print('Epoch {}, loss {:.4f}, Acc {:.4f}'.format(epoch, epoch_loss, acc))
    epoch_losses.append(epoch_loss)

# model.eval()
# print(next(model.parameters()).is_cuda)


# # Convert a list of tuples to two lists
# test_X, test_Y = map(list, zip(*testset))
# test_bg = dgl.batch(test_X).to(device)
# # print(test_bg.device)
# test_Y = torch.tensor(test_Y).float().view(-1, 1).to(device)
# probs_Y = torch.softmax(model(test_bg), 1).to(device)
# # sampled_Y可能通过比argmax_Y预测更多的不同特征来估计类的潜在分布，而argmax_Y预测只考虑最大概率的类别

# # torch.multinomial函数是从一组概率值中获取某一索引的函数。第一个参数probs_Y是一个概率矩阵，第二个参数1表示只从概率矩阵中抽取一次，所以结果中会返回一个索引。
# sampled_Y = torch.multinomial(probs_Y, 1)
# # torch.max可以用来在一个张量中查找某个数值或者向量的最大值，
# # 本例中的torch.max(probs_Y, 1)用于在probs_Y的每一行（即dim=1）中查找最大值，返回一个元组，元组的第一个元素为probs_Y每行的最大值，第二个元素为最大值的索引（即样本的预测类别）。
# # [1].view(-1, 1)则用于将索引变换为与输入的Y样本同样的张量形状，即将其变换为列向量。
# argmax_Y = torch.max(probs_Y, 1)[1].view(-1, 1).to(device)
# print('Accuracy of sampled predictions on the test set: {:.4f}%'.format(
#     (test_Y == sampled_Y.float()).sum().item() / len(test_Y) * 100))
# print('Accuracy of argmax predictions on the test set: {:4f}%'.format(
#     (test_Y == argmax_Y.float()).sum().item() / len(test_Y) * 100))

# for graph, label in dataloader:
#     print(graph)
    
# 图创建
# g = dgl.DGLGraph((src,dst))
# g = dgl.to_bidirected(g)
# 无向图创建
# graph = g.to_networkx().to_undirected()
# 节点分布的方式，将各个节点的坐标通过字典的方式存储
# pos = nx.kamada_kawai_layout(graph, center=[2,20])
# pos[3]=[2,20.5]
# pos[2]=[2.01,20.3]
# pos[20]=[2.01,20.1]
# pos[1]=[2.01,19.9]
# pos[0]=[2.01,19.7]
# print(pos)
# options = {"edgecolors": "tab:grey", "node_size": 200, "alpha": 0.8, "font_color":"whitesmoke", "font_size":6, "width":1}
# nx.draw(graph, pos, with_labels=True, node_color="tab:blue",**options)
# plt.show()
# x = torch.randn(3, 25, 128, 2)
# print(x.shape)
# 维度交换
# print(x.permute(1,0,2,3).shape)
# g.ndata['f'] = x.permute(1,0,2,3)
# print(g)

DGLError: [01:43:32] C:\Users\Administrator\dgl-0.5\src\graph\unit_graph.cc:71: Check failed: src->shape[0] == dst->shape[0] (220 vs. 216) : Input arrays should have the same length.

collate用于将不同大小的数据集合并为统一格式（将图数据和标签数据分成两个list），它主要用于打包可变大小的输入以构建小批次

In [None]:
def collate(samples):
    # The input `samples` is a list of pairs
    #  (graph, label).
#     print(samples)
    graphs, labels = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
#     print(batched_graph)
    return batched_graph, torch.tensor(labels)

In [None]:
# Create training and test sets.
# dgl.data.MiniGCDataset(num_graphs, min_num_v, max_num_v, seed=0, save_graph=True, force_reload=False, verbose=False, transform=None)
trainset = MiniGCDataset(320, 10, 20)
testset = MiniGCDataset(80, 10, 20)

# print(trainset)

# Use PyTorch's DataLoader and the collate function
# defined before.
data_loader = DataLoader(trainset, batch_size=32, shuffle=True,
                         collate_fn=collate)

# print(data_loader)
# fn参数：def __init__(self, in_dim, hidden_dim, num_heads, n_classes):
model = GATClassifier(1, 32, 8, trainset.num_classes)
# print(model)
model.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model.train()

epoch_losses = []
for epoch in range(100):
    epoch_loss = 0
    correct = 0
    for iter, (bg, label) in enumerate(data_loader):
       
        bg = bg.to(device)
        label = label.to(device)
#         print(bg.shape)
        prediction = model(bg)
        prediction = prediction.to(device)
#         print(prediction)
#         print(label)
        loss = loss_func(prediction, label)
#         print(loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.detach().item()
        
        prediction = torch.softmax(model(bg), 1)
        prediction = torch.multinomial(prediction, 1)
#         print((prediction,label))
        correct += (prediction == label.view(-1, 1)).sum().item()
#         print(correct)
#     print(iter)
    acc = correct / ((iter + 1)*32)
    epoch_loss /= (iter + 1)
    print('Epoch {}, loss {:.4f}, Acc {:.4f}'.format(epoch, epoch_loss, acc))
    epoch_losses.append(epoch_loss)

model.eval()
# print(next(model.parameters()).is_cuda)


# Convert a list of tuples to two lists
test_X, test_Y = map(list, zip(*testset))
test_bg = dgl.batch(test_X).to(device)
# print(test_bg.device)
test_Y = torch.tensor(test_Y).float().view(-1, 1).to(device)
probs_Y = torch.softmax(model(test_bg), 1).to(device)
# sampled_Y可能通过比argmax_Y预测更多的不同特征来估计类的潜在分布，而argmax_Y预测只考虑最大概率的类别

# torch.multinomial函数是从一组概率值中获取某一索引的函数。第一个参数probs_Y是一个概率矩阵，第二个参数1表示只从概率矩阵中抽取一次，所以结果中会返回一个索引。
sampled_Y = torch.multinomial(probs_Y, 1)
# torch.max可以用来在一个张量中查找某个数值或者向量的最大值，
# 本例中的torch.max(probs_Y, 1)用于在probs_Y的每一行（即dim=1）中查找最大值，返回一个元组，元组的第一个元素为probs_Y每行的最大值，第二个元素为最大值的索引（即样本的预测类别）。
# [1].view(-1, 1)则用于将索引变换为与输入的Y样本同样的张量形状，即将其变换为列向量。
argmax_Y = torch.max(probs_Y, 1)[1].view(-1, 1).to(device)
print('Accuracy of sampled predictions on the test set: {:.4f}%'.format(
    (test_Y == sampled_Y.float()).sum().item() / len(test_Y) * 100))
print('Accuracy of argmax predictions on the test set: {:4f}%'.format(
    (test_Y == argmax_Y.float()).sum().item() / len(test_Y) * 100))