## 1.设定形参

In [61]:
import argparse


def _str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy, so such a flag could never be
    switched off from the command line.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean spelling.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))


parser = argparse.ArgumentParser()
parser.add_argument('--time_steps', type=int, nargs='?', default=16,
                    help="total time steps used for train, eval and test")

# Experimental settings.
parser.add_argument('--dataset', type=str, nargs='?', default='Enron',
                    help='dataset name')
parser.add_argument('--epochs', type=int, nargs='?', default=200,
                    help='# epochs')
parser.add_argument('--val_freq', type=int, nargs='?', default=1,
                    help='Validation frequency (in epochs)')
parser.add_argument('--test_freq', type=int, nargs='?', default=1,
                    help='Testing frequency (in epochs)')
parser.add_argument('--batch_size', type=int, nargs='?', default=512,
                    help='Batch size (# nodes)')
# 1-hot encoding is input as a sparse matrix - hence no scalability issue for large datasets.
# A bare ``--featureless`` (no value) now means True instead of None.
parser.add_argument('--featureless', type=_str2bool, nargs='?', const=True, default=True,
                    help='True if one-hot encoding.')
parser.add_argument("--early_stop", type=int, default=10,
                    help="patient")

# Tunable hyper-params
# TODO: Implementation has not been verified, performance may not be good.
parser.add_argument('--residual', type=_str2bool, nargs='?', const=True, default=True,
                    help='Use residual')
# Number of negative samples per positive pair.
parser.add_argument('--neg_sample_size', type=int, nargs='?', default=10,
                    help='# negative samples per positive')
# Walk length for random walk sampling.
parser.add_argument('--walk_len', type=int, nargs='?', default=20,
                    help='Walk length for random walk sampling')
# Weight for negative samples in the binary cross-entropy loss function.
parser.add_argument('--neg_weight', type=float, nargs='?', default=1.0,
                    help='Weightage for negative samples')
parser.add_argument('--learning_rate', type=float, nargs='?', default=0.01,
                    help='Initial learning rate for self-attention model.')
parser.add_argument('--spatial_drop', type=float, nargs='?', default=0.1,
                    help='Spatial (structural) attention Dropout (1 - keep probability).')
parser.add_argument('--temporal_drop', type=float, nargs='?', default=0.5,
                    help='Temporal attention Dropout (1 - keep probability).')
# The help text used to be a copy of the --learning_rate description.
parser.add_argument('--weight_decay', type=float, nargs='?', default=0.0005,
                    help='Weight decay coefficient.')

# Architecture params
parser.add_argument('--structural_head_config', type=str, nargs='?', default='16,8,8',
                    help='Encoder layer config: # attention heads in each GAT layer')
parser.add_argument('--structural_layer_config', type=str, nargs='?', default='128',
                    help='Encoder layer config: # units in each GAT layer')
parser.add_argument('--temporal_head_config', type=str, nargs='?', default='16',
                    help='Encoder layer config: # attention heads in each Temporal layer')
parser.add_argument('--temporal_layer_config', type=str, nargs='?', default='128',
                    help='Encoder layer config: # units in each Temporal layer')
# NOTE(review): kept as a string ('True'), unlike the other boolean switches,
# so that any existing string comparison on this value keeps working.
parser.add_argument('--position_ffn', type=str, nargs='?', default='True',
                    help='Position wise feedforward')
parser.add_argument('--window', type=int, nargs='?', default=-1,
                    help='Window for temporal attention (default : -1 => full)')

# parse_known_args tolerates the extra argv entries a Jupyter kernel injects.
args, _ = parser.parse_known_args()

## 2.导入数据

In [62]:
import networkx as nx
import pickle as pkl
def load_graphs(dataset_str):
    """Load the pickled snapshot list of a dataset and derive adjacency matrices.

    Args:
        dataset_str: Dataset directory name under ``../data``.

    Returns:
        Tuple ``(graphs, adjs)``: the unpickled sequence of graphs and a list
        holding ``nx.adjacency_matrix`` of each graph, in the same order.
    """
    pickle_path = "../data/{}/{}".format(dataset_str, "graph.pkl")
    # NOTE(review): unpickling executes arbitrary code; only load trusted files.
    with open(pickle_path, "rb") as handle:
        snapshots = pkl.load(handle)
    print("Loaded {} graphs ".format(len(snapshots)))

    adjacency = []
    for snapshot in snapshots:
        adjacency.append(nx.adjacency_matrix(snapshot))
    return snapshots, adjacency

In [63]:
# Load every snapshot of the dataset selected by --dataset together with its adjacency matrix.
graphs, adjs = load_graphs(args.dataset)
# Show how many snapshots were loaded.
print(len(graphs))

Loaded 16 graphs 
16


  graphs = pkl.load(f)


## 3. 没有节点特征的话使用独热编码创建特征

In [64]:
import scipy
import scipy.sparse  # `import scipy` alone does not guarantee the sparse submodule is loaded

# Validate before `adjs[args.time_steps - 1]` is indexed; previously this check
# ran last, so an oversized time_steps surfaced as an IndexError instead.
assert args.time_steps <= len(adjs), "Time steps is illegal"

if args.featureless == True:
    # One-hot node features: the identity matrix sized by the node count of the
    # last used snapshot, truncated to the first x.shape[0] rows for each
    # snapshot. Snapshots with more nodes than the last one are skipped.
    num_nodes_last = adjs[args.time_steps - 1].shape[0]
    identity_csr = scipy.sparse.identity(num_nodes_last).tocsr()
    feats = [identity_csr[range(0, x.shape[0]), :] for x in adjs if
             x.shape[0] <= num_nodes_last]

# Report the node count and feature dimension of every snapshot.
# NOTE(review): `feats` is only defined on the featureless path above.
for x in range(len(feats)):
    print(f'第{x + 1}个图，包含{feats[x].shape[0]}个节点,节点特征维度{feats[x].shape[1]}')

第1个图，包含18个节点,节点特征维度143
第2个图，包含23个节点,节点特征维度143
第3个图，包含24个节点,节点特征维度143
第4个图，包含50个节点,节点特征维度143
第5个图，包含66个节点,节点特征维度143
第6个图，包含79个节点,节点特征维度143
第7个图，包含98个节点,节点特征维度143
第8个图，包含110个节点,节点特征维度143
第9个图，包含117个节点,节点特征维度143
第10个图，包含125个节点,节点特征维度143
第11个图，包含131个节点,节点特征维度143
第12个图，包含135个节点,节点特征维度143
第13个图，包含137个节点,节点特征维度143
第14个图，包含138个节点,节点特征维度143
第15个图，包含141个节点,节点特征维度143
第16个图，包含143个节点,节点特征维度143


## 4. 在每个图上进行随机游走

In [65]:
def get_context_pairs(graphs, adjs, num_walks=10, walk_len=20):
    """Sample random-walk context pairs on every snapshot.

    Args:
        graphs: Sequence of snapshot graphs.
        adjs: Adjacency matrices, ``adjs[i]`` belonging to ``graphs[i]``.
        num_walks: Number of walks started from each node (default 10, the
            value that used to be hard-coded).
        walk_len: Length of each walk (default 20, the value that used to be
            hard-coded; callers may pass e.g. ``args.walk_len``).

    Returns:
        A list with one ``run_random_walks_n2v`` result per snapshot.
    """
    return [
        run_random_walks_n2v(graph, adjs[i], num_walks=num_walks, walk_len=walk_len)
        for i, graph in enumerate(graphs)
    ]

## 4-1 run_random_walks_n2v

In [66]:
##4-1 随机游走函数
def run_random_walks_n2v(graph, adj, num_walks, walk_len):
    """Run random walks on one snapshot and collect (node, context) pairs.

    Args:
        graph: Snapshot graph whose ``edges()`` are walked.
        adj: Adjacency matrix indexed as ``adj[u, v]``; supplies the edge weights.
        num_walks: Number of walks started from every node.
        walk_len: Length of each walk.

    Returns:
        ``defaultdict(list)`` mapping a node to every other node that appeared
        within the sliding window around it in some walk (duplicates kept).
    """
    # Rebuild the snapshot as a simple undirected graph that only contains
    # nodes touched by an edge, weighting each edge by its adjacency entry.
    nx_G = nx.Graph()
    for u, v in graph.edges():
        nx_G.add_edge(u, v, weight=adj[u, v])

    # is_directed=False, p=q=1.0.
    walker = Graph_RandomWalk(nx_G, False, 1.0, 1.0)
    walker.preprocess_transition_probs()
    walks = walker.simulate_walks(num_walks, walk_len)

    # Slide a window over each walk; every distinct-valued neighbour inside the
    # window becomes a context sample of the centre node.
    WINDOW_SIZE = 10
    pairs = defaultdict(list)
    pairs_cnt = 0
    for walk in walks:
        walk_size = len(walk)
        for center_pos, center in enumerate(walk):
            lo = max(center_pos - WINDOW_SIZE, 0)
            hi = min(center_pos + WINDOW_SIZE, walk_size) + 1
            context = [other for other in walk[lo:hi] if other != center]
            if context:  # do not create an empty entry for the centre node
                pairs[center].extend(context)
                pairs_cnt += len(context)
    print("# nodes with random walk samples: {}".format(len(pairs)))
    print("# sampled pairs: {}".format(pairs_cnt))
    return pairs

In [67]:
## 定义图上的随机游走
class Graph_RandomWalk():
    """Biased (node2vec-style) random walker over a weighted graph.

    The graph object must provide ``nodes()``, ``edges()``, ``neighbors(n)``,
    ``has_edge(u, v)`` and ``G[u][v]['weight']``.
    """

    def __init__(self, nx_G, is_directed, p, q):
        """Store the graph, its directedness and the bias parameters ``p`` and ``q``."""
        self.G = nx_G
        self.is_directed = is_directed
        self.p = p
        self.q = q

    def preprocess_transition_probs(self):
        """Precompute the alias tables used by the walks.

        Sets ``self.alias_nodes`` (first step out of a node, proportional to
        edge weight) and ``self.alias_edges`` (subsequent steps, keyed by the
        ``(previous, current)`` pair).
        """
        G = self.G

        alias_nodes = {}
        for node in G.nodes():
            # Neighbours are sorted so that table positions match the sorted
            # neighbour list used when drawing in node2vec_walk.
            unnormalized_probs = [G[node][nbr]['weight'] for nbr in sorted(G.neighbors(node))]
            norm_const = sum(unnormalized_probs)
            normalized_probs = [float(u_prob) / norm_const for u_prob in unnormalized_probs]
            alias_nodes[node] = alias_setup(normalized_probs)

        alias_edges = {}
        for edge in G.edges():
            alias_edges[edge] = self.get_alias_edge(edge[0], edge[1])
            if not self.is_directed:
                # An undirected edge can be traversed both ways.
                alias_edges[(edge[1], edge[0])] = self.get_alias_edge(edge[1], edge[0])

        self.alias_nodes = alias_nodes
        self.alias_edges = alias_edges
        return

    def simulate_walks(self, num_walks, walk_length):
        '''
        Repeatedly simulate random walks from each node.

        The node order is reshuffled before each of the ``num_walks`` rounds.
        Returns the list of all walks (``num_walks`` per node).
        '''
        G = self.G
        walks = []
        nodes = list(G.nodes())
        for walk_iter in range(num_walks):
            random.shuffle(nodes)
            for node in nodes:
                walks.append(self.node2vec_walk(walk_length=walk_length, start_node=node))

        return walks

    def node2vec_walk(self, walk_length, start_node):
        '''
        Simulate a random walk starting from start node.

        The walk stops early when the current node has no neighbours.
        '''
        G = self.G
        alias_nodes = self.alias_nodes
        alias_edges = self.alias_edges

        walk = [start_node]

        while len(walk) < walk_length:
            cur = walk[-1]
            cur_nbrs = sorted(G.neighbors(cur))
            if len(cur_nbrs) == 0:
                break
            if len(walk) == 1:
                # First step: only the edge weights matter.
                walk.append(cur_nbrs[alias_draw(alias_nodes[cur][0], alias_nodes[cur][1])])
            else:
                # Later steps: biased by the previously visited node.
                prev = walk[-2]
                next_node = cur_nbrs[alias_draw(alias_edges[(prev, cur)][0],
                                                alias_edges[(prev, cur)][1])]
                walk.append(next_node)
        return walk

    def get_alias_edge(self, src, dst):
        '''
        Get the alias edge setup lists for a given edge.

        For a walker that just moved ``src -> dst``, every neighbour of ``dst``
        gets weight/p if it is ``src`` (step back), the plain weight if it is
        also adjacent to ``src``, and weight/q otherwise.
        '''
        G = self.G
        p = self.p
        q = self.q

        unnormalized_probs = []
        for dst_nbr in sorted(G.neighbors(dst)):
            if dst_nbr == src:
                unnormalized_probs.append(G[dst][dst_nbr]['weight']/p)
            elif G.has_edge(dst_nbr, src):
                unnormalized_probs.append(G[dst][dst_nbr]['weight'])
            else:
                unnormalized_probs.append(G[dst][dst_nbr]['weight']/q)
        norm_const = sum(unnormalized_probs)
        normalized_probs = [float(u_prob)/norm_const for u_prob in unnormalized_probs]

        return alias_setup(normalized_probs)

    



In [68]:
## 4-1-1-1  alias_setup 构建alia的随机采样策略
import numpy as np
import random
from collections import defaultdict
def alias_setup(probs):
    """Build the two tables of the alias sampling method.

    Args:
        probs: Sequence of outcome probabilities.

    Returns:
        Tuple ``(J, q)`` of arrays with one slot per outcome: ``q[k]`` is the
        scaled probability of keeping outcome ``k`` when slot ``k`` is drawn,
        ``J[k]`` the alternative outcome used otherwise.
    """
    num_outcomes = len(probs)
    q = np.zeros(num_outcomes)
    J = np.zeros(num_outcomes, dtype=int)

    # Scale every probability by the outcome count and split the slots into
    # those below 1 (need topping up) and the rest (can donate).
    smaller, larger = [], []
    for idx, prob in enumerate(probs):
        q[idx] = num_outcomes * prob
        bucket = smaller if q[idx] < 1.0 else larger
        bucket.append(idx)

    # Pair one deficient slot with one donor until either stack runs out.
    while smaller and larger:
        small, large = smaller.pop(), larger.pop()
        J[small] = large
        # The donor gives away exactly what `small` lacks, then is re-filed.
        q[large] = q[large] + q[small] - 1.0
        (smaller if q[large] < 1.0 else larger).append(large)

    return J, q

## 4-1-2-1-1 alias_draw alias采样概率
def alias_draw(J, q):
    '''
    Draw sample from a non-uniform discrete distribution using alias sampling.

    ``J`` and ``q`` are the tables returned by ``alias_setup``. Two random
    numbers are consumed per call: one picks a slot in ``0 .. len(J) - 1``,
    the other decides between that slot and its alias.
    '''
    slot = int(np.floor(np.random.rand() * len(J)))
    keep_slot = np.random.rand() < q[slot]
    return slot if keep_slot else J[slot]

In [69]:
## Run the random walks on every snapshot to obtain the per-snapshot context pairs
context_pairs_train = get_context_pairs(graphs, adjs)

# nodes with random walk samples: 18
# sampled pairs: 40894
# nodes with random walk samples: 18
# sampled pairs: 39116
# nodes with random walk samples: 14
# sampled pairs: 31992
# nodes with random walk samples: 47
# sampled pairs: 103876
# nodes with random walk samples: 57
# sampled pairs: 129048
# nodes with random walk samples: 65
# sampled pairs: 149074
# nodes with random walk samples: 79
# sampled pairs: 194682
# nodes with random walk samples: 97
# sampled pairs: 240298
# nodes with random walk samples: 101
# sampled pairs: 246332
# nodes with random walk samples: 106
# sampled pairs: 256254
# nodes with random walk samples: 103
# sampled pairs: 253480
# nodes with random walk samples: 113
# sampled pairs: 280986
# nodes with random walk samples: 98
# sampled pairs: 232264
# nodes with random walk samples: 79
# sampled pairs: 181354
# nodes with random walk samples: 94
# sampled pairs: 233352
# nodes with random walk samples: 93
# sampled pairs: 227608


## 任务：预测最后一张图

## 5. 获取用于预测的数据

In [70]:
def get_evaluation_data(graphs):
    """Build the link-prediction splits from the last two snapshots.

    The second-to-last graph is the observed graph and the graph after it is
    the prediction target. Both are handed to ``create_data_splits`` with a
    validation fraction of 0.2 and a test fraction of 0.6.

    Returns:
        ``(train_edges, train_edges_false, val_edges, val_edges_false,
        test_edges, test_edges_false)``.
    """
    second_last = len(graphs) - 2
    observed_graph, target_graph = graphs[second_last], graphs[second_last + 1]

    (train_edges, train_edges_false,
     val_edges, val_edges_false,
     test_edges, test_edges_false) = create_data_splits(
        observed_graph, target_graph, val_mask_fraction=0.2, test_mask_fraction=0.6)

    return train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false


In [71]:
## 划分数据集
from sklearn.model_selection import train_test_split

def create_data_splits(graph, next_graph, val_mask_fraction=0.2, test_mask_fraction=0.6):
    """Split the edges of ``next_graph`` into train / validation / test sets.

    Positive edges are the edges of ``next_graph`` whose two endpoints both
    exist in ``graph``; an equal number of negative edges comes from
    ``negative_sample``. Positives and negatives are split together by two
    consecutive ``train_test_split`` calls.

    Returns:
        ``(train_pos, train_neg, val_pos, val_neg, test_pos, test_neg)``.
    """
    # nx.Graph(...) collapses the next snapshot to a simple graph first.
    edges_next = np.array(list(nx.Graph(next_graph).edges()))
    kept = [edge for edge in edges_next
            if graph.has_node(edge[0]) and graph.has_node(edge[1])]
    edges_positive = np.array(kept)

    edges_negative = negative_sample(edges_positive, graph.number_of_nodes(), next_graph)

    # First split: training part versus everything held out (val + test).
    held_out_fraction = val_mask_fraction + test_mask_fraction
    train_edges_pos, held_pos, train_edges_neg, held_neg = train_test_split(
        edges_positive, edges_negative, test_size=held_out_fraction)

    # Second split: divide the held-out part into validation and test.
    test_share = test_mask_fraction / (test_mask_fraction + val_mask_fraction)
    val_edges_pos, test_edges_pos, val_edges_neg, test_edges_neg = train_test_split(
        held_pos, held_neg, test_size=test_share)

    return train_edges_pos, train_edges_neg, val_edges_pos, val_edges_neg, test_edges_pos, test_edges_neg
            

In [72]:
##负采样
def negative_sample(edges_pos, nodes_num, next_graph):
    """Sample as many negative (absent) edges as there are positive edges.

    Node pairs are drawn uniformly from ``range(nodes_num)``. A pair is
    rejected when it is a self-loop, when ``next_graph`` has the edge in either
    direction, or when the same unordered pair was already sampled.

    Args:
        edges_pos: Positive edges; only ``len(edges_pos)`` is used.
        nodes_num: Number of node ids to draw from.
        next_graph: Object with ``has_edge(u, v)``.

    Returns:
        List of ``[i, j]`` pairs of length ``len(edges_pos)``.

    Raises:
        ValueError: if more negatives are requested than distinct node pairs
            exist (the loop could otherwise never terminate).
    """
    num_needed = len(edges_pos)
    max_pairs = nodes_num * (nodes_num - 1) // 2
    if num_needed > max_pairs:
        raise ValueError(
            "cannot sample {} distinct negative edges from {} nodes".format(num_needed, nodes_num))

    edges_neg = []
    # Unordered pairs already taken; a set replaces the former O(n) list scan
    # without changing which pairs get accepted.
    seen = set()
    while len(edges_neg) < num_needed:
        idx_i = np.random.randint(0, nodes_num)
        idx_j = np.random.randint(0, nodes_num)
        if idx_i == idx_j:
            continue
        if next_graph.has_edge(idx_i, idx_j) or next_graph.has_edge(idx_j, idx_i):
            continue
        key = (idx_i, idx_j) if idx_i < idx_j else (idx_j, idx_i)
        if key in seen:
            continue
        seen.add(key)
        edges_neg.append([idx_i, idx_j])
    # NOTE(review): this can still spin if next_graph leaves fewer free pairs
    # than requested; only the trivially impossible case is rejected above.
    return edges_neg

In [73]:
## Split the evaluation edges into training, validation and test sets
train_edges_pos, train_edges_neg, val_edges_pos, val_edges_neg, \
    test_edges_pos, test_edges_neg = get_evaluation_data(graphs)
# Print the number of positive and negative edges in each split.
print("No. Train: Pos={}, Neg={} \nNo. Val: Pos={}, Neg={} \nNo. Test: Pos={}, Neg={}".format(
len(train_edges_pos), len(train_edges_neg), len(val_edges_pos), len(val_edges_neg),
len(test_edges_pos), len(test_edges_neg)))

No. Train: Pos=46, Neg=46 
No. Val: Pos=46, Neg=46 
No. Test: Pos=140, Neg=140


## 6. 建立归纳推导图

In [74]:
## 建立归纳推导图，将former图的边和later图的节点加入到归纳推导图中
def inductive_graph(graph_former, graph_later):
    """Combine the nodes of the later graph with the edges of the former graph.

    Returns a new ``nx.MultiGraph`` that carries the nodes (with attributes) of
    ``graph_later`` and the edges (without attributes) of ``graph_former``.
    """
    later_nodes = graph_later.nodes(data=True)
    former_edges = graph_former.edges(data=False)

    merged = nx.MultiGraph()
    merged.add_nodes_from(later_nodes)
    merged.add_edges_from(former_edges)
    return merged

In [75]:
## Replace the last snapshot with the inductive graph:
## nodes of the last snapshot, edges of the second-to-last snapshot.
new_G = inductive_graph(graphs[args.time_steps-2], graphs[args.time_steps-1])
# Both shared lists are overwritten in place, so re-running this cell builds on
# the already replaced graph rather than on the originally loaded one.
graphs[args.time_steps-1] = new_G
adjs[args.time_steps-1] = nx.adjacency_matrix(new_G)

## 7. 建立数据集

In [76]:
from torch_geometric.data import Data
from utilities import fixed_unigram_candidate_sampler
import torch_geometric as tg
import scipy.sparse as sp
import torch
from torch.utils.data import Dataset

class MyDataset(Dataset):
    """Per-node training items for the dynamic-graph model.

    Each item (one per node of the last snapshot) is a dict with the keys
    ``node_1``, ``node_2``, ``node_2_neg`` and ``graphs``, in that order:
    per-time-step tensors of the node itself, its sampled context nodes, the
    negative samples for each context node, and the shared list of PyG graphs.
    """

    def __init__(self, args, graphs, features, adjs,  context_pairs):
        """Preprocess features and adjacencies, then materialise every item.

        Args:
            args: Options; reads ``time_steps``, ``neg_sample_size`` and ``window``.
            graphs: Snapshot graphs, one per time step.
            features: Sparse feature matrices, one per snapshot.
            adjs: Adjacency matrices, one per snapshot.
            context_pairs: Per-snapshot mapping node -> list of context nodes.
        """
        super().__init__()
        self.args = args
        self.graphs = graphs
        # Row-normalised dense features and GCN-normalised adjacencies.
        self.features = list(map(self._preprocess_features, features))
        self.adjs = list(map(self._normalize_graph_gcn, adjs))
        self.time_steps = args.time_steps
        self.context_pairs = context_pairs
        # Cap on positive context samples per node and time step; it reuses
        # the value of neg_sample_size.
        self.max_positive = args.neg_sample_size
        # Training nodes are the nodes of the last snapshot.
        self.train_nodes = list(self.graphs[self.time_steps-1].nodes())
        # First time step that is used; 0 unless a positive window is set.
        if args.window > 0:
            self.min_t = max(self.time_steps - self.args.window - 1, 0)
        else:
            self.min_t = 0
        self.degs = self.construct_degs()
        self.pyg_graphs = self._build_pyg_graphs()
        self.__createitems__()

    def _preprocess_features(self, features):
        """Densify a sparse feature matrix and scale each row to sum to one.

        Rows whose sum is zero end up as all-zero rows.
        """
        dense = np.array(features.todense())
        row_totals = np.array(dense.sum(1))
        inv_totals = np.power(row_totals, -1).flatten()
        # 1/0 yields inf; such rows are zeroed instead.
        inv_totals[np.isinf(inv_totals)] = 0.
        return sp.diags(inv_totals).dot(dense)

    def _normalize_graph_gcn(self, adj):
        """Add self-loops and scale by D^-1/2 on both sides; returns a COO matrix."""
        adj_f32 = sp.coo_matrix(adj, dtype=np.float32)
        with_self_loops = adj_f32 + sp.eye(adj_f32.shape[0], dtype=np.float32)
        degree = np.array(with_self_loops.sum(1), dtype=np.float32)
        inv_sqrt_degree = sp.diags(np.power(degree, -0.5).flatten(), dtype=np.float32)
        # (A D^-1/2)^T D^-1/2
        scaled_cols = with_self_loops.dot(inv_sqrt_degree)
        return scaled_cols.transpose().dot(inv_sqrt_degree).tocoo()

    def construct_degs(self):
        """Return, for every used time step, the degree of each node in iteration order."""
        return [
            [snapshot.degree(nodeid) for nodeid in snapshot.nodes()]
            for snapshot in (self.graphs[i] for i in range(self.min_t, self.time_steps))
        ]

    def _build_pyg_graphs(self):
        """Wrap every (features, adjacency) pair in a torch_geometric ``Data`` object."""
        def to_pyg(feat, adj):
            x = torch.Tensor(feat)
            # Sparse matrix -> (edge_index, edge_weight) representation.
            edge_index, edge_weight = tg.utils.from_scipy_sparse_matrix(adj)
            return Data(x=x, edge_index=edge_index, edge_weight=edge_weight)

        return [to_pyg(feat, adj) for feat, adj in zip(self.features, self.adjs)]

    def __createitems__(self):
        """Build ``self.data_items``: one training item per node of the last snapshot."""
        self.data_items = {}
        for node in list(self.graphs[self.time_steps-1].nodes()):
            node_1_all_time, node_2_all_time = [], []
            # Positive context nodes for every used time step.
            for t in range(self.min_t, self.time_steps):
                contexts = self.context_pairs[t][node]
                if len(contexts) > self.max_positive:
                    # Too many contexts: keep a random subset without repeats.
                    node_1 = [node] * self.max_positive
                    node_2 = list(np.random.choice(contexts, self.max_positive, replace=False))
                else:
                    # Otherwise keep every context node.
                    node_1 = [node] * len(contexts)
                    node_2 = list(contexts)
                assert len(node_1) == len(node_2)
                node_1_all_time.append(node_1)
                node_2_all_time.append(node_2)

            node_1_list = [torch.LongTensor(seq) for seq in node_1_all_time]
            node_2_list = [torch.LongTensor(seq) for seq in node_2_all_time]

            # Negative samples, drawn per time step after all positives so the
            # order of random draws stays the same.
            node_2_negative = []
            for t, positives in enumerate(node_2_list):
                node_negative = fixed_unigram_candidate_sampler(
                    true_clasees=positives[:, None],
                    num_true=1,
                    num_sampled=self.args.neg_sample_size,
                    unique=False,
                    distortion=0.75,
                    unigrams=self.degs[t])
                node_2_negative.append(node_negative)
            node_2_neg_list = [torch.LongTensor(neg) for neg in node_2_negative]

            # Key order matters to consumers that unpack the values positionally.
            self.data_items[node] = {
                'node_1': node_1_list,
                'node_2': node_2_list,
                'node_2_neg': node_2_neg_list,
                "graphs": self.pyg_graphs,
            }

    @staticmethod
    def collate_fn(samples):
        """Merge items into one batch by concatenating tensors per time step.

        ``graphs`` is taken from the first sample.
        """
        batch_dict = {}
        for key in ["node_1", "node_2", "node_2_neg"]:
            per_sample = [sample[key] for sample in samples]
            num_steps = len(per_sample[0])
            batch_dict[key] = [
                torch.cat([seqs[t] for seqs in per_sample])
                for t in range(num_steps)
            ]
        batch_dict["graphs"] = samples[0]["graphs"]
        return batch_dict

    def __len__(self):
        """Number of training nodes."""
        return len(self.train_nodes)

    def __getitem__(self, index):
        """Return the prepared item of the ``index``-th training node."""
        return self.data_items[self.train_nodes[index]]

In [77]:
## 7-5-1 负采样函数
import copy
def fixed_unigram_candidate_sampler(true_clasees, 
                                    num_true, 
                                    num_sampled, 
                                    unique,  
                                    distortion, 
                                    unigrams):
    """Sample negative classes for every row of ``true_clasees``.

    Candidates are the indices ``0 .. len(unigrams) - 1`` minus the true
    classes of the row, drawn with probability proportional to their
    ``unigrams`` weight.

    Args:
        true_clasees: Tensor of shape ``[rows, num_true]`` with the classes to exclude.
        num_true: Number of true classes per row (checked against the shape).
        num_sampled: Number of negatives drawn per row.
        unique: If True the negatives of a row are distinct (sampling without
            replacement); if False repeats are allowed. The flag used to be
            passed straight through as ``replace=unique``, which inverted it.
        distortion: Currently ignored (see TODO).
        unigrams: Sequence of non-negative weights, one per class.

    Returns:
        List with one array of ``num_sampled`` class ids per row.
    """
    # TODO: implementate distortion to unigrams
    assert true_clasees.shape[1] == num_true
    # Convert once instead of deep-copying the weight list for every row.
    weights = np.asarray(unigrams, dtype=np.float64)
    class_ids = np.arange(len(weights))
    samples = []
    for i in range(true_clasees.shape[0]):
        taboo = true_clasees[i].cpu().tolist()
        # Mask out the true classes of this row.
        keep = np.ones(len(weights), dtype=bool)
        if taboo:
            keep[taboo] = False
        candidate = class_ids[keep]
        dist = weights[keep]
        sample = np.random.choice(candidate, size=num_sampled, replace=not unique,
                                  p=dist / np.sum(dist))
        samples.append(sample)
    return samples

In [78]:
## Build the dataset
# `device` is created here but not referenced anywhere else in the visible part of the notebook.
device = torch.device("cpu")
# Constructing the dataset eagerly preprocesses all snapshots and draws every training sample.
dataset = MyDataset(args, graphs, feats, adjs, context_pairs_train)

## 定义数据加载器-dataloader

In [79]:
from torch.utils.data import DataLoader
# Batches of training nodes, merged per time step by MyDataset.collate_fn and
# reshuffled on every pass; num_workers=0 keeps loading in the main process.
dataloader = DataLoader(dataset, 
                            batch_size=args.batch_size, 
                            shuffle=True, 
                            num_workers=0, 
                            collate_fn=MyDataset.collate_fn)

## 8. 定义DySAT模型

In [80]:
import torch.nn as nn
from torch.nn.modules.loss import BCEWithLogitsLoss

class DySAT(nn.Module):
    """Structural attention on every snapshot followed by temporal attention across snapshots."""

    def __init__(self, args, num_features, time_length):
        """Read the configuration and build both attention stacks.

        Args:
            args: Options; reads ``window``, ``residual``, ``neg_weight``,
                ``spatial_drop``, ``temporal_drop`` and the comma-separated
                ``structural_*_config`` / ``temporal_*_config`` strings.
            num_features (int): Input feature dimension of the first structural layer.
            time_length (int): Total timesteps in dataset.
        """
        super(DySAT, self).__init__()
        self.args = args
        if args.window < 0:
            self.num_time_steps = time_length
        else:
            self.num_time_steps = min(time_length, args.window + 1)  # window = 0 => only self.
        self.num_features = num_features

        # Heads / units per structural layer and per temporal layer.
        self.structural_head_config = list(map(int, args.structural_head_config.split(",")))
        self.structural_layer_config = list(map(int, args.structural_layer_config.split(",")))
        self.temporal_head_config = list(map(int, args.temporal_head_config.split(",")))
        self.temporal_layer_config = list(map(int, args.temporal_layer_config.split(",")))
        self.spatial_drop = args.spatial_drop
        self.temporal_drop = args.temporal_drop

        self.structural_attn, self.temporal_attn = self.build_model()

        # Sigmoid + binary cross-entropy in one op.
        self.bceloss = BCEWithLogitsLoss()

    def build_model(self):
        """Create the structural and temporal ``nn.Sequential`` stacks.

        Returns:
            Tuple ``(structural_attention_layers, temporal_attention_layers)``.
        """
        input_dim = self.num_features

        # 1: Structural Attention Layers
        structural_attention_layers = nn.Sequential()
        for i in range(len(self.structural_layer_config)):
            layer = StructuralAttentionLayer(input_dim=input_dim,
                                             output_dim=self.structural_layer_config[i],
                                             n_heads=self.structural_head_config[i],
                                             attn_drop=self.spatial_drop,
                                             ffd_drop=self.spatial_drop,
                                             residual=self.args.residual)
            structural_attention_layers.add_module(name="structural_layer_{}".format(i), module=layer)
            # The next layer consumes this layer's output dimension.
            input_dim = self.structural_layer_config[i]

        # 2: Temporal Attention Layers
        input_dim = self.structural_layer_config[-1]
        temporal_attention_layers = nn.Sequential()
        for i in range(len(self.temporal_layer_config)):
            layer = TemporalAttentionLayer(input_dim=input_dim,
                                           n_heads=self.temporal_head_config[i],
                                           num_time_steps=self.num_time_steps,
                                           attn_drop=self.temporal_drop,
                                           residual=self.args.residual)
            temporal_attention_layers.add_module(name="temporal_layer_{}".format(i), module=layer)
            input_dim = self.temporal_layer_config[i]
        return structural_attention_layers, temporal_attention_layers

    def forward(self, graphs):
        """Embed every node at every time step.

        Args:
            graphs: Indexable by time step; ``graphs[t]`` is fed to the
                structural stack, whose result exposes node features as ``.x``.

        Returns:
            Output of the temporal stack for the padded ``[N, T, F]`` tensor,
            where N is the node count of the last processed snapshot.
        """
        # Structural Attention forward
        structural_out = []
        for t in range(0, self.num_time_steps):
            structural_out.append(self.structural_attn(graphs[t]))
        structural_outputs = [g.x[:,None,:] for g in structural_out] # list of [Ni, 1, F]

        # padding outputs along with Ni
        # Assumes no snapshot has more nodes than the last one (otherwise the
        # padding size below would be negative).
        maximum_node_num = structural_outputs[-1].shape[0]
        out_dim = structural_outputs[-1].shape[-1]
        structural_outputs_padded = []
        for out in structural_outputs:
            zero_padding = torch.zeros(maximum_node_num-out.shape[0], 1, out_dim).to(out.device)
            padded = torch.cat((out, zero_padding), dim=0)
            structural_outputs_padded.append(padded)
        structural_outputs_padded = torch.cat(structural_outputs_padded, dim=1) # [N, T, F]

        # Temporal Attention forward
        temporal_out = self.temporal_attn(structural_outputs_padded)

        return temporal_out

    def get_loss(self, feed_dict):
        """Compute the context-prediction loss summed over time steps.

        Args:
            feed_dict: Mapping with the keys ``node_1``, ``node_2``,
                ``node_2_neg`` (per-time-step index tensors) and ``graphs``.
                Entries are looked up by key, so insertion order and extra
                keys no longer matter.

        Returns:
            The accumulated loss (also stored in ``self.graph_loss``). The last
            time step is not included in the sum.
        """
        node_1 = feed_dict["node_1"]
        node_2 = feed_dict["node_2"]
        node_2_negative = feed_dict["node_2_neg"]
        graphs = feed_dict["graphs"]
        # run gnn
        final_emb = self.forward(graphs) # [N, T, F]
        self.graph_loss = 0
        for t in range(self.num_time_steps - 1):
            # Already [N, F]; a squeeze here would drop the node axis when N == 1.
            emb_t = final_emb[:, t, :]
            source_node_emb = emb_t[node_1[t]]
            tart_node_pos_emb = emb_t[node_2[t]]
            tart_node_neg_emb = emb_t[node_2_negative[t]]
            # Inner products with the positive and the negative context nodes.
            pos_score = torch.sum(source_node_emb*tart_node_pos_emb, dim=1)
            # Negated score with an all-ones target penalises high similarity.
            neg_score = -torch.sum(source_node_emb[:, None, :]*tart_node_neg_emb, dim=2).flatten()
            pos_loss = self.bceloss(pos_score, torch.ones_like(pos_score))
            neg_loss = self.bceloss(neg_score, torch.ones_like(neg_score))
            graphloss = pos_loss + self.args.neg_weight*neg_loss
            self.graph_loss += graphloss
        return self.graph_loss

In [81]:
## 结构注意力层
import torch.nn.functional as F
from torch_geometric.utils import softmax
from torch_scatter import scatter

class StructuralAttentionLayer(nn.Module):
    """Multi-head attention over the edges of one graph snapshot."""

    def __init__(self, 
                input_dim, 
                output_dim, 
                n_heads, 
                attn_drop, 
                ffd_drop,
                residual):
        """Create the projection, attention vectors, dropouts and optional residual path.

        Args:
            input_dim: Size of the incoming node features.
            output_dim: Total output size; each head gets ``output_dim // n_heads``.
            n_heads: Number of attention heads.
            attn_drop: Dropout probability applied to the attention coefficients.
            ffd_drop: Dropout probability applied to the projected features.
            residual: If true, a linear projection of the input is added to the output.
        """
        super(StructuralAttentionLayer, self).__init__()
        # Feature size of a single head.
        self.out_dim = output_dim // n_heads
        self.n_heads = n_heads
        self.act = nn.ELU()

        self.lin = nn.Linear(input_dim, n_heads * self.out_dim, bias=False)

        # Attention vectors for the source / target end of an edge, shape
        # (1, heads, out_dim); initialised in xavier_init().
        self.att_l = nn.Parameter(torch.Tensor(1, n_heads, self.out_dim))
        self.att_r = nn.Parameter(torch.Tensor(1, n_heads, self.out_dim))

        self.leaky_relu = nn.LeakyReLU(negative_slope=0.2)

        self.attn_drop = nn.Dropout(attn_drop)
        self.ffd_drop = nn.Dropout(ffd_drop)

        self.residual = residual
        if self.residual:
            self.lin_residual = nn.Linear(input_dim, n_heads * self.out_dim, bias=False)

        self.xavier_init()

    def forward(self, graph):
        """Return a deep copy of ``graph`` whose ``x`` holds the attended node features.

        ``graph`` must expose ``x``, ``edge_index`` and ``edge_weight``; the
        input object itself is left untouched.
        """
        graph = copy.deepcopy(graph)
        edge_index = graph.edge_index
        edge_weight = graph.edge_weight.reshape(-1, 1)
        H, C = self.n_heads, self.out_dim
        x = self.lin(graph.x).view(-1, H, C) # [N, heads, out_dim]
        # attention
        # sum(dim=-1) already yields [N, heads]; squeezing would also remove a
        # heads axis of size 1 and break the broadcasting against edge_weight.
        alpha_l = (x * self.att_l).sum(dim=-1) # [N, heads]
        alpha_r = (x * self.att_r).sum(dim=-1)
        # Gather the per-node scores onto the edges (row 0 / row 1 of edge_index).
        alpha_l = alpha_l[edge_index[0]] # [num_edges, heads]
        alpha_r = alpha_r[edge_index[1]]
        alpha = alpha_r + alpha_l
        alpha = edge_weight * alpha
        alpha = self.leaky_relu(alpha)
        # Normalise over the edges that share the same edge_index[1] node.
        coefficients = softmax(alpha, edge_index[1]) # [num_edges, heads]
        # dropout
        if self.training:
            coefficients = self.attn_drop(coefficients)
            x = self.ffd_drop(x)
        x_j = x[edge_index[0]]  # [num_edges, heads, out_dim]
        # output
        # dim_size keeps one row per node even when trailing node ids never
        # occur in edge_index[1].
        out = self.act(scatter(x_j * coefficients[:, :, None], edge_index[1], dim=0,
                               dim_size=graph.x.size(0), reduce="sum"))
        out = out.reshape(-1, self.n_heads*self.out_dim) #[num_nodes, output_dim]
        if self.residual:
            out = out + self.lin_residual(graph.x)
        graph.x = out
        return graph

    def xavier_init(self):
        """Xavier-uniform initialisation of both attention vectors."""
        nn.init.xavier_uniform_(self.att_l)
        nn.init.xavier_uniform_(self.att_r)


In [82]:
## 时序注意力层
class TemporalAttentionLayer(nn.Module):
    """Causal multi-head self-attention over the time axis of per-node embeddings.

    Each node's sequence of snapshot embeddings attends only to its own current and
    earlier time steps; a learned position embedding per time step is added first.

    Args:
        input_dim: feature width F of the incoming embeddings (also the output width).
        n_heads: number of attention heads; must divide ``input_dim``.
        num_time_steps: number of learned position embeddings, i.e. the longest
            sequence the layer accepts. It is also used as the attention scale.
        attn_drop: dropout probability applied to the attention weights.
        residual: if true, the position-augmented input is added to the output.

    Raises:
        ValueError: if ``input_dim`` is not a multiple of ``n_heads``.
    """

    def __init__(self, 
                input_dim, 
                n_heads, 
                num_time_steps, 
                attn_drop, 
                residual):
        super(TemporalAttentionLayer, self).__init__()
        # Fail fast: an uneven head split would only surface later as a shape error
        # inside forward().
        if input_dim % n_heads != 0:
            raise ValueError(
                "input_dim ({}) must be divisible by n_heads ({})".format(input_dim, n_heads))
        self.n_heads = n_heads
        self.num_time_steps = num_time_steps
        self.residual = residual

        # One learned position embedding per time step (snapshot).
        self.position_embeddings = nn.Parameter(torch.Tensor(num_time_steps, input_dim))
        # Query / key / value projection matrices.
        self.Q_embedding_weights = nn.Parameter(torch.Tensor(input_dim, input_dim))
        self.K_embedding_weights = nn.Parameter(torch.Tensor(input_dim, input_dim))
        self.V_embedding_weights = nn.Parameter(torch.Tensor(input_dim, input_dim))
        # Feed-forward projection applied after attention.
        self.lin = nn.Linear(input_dim, input_dim, bias=True)
        # Dropout on the attention weights.
        self.attn_dp = nn.Dropout(attn_drop)
        self.xavier_init()

    def forward(self, inputs):
        """Apply causal temporal self-attention.

        Args:
            inputs: tensor [N, T, F] with T <= num_time_steps (the stacked
                per-snapshot outputs of the structural attention layer).

        Returns:
            Tensor [N, T, F]. The softmax attention weights are also kept on
            ``self.attn_wts_all`` with shape [n_heads * N, T, T].

        Raises:
            ValueError: if T exceeds ``num_time_steps``.
        """
        num_nodes, num_steps = inputs.shape[0], inputs.shape[1]
        if num_steps > self.num_time_steps:
            raise ValueError(
                "got {} time steps but only {} position embeddings are available".format(
                    num_steps, self.num_time_steps))

        # 1: Add position embeddings to the input.
        # Slicing the first T rows and broadcasting over the node axis yields the same
        # values as gathering with an explicit [N, T] index tensor, and it also
        # accepts sequences shorter than num_time_steps.
        temporal_inputs = inputs + self.position_embeddings[:num_steps]  # [N, T, F]

        # 2: Query, key and value projections (contract the feature axis).
        q = torch.tensordot(temporal_inputs, self.Q_embedding_weights, dims=([2], [0]))  # [N, T, F]
        k = torch.tensordot(temporal_inputs, self.K_embedding_weights, dims=([2], [0]))  # [N, T, F]
        v = torch.tensordot(temporal_inputs, self.V_embedding_weights, dims=([2], [0]))  # [N, T, F]

        # 3: Split the feature axis into heads and stack the heads on the batch axis.
        split_size = q.shape[-1] // self.n_heads
        q_ = torch.cat(torch.split(q, split_size, dim=2), dim=0)  # [hN, T, F/h]
        k_ = torch.cat(torch.split(k, split_size, dim=2), dim=0)  # [hN, T, F/h]
        v_ = torch.cat(torch.split(v, split_size, dim=2), dim=0)  # [hN, T, F/h]

        # Scaled dot-product scores. The scale is sqrt(num_time_steps), not the
        # per-head width; it is left unchanged so existing checkpoints behave the same.
        outputs = torch.matmul(q_, k_.permute(0, 2, 1))  # [hN, T, T]
        outputs = outputs / (self.num_time_steps ** 0.5)

        # 4: Masked (causal) softmax.
        # Positions above the diagonal are future time steps; they receive a very
        # large negative score so the softmax assigns them zero weight. The [T, T]
        # mask broadcasts over the batch axis instead of being repeated hN times.
        future = torch.tril(outputs.new_ones(num_steps, num_steps)) == 0
        outputs = outputs.masked_fill(future, float(-2 ** 32 + 1))
        outputs = F.softmax(outputs, dim=2)
        self.attn_wts_all = outputs  # [h*N, T, T]

        # 5: Dropout on the attention weights, then weight the values and merge the
        # heads back onto the feature axis.
        if self.training:
            outputs = self.attn_dp(outputs)
        outputs = torch.matmul(outputs, v_)  # [hN, T, F/h]
        outputs = torch.cat(torch.split(outputs, num_nodes, dim=0), dim=2)  # [N, T, F]

        # 6: Feed-forward block and optional residual connection.
        outputs = self.feedforward(outputs)
        if self.residual:
            outputs = outputs + temporal_inputs
        return outputs

    def feedforward(self, inputs):
        """Return ``relu(lin(inputs)) + inputs`` (linear layer with a skip connection)."""
        outputs = F.relu(self.lin(inputs))
        return outputs + inputs

    def xavier_init(self):
        """Xavier-uniform initialisation of the position embeddings and Q/K/V matrices."""
        nn.init.xavier_uniform_(self.position_embeddings)
        nn.init.xavier_uniform_(self.Q_embedding_weights)
        nn.init.xavier_uniform_(self.K_embedding_weights)
        nn.init.xavier_uniform_(self.V_embedding_weights)


In [83]:
# Build DySAT from the parsed arguments, the second dimension of the first feature
# matrix and the configured number of time steps, then move it to the target device.
model = DySAT(args, feats[0].shape[1], args.time_steps)
model = model.to(device)

## 定义优化器

In [84]:
# AdamW over every model parameter; learning rate and weight decay come from args.
opt = torch.optim.AdamW(
    model.parameters(),
    lr=args.learning_rate,
    weight_decay=args.weight_decay,
)

## 进行训练

In [85]:
##对使用设备进行定义
def to_device(batch, device):
    """Return a deep copy of ``batch`` with every contained item moved to ``device``.

    The four values of the batch dict are read positionally, in insertion order, and
    written back under the keys "node_1", "node_2", "node_2_neg" and "graphs".
    The caller's ``batch`` is left untouched.
    """
    moved = copy.deepcopy(batch)
    # Unpack all four values before writing anything back (relies on dict order).
    first_nodes, second_nodes, negative_nodes, snapshots = moved.values()
    per_key_items = (
        ("node_1", first_nodes),
        ("node_2", second_nodes),
        ("node_2_neg", negative_nodes),
        ("graphs", snapshots),
    )
    for key, items in per_key_items:
        moved[key] = [item.to(device) for item in items]
    return moved

## 执行前文提到的链接预测任务：已知倒数第二张图，预测最后一张图的边关系

In [86]:
from __future__ import division, print_function
from sklearn.metrics import roc_auc_score
import numpy as np
from sklearn import linear_model
from collections import defaultdict
import random

# Seed NumPy's global random generator with a fixed value.
np.random.seed(123)
# Link-feature operators iterated over by evaluate_classifier; "HAD" is the only one
# get_link_score implements.
operatorTypes = ["HAD"]

def evaluate_classifier(train_pos, train_neg, val_pos, val_neg, test_pos, test_neg, source_embeds, target_embeds):
    """Evaluate link prediction with a downstream logistic-regression classifier.

    Two kinds of scores are produced:
      * "SIGMOID": ROC-AUC of sigmoid(u^T v) with no classifier training.
      * one entry per operator in ``operatorTypes``: ROC-AUC of a logistic regression
        fitted on the train link features and scored on the val / test links.

    Args:
        train_pos, train_neg, val_pos, val_neg, test_pos, test_neg: iterables of node
            pairs; positives are labelled 1 and negatives -1 for the classifier.
        source_embeds, target_embeds: embeddings indexed by the first / second node
            of each pair.

    Returns:
        (val_results, test_results, val_pred_true, test_pred_true). The result dicts
        map a method name to a two-element list holding the same AUC twice; the
        pred_true dicts map an operator to a list of (predicted probability, label).
    """
    val_results = defaultdict(list)
    test_results = defaultdict(list)
    val_pred_true = defaultdict(list)
    test_pred_true = defaultdict(list)

    # Classifier-free baseline based on sigmoid(u^T v).
    test_auc = get_roc_score_t(test_pos, test_neg, source_embeds, target_embeds)
    val_auc = get_roc_score_t(val_pos, val_neg, source_embeds, target_embeds)
    test_results['SIGMOID'].extend([test_auc, test_auc])
    val_results['SIGMOID'].extend([val_auc, val_auc])

    def _features_and_labels(pos_edges, neg_edges, operator):
        # Stack positive features above negative ones; labels follow the same order.
        pos_feats = np.array(get_link_feats(pos_edges, source_embeds, target_embeds, operator))
        neg_feats = np.array(get_link_feats(neg_edges, source_embeds, target_embeds, operator))
        labels = np.append(np.array([1] * len(pos_feats)), np.array([-1] * len(neg_feats)))
        return np.vstack((pos_feats, neg_feats)), labels

    for operator in operatorTypes:
        train_data, train_labels = _features_and_labels(train_pos, train_neg, operator)
        val_data, val_labels = _features_and_labels(val_pos, val_neg, operator)
        test_data, test_labels = _features_and_labels(test_pos, test_neg, operator)

        # Fit on the training links only; column 1 is the probability of the +1 class.
        classifier = linear_model.LogisticRegression()
        classifier.fit(train_data, train_labels)
        test_predict = classifier.predict_proba(test_data)[:, 1]
        val_predict = classifier.predict_proba(val_data)[:, 1]

        test_roc_score = roc_auc_score(test_labels, test_predict)
        val_roc_score = roc_auc_score(val_labels, val_predict)

        val_results[operator].extend([val_roc_score, val_roc_score])
        test_results[operator].extend([test_roc_score, test_roc_score])

        val_pred_true[operator].extend(zip(val_predict, val_labels))
        test_pred_true[operator].extend(zip(test_predict, test_labels))

    return val_results, test_results, val_pred_true, test_pred_true

In [87]:
## 计算roc值
def get_roc_score_t(edges_pos, edges_neg, source_emb, target_emb):
    """Return the ROC-AUC of sigmoid(u^T v) scores for one snapshot.

    Args:
        edges_pos: iterable of positive node pairs (labelled 1).
        edges_neg: iterable of negative node pairs (labelled 0).
        source_emb: embeddings indexed by the first node of a pair.
        target_emb: embeddings indexed by the second node of a pair.
    """
    # Inner product of every source embedding with every target embedding.
    pair_scores = np.dot(source_emb, target_emb.T)

    def _edge_probabilities(edges):
        # Sigmoid of the inner product for each requested pair.
        return [1 / (1 + np.exp(-pair_scores[edge[0], edge[1]])) for edge in edges]

    pos_probs = _edge_probabilities(edges_pos)
    neg_probs = _edge_probabilities(edges_neg)

    # Positives first, negatives second, with matching 1 / 0 labels.
    scores = np.hstack([pos_probs, neg_probs])
    targets = np.hstack([np.ones(len(pos_probs)), np.zeros(len(neg_probs))])
    return roc_auc_score(targets, scores)

In [88]:
## 获取边特征的函数
def get_link_feats(links, source_embeddings, target_embeddings, operator):
    """Return one link feature per pair in ``links``, in the same order.

    Each feature is get_link_score applied to the source embedding of the pair's
    first node and the target embedding of its second node.
    """
    return [
        get_link_score(source_embeddings[link[0]], target_embeddings[link[1]], operator)
        for link in links
    ]

def get_link_score(fu, fv, operator):
    """Combine two node embeddings into a link feature.

    Only the "HAD" operator (element-wise Hadamard product) is supported; any other
    operator raises NotImplementedError.
    """
    left, right = np.array(fu), np.array(fv)
    if operator != "HAD":
        raise NotImplementedError
    # Element-wise product of the two embeddings is the edge feature.
    return left * right


In [89]:
import os

from tqdm import tqdm

# Make sure the checkpoint directory exists; torch.save does not create it.
os.makedirs(os.path.dirname("../model_checkpoints/model.pt"), exist_ok=True)

best_epoch_val = 0
patient = 0
for epoch in tqdm(range(args.epochs)):
    # ---- training: one optimiser step per batch ----
    model.train()
    epoch_loss = []
    # Wrapping the dataloader itself (not enumerate) lets tqdm show the batch total.
    for feed_dict in tqdm(dataloader):
        feed_dict = to_device(feed_dict, device)
        opt.zero_grad()
        loss = model.get_loss(feed_dict)
        loss.backward()
        opt.step()
        epoch_loss.append(loss.item())

    # ---- evaluation ----
    model.eval()
    # Embeddings at the second-to-last time step are used to predict the links of the
    # last snapshot. No gradients are needed, so skip building the autograd graph.
    # NOTE(review): this reuses the graphs of the last training batch -- presumably
    # every batch carries the same snapshot list; confirm against the dataset.
    with torch.no_grad():
        emb = model(feed_dict["graphs"])[:, -2, :].detach().cpu().numpy()
    val_results, test_results, _, _ = evaluate_classifier(train_edges_pos,
                                                            train_edges_neg,
                                                            val_edges_pos, 
                                                            val_edges_neg, 
                                                            test_edges_pos,
                                                            test_edges_neg, 
                                                            emb, 
                                                            emb)
    epoch_auc_val = val_results["HAD"][1]
    epoch_auc_test = test_results["HAD"][1]

    # Keep the checkpoint with the best validation AUC. Stop once more than
    # args.early_stop consecutive epochs fail to improve it; the stopping epoch
    # itself is not printed.
    if epoch_auc_val > best_epoch_val:
        best_epoch_val = epoch_auc_val
        torch.save(model.state_dict(), "../model_checkpoints/model.pt")
        patient = 0
    else:
        patient += 1
        if patient > args.early_stop:
            break
    print("Epoch {:<3},  Loss = {:.3f}, Val AUC {:.3f} Test AUC {:.3f}".format(epoch,np.mean(epoch_loss),epoch_auc_val, epoch_auc_test))

  0%|          | 0/200 [00:00<?, ?it/s]
0it [00:00, ?it/s][A
1it [00:00,  5.77it/s][A
  0%|          | 1/200 [00:00<00:45,  4.37it/s]

Epoch 0  ,  Loss = 40.665, Val AUC 0.565 Test AUC 0.679



0it [00:00, ?it/s][A
1it [00:00,  5.80it/s][A
  1%|          | 2/200 [00:00<00:45,  4.37it/s]

Epoch 1  ,  Loss = 29.227, Val AUC 0.601 Test AUC 0.714



0it [00:00, ?it/s][A
1it [00:00,  6.03it/s][A
  2%|▏         | 3/200 [00:00<00:44,  4.42it/s]

Epoch 2  ,  Loss = 26.145, Val AUC 0.621 Test AUC 0.738



0it [00:00, ?it/s][A
1it [00:00,  6.16it/s][A
  2%|▏         | 4/200 [00:00<00:43,  4.48it/s]

Epoch 3  ,  Loss = 24.117, Val AUC 0.648 Test AUC 0.781



0it [00:00, ?it/s][A
1it [00:00,  6.12it/s][A
  2%|▎         | 5/200 [00:01<00:43,  4.51it/s]

Epoch 4  ,  Loss = 22.806, Val AUC 0.719 Test AUC 0.837



0it [00:00, ?it/s][A
1it [00:00,  6.11it/s][A
  3%|▎         | 6/200 [00:01<00:42,  4.52it/s]

Epoch 5  ,  Loss = 21.879, Val AUC 0.814 Test AUC 0.869



0it [00:00, ?it/s][A
1it [00:00,  5.90it/s][A
  4%|▎         | 7/200 [00:01<00:42,  4.49it/s]

Epoch 6  ,  Loss = 20.975, Val AUC 0.860 Test AUC 0.880



0it [00:00, ?it/s][A
1it [00:00,  6.00it/s][A
  4%|▍         | 8/200 [00:01<00:42,  4.49it/s]

Epoch 7  ,  Loss = 20.244, Val AUC 0.882 Test AUC 0.881



0it [00:00, ?it/s][A
1it [00:00,  5.88it/s][A
  4%|▍         | 9/200 [00:02<00:42,  4.46it/s]

Epoch 8  ,  Loss = 19.521, Val AUC 0.898 Test AUC 0.882



0it [00:00, ?it/s][A
1it [00:00,  6.07it/s][A
  5%|▌         | 10/200 [00:02<00:42,  4.46it/s]

Epoch 9  ,  Loss = 18.924, Val AUC 0.902 Test AUC 0.881



0it [00:00, ?it/s][A
1it [00:00,  5.98it/s][A
  6%|▌         | 11/200 [00:02<00:42,  4.47it/s]

Epoch 10 ,  Loss = 18.405, Val AUC 0.907 Test AUC 0.884



0it [00:00, ?it/s][A
1it [00:00,  6.09it/s][A
  6%|▌         | 12/200 [00:02<00:41,  4.50it/s]

Epoch 11 ,  Loss = 18.040, Val AUC 0.905 Test AUC 0.888



0it [00:00, ?it/s][A
1it [00:00,  6.00it/s][A
  6%|▋         | 13/200 [00:02<00:41,  4.51it/s]

Epoch 12 ,  Loss = 17.807, Val AUC 0.903 Test AUC 0.892



0it [00:00, ?it/s][A
1it [00:00,  6.04it/s][A
  7%|▋         | 14/200 [00:03<00:41,  4.51it/s]

Epoch 13 ,  Loss = 17.527, Val AUC 0.902 Test AUC 0.901



0it [00:00, ?it/s][A
1it [00:00,  5.95it/s][A
  8%|▊         | 15/200 [00:03<00:41,  4.49it/s]

Epoch 14 ,  Loss = 17.394, Val AUC 0.900 Test AUC 0.908



0it [00:00, ?it/s][A
1it [00:00,  6.10it/s][A
  8%|▊         | 16/200 [00:03<00:40,  4.51it/s]

Epoch 15 ,  Loss = 17.190, Val AUC 0.900 Test AUC 0.910



0it [00:00, ?it/s][A
1it [00:00,  6.09it/s][A
  8%|▊         | 17/200 [00:03<00:40,  4.52it/s]

Epoch 16 ,  Loss = 17.018, Val AUC 0.902 Test AUC 0.910



0it [00:00, ?it/s][A
1it [00:00,  6.14it/s][A
  9%|▉         | 18/200 [00:04<00:40,  4.53it/s]

Epoch 17 ,  Loss = 16.859, Val AUC 0.903 Test AUC 0.909



0it [00:00, ?it/s][A
1it [00:00,  5.99it/s][A
 10%|▉         | 19/200 [00:04<00:39,  4.53it/s]

Epoch 18 ,  Loss = 16.723, Val AUC 0.900 Test AUC 0.908



0it [00:00, ?it/s][A
1it [00:00,  6.22it/s][A
 10%|█         | 20/200 [00:04<00:39,  4.55it/s]

Epoch 19 ,  Loss = 16.639, Val AUC 0.899 Test AUC 0.908



0it [00:00, ?it/s][A
1it [00:00,  6.10it/s][A
 10%|█         | 21/200 [00:04<00:39,  4.56it/s]

Epoch 20 ,  Loss = 16.548, Val AUC 0.903 Test AUC 0.908



0it [00:00, ?it/s][A
1it [00:00,  5.90it/s][A
 10%|█         | 21/200 [00:04<00:41,  4.29it/s]


In [91]:
## Final test: restore the best checkpoint and score it
# map_location keeps loading working when the checkpoint was written on another
# device than the one the model currently lives on.
model.load_state_dict(torch.load("../model_checkpoints/model.pt", map_location=device))
model.eval()
# Inference only: embeddings at the second-to-last time step, without autograd tracking.
with torch.no_grad():
    emb = model(feed_dict["graphs"])[:, -2, :].detach().cpu().numpy()
val_results, test_results, _, _ = evaluate_classifier(train_edges_pos,
                                                        train_edges_neg,
                                                        val_edges_pos, 
                                                        val_edges_neg, 
                                                        test_edges_pos,
                                                        test_edges_neg, 
                                                        emb, 
                                                        emb)
auc_val = val_results["HAD"][1]
auc_test = test_results["HAD"][1]
print("Best Test AUC = {:.3f}".format(auc_test))

Best Test AUC = 0.884
