In [11]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import random

class MeanAggregator(nn.Module):
    def __init__(self, features, cuda=False, gcn=False):
        super(MeanAggregator, self).__init__()
        self.features = features
        self.cuda = cuda
        self.gcn = gcn

    def forward(self, nodes, to_neighs, num_sample=10):

        _set = set
        if not num_sample is None:
            _sample = random.sample
            samp_neighs = [_set(_sample(to_neigh, num_sample)) if len(to_neigh) >= num_sample else to_neigh for to_neigh in to_neighs]
            # 注意：此处先执行前面的判断语句，后执行for to_neigh in to_neighs形成list
        else:
            samp_neighs = to_neighs
        if self.gcn:
            samp_neighs = [set.union(samp_neigh, _set([nodes[i]])) for i, samp_neigh in enumerate(samp_neighs)]

        unique_nodes_list = list(set.union(*samp_neighs))

        unique_nodes = {n:i for i, n in enumerate(unique_nodes_list)}
        mask = Variable(torch.zeros(len(samp_neighs), len(unique_nodes)))
        column_indices = [unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh]
        row_indices = [i for i in range(len(samp_neighs)) for j in range(len(samp_neighs[i]))]
        mask[row_indices, column_indices] = 1

        if self.cuda:
            mask = mask.cuda()

        num_neigh = mask.sum(1, keepdims=True)
        mask = mask.div(num_neigh)

        if self.cuda:
            embed_matrix = self.features(torch.LongTensor(unique_nodes_list).cuda())
        else:
            embed_matrix = self.features(torch.LongTensor(unique_nodes_list))
        to_feats = mask.mm(embed_matrix)
        return to_feats

In [12]:
# 理解关于set的随机采样
a = set
b = random.sample
c = [1,2,3,4,5]
d = [a(b(c,3))]
print(d)
# print([a(c)+a(c)])

[{2, 3, 4}]


In [13]:
# 理解set的并集操作
e = [2,3,4,5,6]
f = [a(c), a(e)]
print(f)
g = list(set.union(*f))
print(g)
h = set.union(a(c), a([7]))
print(h)

[{1, 2, 3, 4, 5}, {2, 3, 4, 5, 6}]
[1, 2, 3, 4, 5, 6]
{1, 2, 3, 4, 5, 7}


In [14]:
import torch
import torch.nn as nn
from torch.nn import init
import torch.nn.functional as F

class Encoder(nn.Module):
    def __init__(self, features, feature_dim, embed_dim, adj_lists, aggregator, num_sample=10, base_model=None, gcn=False, cuda=False):
        super(Encoder, self).__init__()
        self.features = features
        self.feat_dim = feature_dim
        self.adj_lists = adj_lists
        self.aggregator = aggregator
        self.num_sample = num_sample
        if base_model != None:
            self.base_model = base_model
        self.gcn = gcn
        self.embed_dim = embed_dim
        self.cuda = cuda
        self.aggregator.cuda = cuda
        self.weight = nn.Parameter(torch.FloatTensor(embed_dim, self.feat_dim if self.gcn else 2*self.feat_dim))
        # 注意：此处将","后面的语句作为整体执行
        init.xavier_uniform_(self.weight) # 初始化操作

    def forward(self, nodes):
        # neigh_feats = self.aggregator.forward(nodes, self.adj_lists, self.num_sample)
        neigh_feats = self.aggregator.forward(nodes, [self.adj_lists[int(node)] for node in nodes], self.num_sample)
        if not self.gcn:
            if self.cuda:
                self_feats = self.features(torch.LongTensor(nodes).cuda())
            else:
                self_feats = self.features(torch.LongTensor(nodes))
                # print(self_feats.shape) # (34, 16)
            combined = torch.cat([self_feats, neigh_feats], dim=1)
            # print(combined.shape) # (34, 32)
        else:
            combined = neigh_feats
        # print(combined.shape) # (34, 32)
        # print(self.weight.shape) # (16, 32)
        combined = F.relu(self.weight.mm(combined.t()))
        # print(combined.shape) # (16, 34)
        return combined

In [15]:
import numpy as np
import time
import random
from sklearn.metrics import f1_score
from collections import defaultdict
import networkx as nx
from torch.nn import Linear

class SupervisedGraphSage(nn.Module):
    def __init__(self, num_classes, enc):
        super(SupervisedGraphSage, self).__init__()
        self.enc = enc
        self.xent = nn.CrossEntropyLoss()
        self.weight = nn.Parameter(torch.FloatTensor(num_classes, enc.embed_dim))
        init.xavier_uniform_(self.weight)

    def forward(self, nodes):
        embeds = self.enc(nodes)
        # print(embeds.shape) # (16, 34)
        # print(self.weight.shape) # (4, 16)
        scores = self.weight.mm(embeds)
        # print(scores.shape) # (4, 34)
        # return scores.t() # (34, 4)
        scores_softmax = torch.exp(scores.t())/torch.sum(torch.exp(scores.t()), dim=1).reshape(-1, 1)
        # print(scores_softmax.shape)
        # print(scores_softmax.sum(dim=1))
        return scores_softmax # (34, 4)

    def loss(self, nodes, labels):
        scores = self.forward(nodes)
        # print(scores.sum(dim=1))
        return self.xent(scores, labels)

In [16]:
def get_to_neighs(G):
    num_nodes = len(G.nodes)
    to_neighs = []
    for i in range(num_nodes):
        mid_set = set()
        for j in G.neighbors(i):
            mid_set.add(j)
        to_neighs.append(mid_set)
    return to_neighs

In [17]:
# def get_batch_nodes(graph):
#     batch_nodes = []
#     num_nodes = len(graph.nodes)
#     pos = nx.spring_layout(graph)
#     for i in range(num_nodes):
#         batch_nodes.append(list(pos[i]))
#     return batch_nodes

G = nx.karate_club_graph()
# batch_nodes = get_batch_nodes(G)
# print(batch_nodes)

from torch_geometric.datasets import KarateClub
dataset = KarateClub()
data = dataset[0]
labels = data.y
print(labels)

tensor([1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1, 0, 0,
        2, 2, 0, 0, 2, 0, 0, 2, 0, 0])


In [18]:
def create_elements(graph = nx.karate_club_graph(), num_embed = 34, feature_dim = 16, embed_dim = 16, num_classes = 4, nodes=None):
    if nodes == None:
        nodes = []
        for i in range(len(graph.nodes)):
            nodes.append(i)
    split = int(len(nodes)/3*2)
    features = nn.Embedding(num_embed, embed_dim)
    to_neighs = get_to_neighs(graph)
    aggregator = MeanAggregator(features, nodes[:split], to_neighs)
    enc = Encoder(features=features, feature_dim=feature_dim, embed_dim=embed_dim, adj_lists=to_neighs, aggregator=aggregator)
    model = SupervisedGraphSage(num_classes=num_classes, enc=enc)
    # criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    return nodes, to_neighs, model, optimizer, features, split

def train(nodes, labels, model, optimizer):
    times = []
    losses = []
    for i in range(100):
        start_time = time.time()
        optimizer.zero_grad()
        loss = model.loss(nodes, labels)
        losses.append(loss)
        loss.backward()
        optimizer.step()
        end_time = time.time()
        times.append(end_time-start_time)
    # print(times)
    # print(losses)

def predict(nodes, labels, model):
    after_model = model(nodes)
    predicted = torch.argmax(after_model, dim=1)
    acc = (torch.sum(predicted == labels) / len(labels)).item()
    print(acc)

数据集：nx.karate_club_graph()

In [19]:
nodes, to_neights, model, optimizer, features,split = create_elements(graph = nx.karate_club_graph(), num_embed = 34, feature_dim = 16, embed_dim = 16, num_classes = 4, nodes=None)
train(nodes=nodes[:split], labels=labels[:split], model=model, optimizer=optimizer)
predict(nodes=nodes[split:], labels=labels[split:], model=model)

0.25


数据集：nx.read_edgelist("./facebook/414.edges")

In [20]:
G_fb = nx.read_edgelist("./facebook/414.edges")
n = G_fb.number_of_nodes()
m = G_fb.number_of_edges()
print(n, m)
mapping = dict(zip(G_fb.nodes(), range(n)))
# print(mapping)
nx.relabel_nodes(G_fb, mapping, copy=False)
# print(G_fb.nodes)

nodes_fb, to_neights_fb, model_fb, optimizer_fb, features_fb, split_fb = create_elements(graph = G_fb, num_embed = 150, feature_dim = 16, embed_dim = 16, num_classes = 11, nodes=list(G_fb.nodes))
labels_fb = torch.tensor([random.randint(0, 10) for i in range(150)])
train(nodes=nodes_fb[:split_fb], labels=labels_fb[:split_fb], model=model_fb, optimizer=optimizer_fb)
predict(nodes=nodes_fb[:split_fb], labels=labels_fb[:split_fb], model=model_fb)

150 1693
0.9399999976158142


数据集：cora

In [29]:
def load_cora():
    num_nodes = 2708
    num_feats = 1433
    feat_data = np.zeros((num_nodes, num_feats)) # 此处不能用torch代替np
    labels = np.empty((num_nodes, 1), dtype=np.int64) # 此处不能用torch代替np
    node_map = {}
    label_map = {}
    with open("./cora/cora.content") as fp:
        for i, line in enumerate(fp):
            info = line.strip().split()
            info_label = info[-1]
            info = info[:-1]
            info = [int(x) for x in info]
            feat_data[i, :] = info[1:]
            # print(len(feat_data[i, :]))
            node_map[info[0]] = i
            if not info_label in label_map:
                label_map[info_label] = len(label_map) # len({}) = 1
            labels[i] = label_map[info_label]

    adj_lists = defaultdict(set) # 注意：此处不能用普通的set
    print(adj_lists)
    # print(node_map)
    # print(label_map)
    # print(labels.shape)
    with open("./cora/cora.cites") as fp:
        for i, line in enumerate(fp):
            info = line.strip().split()
            info = [int(x) for x in info]
            paper1 = node_map[info[0]]
            paper2 = node_map[info[1]]
            adj_lists[paper1].add(paper2)
            adj_lists[paper2].add(paper1)
    print(adj_lists)
    return feat_data, labels, adj_lists

# def run_cora():

np.random.seed(1) # 应对用np.random生成的随机数
random.seed(1) # 应对用random生成的随机数

num_nodes = 2708
num_feats = 1433
feat_data, labels, adj_lists = load_cora()
features = nn.Embedding(num_nodes, num_feats) # torch.LongTensor等价于torch.tensor
features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False) # torch.FloatTensor等价于torch.Tensor
# 上一行的作用：加快训练速度，提升训练效果(F1)
# print(features)
# print(features.weight.sum(dim=1))

agg1 = MeanAggregator(features)
enc1 = Encoder(features=features, feature_dim=1433, embed_dim=128, adj_lists=adj_lists, aggregator=agg1) # gcn=False
print(enc1(nodes).t().shape)
agg2 = MeanAggregator(features=lambda nodes : enc1(nodes).t())
enc2 = Encoder(features=lambda nodes : enc1(nodes).t(), feature_dim=enc1.embed_dim, embed_dim=128, adj_lists=adj_lists, aggregator=agg2, base_model=enc1) # gcn=False

graphsage = SupervisedGraphSage(7, enc2)
rand_indices = np.random.permutation(num_nodes)
# print(num_nodes)
# print(rand_indices)
test = rand_indices[:1000]
val = rand_indices[1000:1500]
train = list(rand_indices[1500:])
optimizer = torch.optim.SGD(filter(lambda p : p.requires_grad, graphsage.parameters()), lr=0.7)
# parameters = filter(lambda p : p.requires_grad, graphsage.parameters())
# print(parameters)
# print(graphsage.parameters())
times = []
for batch in range(10): #
    batch_nodes = train[:256]
    random.shuffle(train)
    start_time = time.time()
    optimizer.zero_grad()
    loss = graphsage.loss(batch_nodes, torch.LongTensor(labels[np.array(batch_nodes)].reshape(1, -1)[0]))# Variable
    loss.backward()
    optimizer.step()
    end_time = time.time()
    times.append(end_time-start_time)
    # print(batch, loss.item())

# print(labels[val])
# print(labels[train]) # 二者在格式上没有区别
val_output = graphsage.forward(val)
# print(val_output.data.numpy().argmax(axis=1))
# print("-"*50)
# print(val_output.argmax(axis=1))
# print("Validation F1:", f1_score(labels[val], val_output.argmax(axis=1), average="micro"))
# print("Average batch time:", np.mean(times))

defaultdict(<class 'set'>, {})
defaultdict(<class 'set'>, {163: {1536, 2563, 2564, 1030, 1544, 523, 530, 1530, 22, 1558, 2077, 546, 1571, 1572, 1573, 1060, 1575, 2600, 1065, 42, 2604, 1069, 1070, 563, 1590, 55, 1594, 2116, 1098, 1611, 1099, 2638, 1106, 1110, 1113, 602, 1114, 606, 2659, 1127, 1130, 2667, 1136, 2673, 624, 1139, 1650, 2173, 2175, 2177, 129, 1153, 1159, 1675, 141, 145, 658, 659, 2196, 1685, 2200, 1689, 2202, 1178, 2204, 1691, 1696, 174, 689, 1715, 1717, 1205, 1206, 1207, 2232, 188, 189, 191, 1728, 1729, 1218, 1219, 1734, 1224, 1225, 2248, 2251, 714, 717, 2259, 727, 2265, 219, 2274, 1253, 743, 744, 2280, 1257, 237, 2286, 1775, 757, 2295, 1784, 2296, 1274, 2298, 765, 1277, 1792, 769, 1286, 1799, 266, 2316, 781, 1807, 1303, 1305, 793, 800, 290, 1831, 813, 309, 1334, 1333, 2361, 1850, 2363, 1872, 1362, 856, 346, 2396, 1890, 1379, 1380, 1905, 380, 1404, 1410, 390, 395, 910, 402, 2451, 1943, 415, 422, 935, 940, 942, 1457, 1971, 1467, 448, 961, 966, 2518, 982, 2521, 1498, 1515, 2

下面均为帮助理解整个网络架构的测试代码

In [22]:
to_neighs = get_to_neighs(G)
samp_neighs = [set(random.sample(to_neigh, 3)) if len(to_neigh) >= 3 else to_neigh for to_neigh in to_neighs]
# print(samp_neighs)
samp_neighs = [set.union(samp_neigh, set([nodes[i]])) for i, samp_neigh in enumerate(samp_neighs)]
# print(samp_neighs)

In [23]:
unique_nodes_list = list(set.union(*samp_neighs))
# print(unique_nodes_list)
unique_nodes = {n:i for i, n in enumerate(unique_nodes_list)}
# print(unique_nodes)
mask = Variable(torch.zeros(len(samp_neighs), len(unique_nodes)))
column_indices = [unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh]
row_indices = [i for i in range(len(samp_neighs)) for j in range(len(samp_neighs[i]))]
mask[row_indices, column_indices] = 1
num_neigh = mask.sum(1, keepdims=True)
mask = mask.div(num_neigh) # 对每个节点的影响权权重(此处为mean)，节点自身与采样节点的权重一致

In [24]:
embed_matrix = features(torch.LongTensor(unique_nodes_list)) # 34个节点，每个节点都有唯一的编码
# print(embed_matrix)
to_feats = mask.mm(embed_matrix)
# print(mask.shape)
# print(embed_matrix.shape)
# print(to_feats.shape)

In [25]:
weight = nn.Parameter(torch.FloatTensor(3, 2 if False else 4))
init.xavier_uniform_(weight)
# print(weight)

Parameter containing:
tensor([[ 0.5843,  0.7611, -0.2259,  0.0731],
        [ 0.3337, -0.7150,  0.1374, -0.0413],
        [-0.3988, -0.6006, -0.3064,  0.2972]], requires_grad=True)

In [26]:
self_feats = features(torch.LongTensor(nodes))
# print(self_feats.shape)
# print(features)
# print(torch.LongTensor(nodes))

In [27]:
import numpy as np
a = "asd qwert zxcv"
a = a.strip().split()
print(map(float, a[1:-1]))
c = np.zeros((3,3))

<map object at 0x000001C719552D60>


In [49]:
print(len({}))
print(len({1}))
print(len({1, 2}))
print(len({1, 2, 3}))

0
1
2
3


In [50]:
a = torch.tensor([[1, 0, 1], [1, 1, 0]])
b = torch.tensor([[2, 6, 1, 8], [3, 4, 7, 9], [1, 5, 6, 7]])
# c = torch.zeros(2, 4)
ans = a[0]*b.T
print(ans)
print(ans.max(1)[0])
for i in range(a.shape[0]):
    mid = a[i]*b.T
    c[i] = mid.max(1)[0]
print(c)

tensor([[2, 0, 1],
        [6, 0, 5],
        [1, 0, 6],
        [8, 0, 7]])
tensor([2, 6, 6, 8])
tensor([[2., 6., 6., 8.],
        [3., 6., 7., 9.]])


In [64]:
import scipy.sparse as sp
def normalize_adj(mx):
    rowsum = torch.tensor(mx.sum(1))
    print("rowsum:", rowsum)
    r_inv_sqrt = torch.pow(rowsum, -0.5).flatten()
    print("r_inv_sqrt:", r_inv_sqrt)
    r_inv_sqrt[torch.isinf(r_inv_sqrt)] = 0.
    r_mat_inv_sqrt = torch.diag(r_inv_sqrt)
    # return mx.dot(r_mat_inv_sqrt).transpose().dot(r_mat_inv_sqrt)
    print(r_mat_inv_sqrt.dtype, mx.dtype)
    mid = torch.mm(r_mat_inv_sqrt, mx)
    return torch.mm(mid, r_mat_inv_sqrt)

a = torch.Tensor([[1, 3, 2], [5 ,4, 6], [9, 8, 7]])
b = normalize_adj(a)
print(a)
print(b)

rowsum: tensor([ 6., 15., 24.])
r_inv_sqrt: tensor([0.4082, 0.2582, 0.2041])
torch.float32 torch.float32
tensor([[1., 3., 2.],
        [5., 4., 6.],
        [9., 8., 7.]])
tensor([[0.1667, 0.3162, 0.1667],
        [0.5270, 0.2667, 0.3162],
        [0.7500, 0.4216, 0.2917]])


  rowsum = torch.tensor(mx.sum(1))


In [68]:
a = torch.rand(5,3)
b = torch.tensor([0,2])
print(a)
print(a[b])
print(b)
print(list(b))
print(a[list(b)])

tensor([[0.3131, 0.0491, 0.0690],
        [0.5634, 0.2577, 0.4229],
        [0.6798, 0.9122, 0.8519],
        [0.9068, 0.1730, 0.5262],
        [0.1935, 0.8640, 0.9851]])
tensor([[0.3131, 0.0491, 0.0690],
        [0.6798, 0.9122, 0.8519]])
tensor([0, 2])
[tensor(0), tensor(2)]
tensor(0.0690)


In [71]:
a = torch.tensor([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
b = a * a
c = torch.tensor([1, 2, 3])
print(a)
print(b)
print(a*c)

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])
tensor([[1, 4, 9],
        [1, 4, 9],
        [1, 4, 9]])
tensor([[1, 4, 9],
        [1, 4, 9],
        [1, 4, 9]])
