### 一、Embedding的Hello World版本

In [None]:
import dgl
import torch

import torch.nn as nn
from dgl.nn import GraphConv 
from torch.optim import Adam

# Seed PyTorch so the embedding / GraphConv initialisation is reproducible.
torch.manual_seed(0)

num_nodes = 5
emd_size = 5

g = dgl.rand_graph(num_nodes=num_nodes, num_edges=25)
# One learnable vector of size `emd_size` per node; its weight matrix is used
# as the node-feature input of the convolution below.
embed = nn.Embedding(num_nodes, emd_size)

# Why no dgl.add_self_loop(g) here (the official example has it): with 5 nodes
# and 25 edges the random graph presumably contains every (src, dst) pair,
# self-loops included -- assumes rand_graph samples edges without replacement,
# TODO confirm. If a graph can contain zero-in-degree nodes, either add
# self-loops or pass allow_zero_in_degree=True to GraphConv.
# The input feature size must be the embedding size, not the node count
# (the two only happen to be equal in this toy setup).
model = GraphConv(emd_size, 1)

# Note: the optimizer must receive BOTH parameter groups -- the GraphConv
# weights and the embedding table -- otherwise the embeddings are never trained.
optimizer = Adam(list(model.parameters()) + list(embed.parameters()), lr=1e-3)

# Dummy binary target: every node is labelled 0.
labels = torch.zeros((num_nodes, 1))
criteria = nn.BCEWithLogitsLoss()
epochs = 5

for epoch in range(epochs):
    # Full-graph forward pass over the current embedding table.
    pred = model(g, embed.weight)
    loss = criteria(pred, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    
    
## 提醒：注意设置随机种子

### 二、加载数据
预计需要结合：
- mini-batch 小批量
- 创建dataset与dataloader
- 选择性读文件
- 异构图    

#### 1.基本构建异构图

In [19]:
import dgl
import torch

def _edges_from_user_zero(dst_ids):
    """Return (src, dst) ID tensors for edges that all start at user 0."""
    return torch.zeros(len(dst_ids), dtype=torch.int64), torch.tensor(dst_ids)


# One (src, dst) tensor pair per canonical edge type
# (source node type, relation name, destination node type).
data_dict = {
    ('user','follows','user'): _edges_from_user_zero([0, 0, 0, 0]),
    ('user','follows','topic'): _edges_from_user_zero([1, 2, 1, 1]),
    ('user','plays','game'): _edges_from_user_zero([3, 4, 1, 1]),
}
# Explicit node counts per node type. Without this the 'user' count would be
# inferred from the largest ID appearing in the edge lists.
num_nodes_dict = dict(user=2, topic=3, game=5)

g = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict)

# The first dimension of a node feature tensor must equal the number of nodes
# of that type (2 users here); a mismatching size such as (5, 4) is rejected.
user_count = num_nodes_dict['user']
g.nodes['user'].data['x'] = torch.randn(user_count, 4)

print(g)
print('---------------')
# g.nodes['user'].data['x'][i] is the feature 'x' of the user node with ID i.
user_features = g.nodes['user'].data['x']
print(user_features[1])

Graph(num_nodes={'game': 5, 'topic': 3, 'user': 2},
      num_edges={('user', 'follows', 'topic'): 4, ('user', 'follows', 'user'): 4, ('user', 'plays', 'game'): 4},
      metagraph=[('user', 'topic', 'follows'), ('user', 'user', 'follows'), ('user', 'game', 'plays')])
---------------
tensor([-0.3391,  0.4894, -1.3954, -0.5781])


#### 2.尝试读取数据
- 阅读他人代码得到的注意点：
  - ***Batching test data for inference***:
     1. `.numpy()`：转化
     2. `torch.unique`：除去多余重复部分
     3. 采样器（sampler）与 dataloader 的写法：
       ```
       sampler = dgl.dataloading.MultiLayerNeighborSampler([15, 10, 5])
       dataloader = dgl.dataloading.NodeDataLoader(g, ...)
       # ------
       sampler = dgl.dataloading.MultiLayerFullNeighborSampler(3)
       ```
    


In [None]:
import pandas as pd




### 知识点搜集：
 - features
   - ***in_features:*** corresponds to the size of your input features.
   - ***out_features:*** corresponds to the size of your output, usually the number of classes for classification or 1 for regression.
   - ***hidden_features:*** corresponds to the size of your hidden state, where you set it as a hyperparameter.
 - You can set out_features to be the same as hidden_features so that the representations can be fed into the score predictor to produce the final positive/negative scores.

## PS:
1.inference层的作用，推断embedding
  - inference: 外层循环layers，内层循环sampling等采样
  - training: 相反(同时内层还有消息传递等)

In [8]:
#-------- ---- minibatch training ------ ------ #
import dgl
import torch.nn as nn
import torch.nn.functional as F

class TwoLayerGCN(nn.Module):
    """Two stacked GraphConv layers, each followed by a ReLU.

    Args:
        in_features: size of the input node features.
        hidden_features: size of the hidden representation (hyperparameter).
        out_features: size of the output representation.
    """

    def __init__(self, in_features, hidden_features, out_features):
        # Must be the double-underscore __init__: with `_init_` Python never
        # calls this method, so nn.Module is not initialised and conv1/conv2
        # are never created.
        super().__init__()
        self.conv1 = dgl.nn.GraphConv(in_features, hidden_features)
        self.conv2 = dgl.nn.GraphConv(hidden_features, out_features)

    def forward(self, g, h):
        """Apply both convolutions to features `h` on graph `g`.

        For minibatch training the single graph `g` is replaced by a list of
        blocks: blocks[0] feeds the first layer, blocks[1] the second.
        """
        h = F.relu(self.conv1(g, h))  # minibatch: g -> blocks[0]
        h = F.relu(self.conv2(g, h))  # minibatch: g -> blocks[1]
        return h

# Heterogeneous variant -- note the differences from the homogeneous model:
# one GraphConv per relation wrapped in HeteroGraphConv, and features passed
# around as dictionaries keyed by node type.
class HeteroGCNLayer(nn.Module):
    """One heterogeneous graph-convolution layer followed by a ReLU.

    Args:
        in_features: size of the input node features.
        out_features: size of the output node features.
        rels: iterable of relation (edge type) names; each gets its own
            GraphConv module.
    """

    def __init__(self, in_features, out_features, rels):
        super().__init__()
        self.conv1 = dgl.nn.HeteroGraphConv(
            {rel: dgl.nn.GraphConv(in_features, out_features) for rel in rels})

    def forward(self, g, h):
        """Return a dict mapping node type -> activated output features."""
        h = self.conv1(g, h)
        # The convolution output is a dict, so the activation is applied per entry.
        h = {k: F.relu(v) for k, v in h.items()}
        return h

# Training step (sketch).
# NOTE(review): `model`, `g`, `h`, `labels` and `train_mask` are not defined
# in this cell; they must already exist in the kernel. The ['user'] lookup
# means the model output is indexed by the key 'user' -- presumably a dict of
# per-node-type outputs from a heterogeneous model; confirm.
model.train()
logits = model(g, h)['user']
# The loss is computed only over the entries selected by train_mask.
loss = F.cross_entropy(logits[train_mask], labels[train_mask])

print(loss.item())

2. API接口使用与参数使用
   - apply_edges

In [7]:
import dgl
import torch
## Notes on using the apply_edges method
def _double_h(edges):
    """Edge UDF: return the edge feature 'h' multiplied by 2."""
    return {'h': edges.data['h'] * 2}


plays_src, plays_dst = [0, 1, 1, 2], [0, 0, 2, 1]
g = dgl.heterograph({('user','plays','game'): (plays_src, plays_dst)})
# Four edges, each starting with a 5-dimensional all-ones feature 'h'.
g.edges[('user','plays','game')].data['h'] = torch.ones(4, 5)
# apply_edges stores the dict returned by the UDF back into the edge data.
g.apply_edges(_double_h)

# Equivalent lookup through the canonical edge type:
# print(g.edges[('user','plays','game')].data['h'])
print(g.edata['h'])


tensor([[2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.]])


In [17]:
import dgl
import torch

# Edge lists keyed by canonical edge type (src type, relation, dst type).
# 'follows' is used for two different destination node types.
follows_user = (torch.tensor([0, 1]), torch.tensor([1, 2]))
follows_game = (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))
plays_game = (torch.tensor([1, 3]), torch.tensor([2, 3]))

g = dgl.heterograph({
    ('user', 'follows', 'user'): follows_user,
    ('user', 'follows', 'game'): follows_game,
    ('user', 'plays', 'game'): plays_game,
})
# List every (src type, relation, dst type) triple known to the graph.
print(g.canonical_etypes)

[('user', 'follows', 'game'), ('user', 'follows', 'user'), ('user', 'plays', 'game')]


In [21]:
# Canonical edge types as (source type, relation name, destination type).
t = [
    ('user', 'follows', 'game'),
    ('user', 'follows', 'user'),
    ('user', 'plays', 'game'),
]
# Keep only the middle element (the relation name) of every triple.
res = [triple[1] for triple in t]

print(res)

['follows', 'follows', 'plays']


3.建图 $\rightarrow$ 注意这里输入卷积层的特征和嵌入区别

In [1]:
import torch
import dgl
from dgl.data import RedditDataset

# Load the Reddit dataset with self_loop=True and take its first graph.
# NOTE(review): the recorded output of this cell is
# "RuntimeError: Bool type is not supported by dlpack" -- presumably an
# incompatibility between the installed DGL and PyTorch versions; confirm
# before relying on `g` in later cells.
data = RedditDataset(self_loop=True)
g = data[0]


Using backend: pytorch


RuntimeError: Bool type is not supported by dlpack

In [6]:
# -*- coding: utf-8 -*-
import csv

import dgl
import numpy as np
import pandas as pd
import torch
from dgl.data import DGLDataset
# Seed PyTorch's RNG for reproducibility.
torch.manual_seed(0)
# Graph size parameters (not used by the loading code below).
num_nodes = 6
num_edges = 15
# Load the edge list: column 0 is parsed as the source node ID and
# column 1 as the destination node ID.
src = []
dst = []

# newline='' is how the csv module expects files to be opened, so that line
# endings inside the file are handled by the reader itself.
with open('signal_1.csv', 'r', newline='') as f:
    reader = csv.reader(f, delimiter=',')
    for line in reader:
        # csv.reader yields an empty list for a blank line; skip it instead of
        # failing with IndexError on line[0].
        if not line:
            continue
        src.append(int(line[0]))
        dst.append(int(line[1]))




# Adapted from someone else's code.
class ConceptNetDataset(DGLDataset):
    """Single-graph dataset built from a ConceptNet-style edge table.

    The table at ``path`` is read with ``pandas.read_csv`` and must provide the
    columns ``e1`` (source entity), ``e2`` (destination entity), ``rel``
    (relation name) and ``score`` (edge weight). Every distinct relation
    becomes one edge type between nodes of a single node type.

    Args:
        path: location of the delimited text file.
        sep: column separator passed to ``pandas.read_csv``.
    """

    def __init__(self, path, sep):
        # Must be set before super().__init__(): process() reads both
        # attributes (NOTE(review): this ordering suggests the base
        # constructor triggers process() -- confirm against DGLDataset).
        self.path = path
        self.sep = sep
        super().__init__(name='concept_net')

    def process(self):
        """Read the table and build ``self.graph`` with features and masks."""
        # Relations for which a reversed copy of every edge is added.
        bidirections = ["RelatedTo", "Synonym", "Antonym", "DistinctFrom",
                        "LocatedNear", "SimilarTo", "EtymologicallyRelatedTo"]
        data = pd.read_csv(self.path, sep=self.sep)

        # Collect all entities and relation names, then map entities to IDs.
        nodes = pd.concat([data["e1"], data["e2"]], axis=0).unique()
        edges_type = data["rel"].unique().tolist()
        entities = {name: idx for idx, name in enumerate(nodes)}
        data["e1"] = data["e1"].map(entities)
        data["e2"] = data["e2"].map(entities)

        # The node "label"/feature is simply the node's own integer ID.
        n_nodes = len(entities)
        node_labels = torch.from_numpy(np.arange(n_nodes))

        node_type = "_N"  # '_N' can be replaced by an arbitrary name
        num_nodes_dict = {node_type: n_nodes}

        # Add backlinks for the bidirectional relations. The swap is done by
        # column NAME, so it does not depend on e1/e2 being the first two
        # columns of the file; concat then aligns the columns by name.
        reverse = data[data["rel"].isin(bidirections)].rename(
            columns={"e1": "e2", "e2": "e1"})
        data = pd.concat([data, reverse], axis=0, ignore_index=True)

        # Rows of each relation, in the order the edges are handed to DGL, so
        # the per-edge features below line up with the edge lists.
        rows_by_rel = {e_t: data[data["rel"] == e_t] for e_t in edges_type}

        data_dict = dict()
        for e_t, rel_rows in rows_by_rel.items():
            src = torch.from_numpy(rel_rows['e1'].to_numpy())
            dst = torch.from_numpy(rel_rows['e2'].to_numpy())
            data_dict[(node_type, e_t, node_type)] = (src, dst)

        self.graph = dgl.heterograph(data_dict, num_nodes_dict)
        for e_t, rel_rows in rows_by_rel.items():
            # Attach the weight to every edge of this type.
            self.graph.edges[e_t].data["weight"] = torch.from_numpy(
                rel_rows['score'].to_numpy())
            # Random edge train mask: each edge is selected with probability 0.6.
            self.graph.edges[e_t].data['train_mask'] = torch.zeros(
                len(rel_rows), dtype=torch.bool).bernoulli(0.6)

        # Node feature: the index of the word in the vocabulary.
        self.graph.nodes[node_type].data["feature"] = node_labels

        # Deterministic 60/20/20 train/val/test split over the node IDs for
        # node classification. (A random node train_mask used to be written
        # here first, but it was overwritten by this split right away.)
        n_train = int(n_nodes * 0.6)
        n_val = int(n_nodes * 0.2)
        train_mask = torch.zeros(n_nodes, dtype=torch.bool)
        val_mask = torch.zeros(n_nodes, dtype=torch.bool)
        test_mask = torch.zeros(n_nodes, dtype=torch.bool)
        train_mask[:n_train] = True
        val_mask[n_train:n_train + n_val] = True
        test_mask[n_train + n_val:] = True
        self.graph.ndata['train_mask'] = train_mask
        self.graph.ndata['val_mask'] = val_mask
        self.graph.ndata['test_mask'] = test_mask

    def __getitem__(self, i):
        """Return the dataset's only graph; ``i`` must be 0.

        Raises:
            IndexError: if ``i`` is not 0.
        """
        # The dataset holds exactly one graph (see __len__), so return the
        # graph itself rather than indexing into it.
        if i != 0:
            raise IndexError("ConceptNetDataset contains a single graph; index must be 0")
        return self.graph

    def __len__(self):
        """Number of graphs in the dataset (always 1)."""
        return 1

[2 2 1 1 0 2 2 1 0]
