按照 NARS（2020）论文官方代码中的说明，生成 ACM 异构图的结点特征。
https://github.com/facebookresearch/NARS

In [8]:
import pickle 
import dgl 

# Load the partially processed ACM heterogeneous graph from disk.
with open('/Dataset/ACM/Processed/ACM_TransE_partial.dglhg.pkl', 'rb') as fp:
    hg = pickle.load(fp)

# Pull out every tensor that has to survive the rebuild: the per-type node
# features plus the label and split masks stored on the 'paper' nodes.
feat_paper, feat_author, feat_field = (
    hg.nodes[_ntype].data['feat'] for _ntype in ('paper', 'author', 'field')
)
label, train_mask, val_mask, test_mask = (
    hg.nodes['paper'].data[_key]
    for _key in ('label', 'train_mask', 'val_mask', 'test_mask')
)

# Endpoint pairs of the two edge types present in the loaded graph.
PA_edge_index = hg.edges(etype='pa')
PF_edge_index = hg.edges(etype='pf')

# Each relation is added in both directions; the reverse relation is obtained
# by swapping the two endpoint sequences of the forward one.
edge_dict = {
    ('paper', 'PA', 'author'): PA_edge_index,
    ('author', 'AP', 'paper'): tuple(reversed(PA_edge_index)),
    ('paper', 'PF', 'field'): PF_edge_index,
    ('field', 'FP', 'paper'): tuple(reversed(PF_edge_index)),
}

# Node counts are given explicitly (one per feature row) rather than being
# inferred from the edge lists.
node_counts = {
    'paper': len(feat_paper),
    'author': len(feat_author),
    'field': len(feat_field),
}

hg = dgl.heterograph(edge_dict, num_nodes_dict=node_counts)

# Re-attach the node features to the rebuilt graph.
hg.nodes['paper'].data['feat'] = feat_paper
hg.nodes['author'].data['feat'] = feat_author
hg.nodes['field'].data['feat'] = feat_field

# Re-attach the label and the train/val/test masks to the 'paper' nodes.
hg.nodes['paper'].data['label'] = label
hg.nodes['paper'].data['train_mask'] = train_mask
hg.nodes['paper'].data['val_mask'] = val_mask
hg.nodes['paper'].data['test_mask'] = test_mask

# Persist the rebuilt graph.
with open('/Dataset/ACM/Processed/ACM_TransE.dglhg.pkl', 'wb') as fp:
    pickle.dump(hg, fp)

# Bare expression so the notebook displays the graph summary.
hg

Graph(num_nodes={'author': 17431, 'field': 73, 'paper': 4025},
      num_edges={('author', 'AP', 'paper'): 13407, ('field', 'FP', 'paper'): 4025, ('paper', 'PA', 'author'): 13407, ('paper', 'PF', 'field'): 4025},
      metagraph=[('author', 'paper', 'AP'), ('paper', 'author', 'PA'), ('paper', 'field', 'PF'), ('field', 'paper', 'FP')])

In [11]:
# Display the total node and edge counts, summed over all node / edge types.
hg.num_nodes(), hg.num_edges()

(21529, 34864)

In [9]:
# Print the shape of the 'feat' tensor stored on each node type.
for ntype in hg.ntypes:
    ntype_feat = hg.nodes[ntype].data['feat']
    print(ntype, ntype_feat.shape)

author torch.Size([17431, 128])
field torch.Size([73, 128])
paper torch.Size([4025, 1903])


In [10]:
from collections import Counter


def _truncated_percent(part, whole):
    """Return ``part`` as an integer percentage of ``whole``, truncated.

    A ``whole`` of zero yields 0 instead of raising ``ZeroDivisionError``.
    """
    if whole == 0:
        return 0
    return int(part * 100 / whole)


def count_label(label, total):
    """Print the class distribution of ``label``.

    Two lines are printed:

    1. the number of distinct classes, the number of labels, and that number
       as a percentage of ``total``;
    2. every class with its count and its share of the labels, ordered by
       descending count (ties keep first-occurrence order).

    All percentages are truncated to integers, not rounded.

    Args:
        label: 1-D collection of hashable class ids. Objects exposing
            ``.tolist()`` are converted through it; any other iterable is
            materialised with ``list()``.
        total: Size of the full population ``label`` was drawn from. A value
            of 0 is reported as 0% rather than raising.

    Returns:
        None. The result is only printed.
    """
    values = label.tolist() if hasattr(label, 'tolist') else list(label)
    N = len(values)
    counter = Counter(values)

    print(f"{len(counter)}类，{N} ({_truncated_percent(N, total)}%)")

    # most_common() sorts by descending count with a stable sort, so classes
    # with equal counts stay in the order they were first encountered.
    str_list = [
        f"{lb}: {cnt} ({_truncated_percent(cnt, N)}%)"
        for lb, cnt in counter.most_common()
    ]

    print(', '.join(str_list))
    
    
# Node type whose labels are being predicted.
INFER_NTYPE = 'paper'

# Every percentage below is relative to the full set of nodes of this type.
num_infer_nodes = hg.num_nodes(INFER_NTYPE)

# Distribution over all labelled nodes.
label = hg.nodes[INFER_NTYPE].data['label']
count_label(label, num_infer_nodes)

# Distribution inside each split, in train / val / test order.
train_mask = hg.nodes[INFER_NTYPE].data['train_mask']
val_mask = hg.nodes[INFER_NTYPE].data['val_mask']
test_mask = hg.nodes[INFER_NTYPE].data['test_mask']
for split_mask in (train_mask, val_mask, test_mask):
    count_label(label[split_mask], num_infer_nodes)

3类，4025 (100%)
1: 1994 (49%), 0: 1061 (26%), 2: 970 (24%)
3类，808 (20%)
1: 400 (49%), 0: 213 (26%), 2: 195 (24%)
3类，401 (9%)
1: 198 (49%), 0: 106 (26%), 2: 97 (24%)
3类，2816 (69%)
1: 1396 (49%), 0: 742 (26%), 2: 678 (24%)
