In [54]:
import dgl
import torch as th
import sys
import os
sys.path.append('./..')
import pandas as pd
import numpy as np
from pandarallel import pandarallel
pandarallel.initialize()
from dgl.data.utils import save_graphs
from dgl.data.utils import load_graphs

INFO: Pandarallel will run on 16 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [145]:
def read_DBLP_data(loc='./../DBLP', fname_e='dblp_edges.csv', fname_n='dblp_nodes.csv'):
    """Read the DBLP edge/node CSVs and build edge lists for ``dgl.heterograph``.

    The edge file must provide the columns ``n1``, ``n2`` and ``etype``; the
    node file must provide ``ID``, ``type`` and ``synID``. Every edge endpoint
    (a raw node ``ID``) is replaced by the ``synID`` of the first node row
    whose ``ID`` matches and whose ``type`` equals the first character of
    that ``ID``.

    Each edge type ``et`` is treated as a two-character string: ``et[0]`` is
    used as the source node type and ``et[1]`` as the destination node type.
    For every edge type a reversed relation named ``et[::-1]`` is emitted too.

    Parameters
    ----------
    loc : str
        Directory containing both CSV files.
    fname_e : str
        File name of the edge CSV inside ``loc``.
    fname_n : str
        File name of the node CSV inside ``loc``.

    Returns
    -------
    dict
        Maps ``(src_type, etype, dst_type)`` to a list of
        ``(src_synID, dst_synID)`` tuples, containing both the forward and
        the reversed relation of every edge type.

    Raises
    ------
    IndexError
        If an edge endpoint has no matching node row (same exception type the
        per-row lookup used to raise, now with the offending IDs listed).
    """
    df_e = pd.read_csv(os.path.join(loc, fname_e), low_memory=False)
    df_n = pd.read_csv(os.path.join(loc, fname_n), low_memory=False)

    # Build the ID -> synthetic id lookup once instead of scanning the whole
    # node table for every single edge endpoint.
    # Only rows whose `type` equals the leading character of `ID` are eligible,
    # and the first such row per ID wins.
    eligible = df_n.loc[df_n['ID'].astype(str).str[0] == df_n['type']]
    eligible = eligible.drop_duplicates(subset='ID', keep='first')
    syn_id_of = dict(zip(eligible['ID'].tolist(), eligible['synID'].tolist()))

    def to_syn_ids(column):
        """Translate one endpoint column of raw node IDs into synthetic ids."""
        raw_ids = df_e[column].tolist()
        unresolved = sorted({str(v) for v in raw_ids if v not in syn_id_of})
        if unresolved:
            raise IndexError(
                'no node row found for {} value(s) in column {!r}, e.g. {}'.format(
                    len(unresolved), column, unresolved[:5]
                )
            )
        return [syn_id_of[v] for v in raw_ids]

    edges = df_e.assign(src=to_syn_ids('n1'), dst=to_syn_ids('n2'))

    graph_data = {}
    print(set(df_e['etype']))
    # Sorted so the key insertion order is the same on every run.
    for et in sorted(set(edges['etype'])):
        et_R = et[::-1]
        of_type = edges.loc[edges['etype'] == et]
        src = of_type['src'].tolist()
        dst = of_type['dst'].tolist()
        graph_data[(et[0], et, et[1])] = list(zip(src, dst))
        # Reverse relation so the graph can be walked in both directions.
        graph_data[(et[1], et_R, et[0])] = list(zip(dst, src))
    return graph_data

In [146]:
# Build the {(src_type, etype, dst_type): edge list} mapping from the DBLP
# CSVs and construct the heterograph from it.
graph_data = read_DBLP_data()
g = dgl.heterograph(graph_data)
# Print the node/edge type names and the graph summary as a sanity check.
print('Node types, edge types', g.ntypes, g.etypes)
print('Graph ::', g)

{'PA', 'PC', 'PT'}
Node types, edge types ['A', 'C', 'P', 'T'] ['AP', 'CP', 'PA', 'PC', 'PT', 'TP']
Graph :: Graph(num_nodes={'A': 4057, 'C': 20, 'P': 14328, 'T': 3590},
      num_edges={('A', 'AP', 'P'): 19645, ('C', 'CP', 'P'): 14328, ('P', 'PA', 'A'): 19645, ('P', 'PC', 'C'): 14328, ('P', 'PT', 'T'): 81823, ('T', 'TP', 'P'): 81823},
      metagraph=[('A', 'P', 'AP'), ('P', 'A', 'PA'), ('P', 'C', 'PC'), ('P', 'T', 'PT'), ('C', 'P', 'CP'), ('T', 'P', 'TP')])


In [147]:
# Path (relative to the notebook's working directory) where the graph is stored.
SAVE_FILE = "./dblp_graph_obj.dgl"
# Write the heterograph `g` to SAVE_FILE.
save_graphs(SAVE_FILE, g)

In [148]:
# Reload from SAVE_FILE. `load_graphs` returns a pair; the first element is
# indexed like a sequence of graphs in the next cell, the second is discarded.
# NOTE(review): this rebinds `g` from the single heterograph to that sequence,
# so re-running the save cell after this one would pass the sequence instead.
g,_ = load_graphs(SAVE_FILE)

In [149]:
# Take the first (here: only saved) graph out of the loaded sequence.
graph_obj = g[0]

In [150]:
# Print the reloaded graph's type names and summary so they can be compared
# with the values printed before saving.
print('Node types, edge types', graph_obj.ntypes, graph_obj.etypes)
print('Graph ::', graph_obj)

Node types, edge types ['A', 'C', 'P', 'T'] ['AP', 'CP', 'PA', 'PC', 'PT', 'TP']
Graph :: Graph(num_nodes={'A': 4057, 'C': 20, 'P': 14328, 'T': 3590},
      num_edges={('A', 'AP', 'P'): 19645, ('C', 'CP', 'P'): 14328, ('P', 'PA', 'A'): 19645, ('P', 'PC', 'C'): 14328, ('P', 'PT', 'T'): 81823, ('T', 'TP', 'P'): 81823},
      metagraph=[('A', 'P', 'AP'), ('P', 'A', 'PA'), ('P', 'C', 'PC'), ('P', 'T', 'PT'), ('C', 'P', 'CP'), ('T', 'P', 'TP')])


In [155]:
# Metapaths keyed by the node type they start from. Each entry is a pair of
# edge types that leaves the start type and comes straight back to it
# (C -> P -> C and T -> P -> T).
metapaths = dict(
    C=['CP', 'PC'],
    T=['TP', 'PT'],
)

In [159]:
# Collect one batch of metapath-guided random walks per start node type.
# NOTE(review): no RNG seed is set in the visible cells, so the sampled walks
# presumably differ between runs — confirm whether reproducibility is needed.
RW_list =[]
for ntype, mp in metapaths.items():
    # `mp * 2` repeats the two-edge metapath, so each walk follows four edge
    # types; one walk is started from every node of type `ntype`.
    RW_mp = dgl.sampling.random_walk(
        graph_obj,
        metapath= mp * 2 ,
        nodes = graph_obj.nodes(ntype),
    )
    # Each result is appended unchanged; the recorded output shows it as a
    # pair of tensors per start type.
    RW_list.append(RW_mp)

[(tensor([[    0,  1922,     0,  8783,     0],
          [    1, 13277,     1,  4944,     1],
          [    2,  8766,     2, 12121,     2],
          [    3,   333,     3,  6859,     3],
          [    4,  5923,     4,   255,     4],
          [    5,  6961,     5, 13735,     5],
          [    6,  8811,     6, 13174,     6],
          [    7,  5978,     7,  4593,     7],
          [    8,  8137,     8, 10946,     8],
          [    9,  6463,     9,  7147,     9],
          [   10, 10414,    10,  1839,    10],
          [   11, 13580,    11,  1583,    11],
          [   12,  4478,    12,  3728,    12],
          [   13,  4511,    13,  9931,    13],
          [   14,  8921,    14,  5794,    14],
          [   15, 13253,    15,  7825,    15],
          [   16,  4852,    16,   677,    16],
          [   17,  7245,    17,  6981,    17],
          [   18,  3727,    18,  2370,    18],
          [   19,  3554,    19,  3914,    19]]),
  tensor([1, 2, 1, 2, 1])),
 (tensor([[    0, 13955,  1218