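"""Data loader for the path/context-based link prediction model.

Builds a heterogeneous graph from a pickled edge-split dataset, samples node
contexts (SnowBallSampler, BFS-style) and relational paths (RandomWalkSampler,
DFS-style) with littleballoffur, and packages the results as model inputs via
load_data()."""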
import os
import pickle as pkl
import numpy as np
import networkx as nx
import pandas as pd
from collections import defaultdict
import multiprocessing as mp
import scipy.sparse as sp
from littleballoffur import SnowBallSampler, RandomWalkSampler
from tqdm import tqdm
entity2edge_set = defaultdict(set) # entity id -> set of (both incoming and outgoing) edges connecting to this entity
entity2edges = [] # each row in entity2edges is the sampled edges connecting to this entity
edge2entities = [] # each row in edge2entities is the two entities connected by this edge
edge2relation = [] # each row in edge2relation is the relation type of this edge
e2re = defaultdict(set) # entity index -> set of pair (relation, entity) connecting to this entity
G = nx.empty_graph()
S, HH = list(), dict()
node_mapping, not_in_g = list(), list()
bfs_subgraphs, bfs_contexts, bfs_e2nodes, bfs_e2relations, bfs_e2edges = dict(), dict(), dict(), dict(), dict()
dfs_subgraphs, dfs_contexts, dfs_e2nodes, dfs_e2relations, dfs_e2edges = dict(), dict(), dict(), dict(), dict()
null_entity, null_relation, null_edge = 0, 0, 0
def build_kg(train_data):
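    """Populate the global adjacency tables (entity2edge_set, edge2entities,
    edge2relation, e2re) from the training triplets, then point every entity
    that has no training edge at the null edge."""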
global entity2edge_set, entity2edges, edge2entities, edge2relation, e2re, null_entity, null_relation, null_edge
for edge_idx, triplet in enumerate(train_data):
head_idx, tail_idx, relation_idx = triplet
if args.use_context:
entity2edge_set[head_idx].add(edge_idx)
entity2edge_set[tail_idx].add(edge_idx)
edge2entities.append([head_idx, tail_idx])
edge2relation.append(relation_idx)
if args.use_path:
e2re[head_idx].add((relation_idx, tail_idx))
e2re[tail_idx].add((relation_idx, head_idx))
# To handle the case where a node does not appear in the training data (i.e., this node has no neighbor edge),
# we introduce a null entity (ID: n_entities), a null edge (ID: n_edges), and a null relation (ID: n_relations).
# entity2edge_set[isolated_node] = {null_edge}
# entity2edge_set[null_entity] = {null_edge}
# edge2entities[null_edge] = [null_entity, null_entity]
# edge2relation[null_edge] = null_relation
# The feature of null_relation is a zero vector. See _build_model() of model.py for details
if args.use_context or args.use_path:
edge2entities.append([null_entity, null_entity])
edge2relation.append(null_relation)
for i in range(len(node_id2index) + 1):
if i not in entity2edge_set:
entity2edge_set[i] = {null_edge}
# sampled_neighbors = np.random.choice(list(entity2edge_set[i]), size=args.neighbor_samples,
# replace=len(entity2edge_set[i]) < args.neighbor_samples)
# entity2edges.append(sampled_neighbors)
def get_h2t(train_triplets, valid_triplets, test_triplets):
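    """Map each head entity to the set of all tails it is linked to across
    the train/validation/test splits."""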
head2tails = defaultdict(set)
for head, tail, relation in train_triplets + valid_triplets + test_triplets:
head2tails[head].add(tail)
return head2tails
def get_params_for_mp(n_triples):
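    """Split n_triples into one contiguous [start, end) range per CPU core;
    return the core count, a worker pool, and the range list."""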
n_cores = mp.cpu_count()
pool = mp.Pool(n_cores)
avg = n_triples // n_cores
range_list = []
start = 0
for i in range(n_cores):
num = avg + 1 if i < n_triples - avg * n_cores else avg
range_list.append([start, start + num])
start += num
    # Example on an 8-core machine with n_triples == 9144:
    #   range_list == [[0, 1143], [1143, 2286], ..., [8001, 9144]]
return n_cores, pool, range_list
def dfs(head, tails, e2re, max_path_len):
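    """Sample random-walk paths of length 2..max_path_len starting from `head`
    within its connected component, and collect the relation sequence of every
    sampled path that ends at a node in `tails`. (`e2re` is accepted for
    interface compatibility but is unused here.)"""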
global G, S, HH, node_mapping, dfs_subgraphs, dfs_contexts, dfs_e2nodes, dfs_e2relations, dfs_e2edges, not_in_g
    ht2paths = defaultdict(set)
    flag = False
    if args.use_path:
        if head not in dfs_subgraphs: dfs_subgraphs[head] = list()
    # Locate the connected component that contains `head`.
    for idx, item in enumerate(S):
        if head in item.nodes():
            tmp = node_mapping[idx]
            rel_head = tmp[head]
            H = HH[idx]
            flag = True
            break
    if not flag:
        # print(head)
        not_in_g.append(head)
        return ht2paths
    # H = nx.relabel_nodes(item, tmp)
    ''' Node path sampling '''
    # Fall back to a NULL singleton graph when the start node is isolated
    # (degree 0), has degree 2 (the degree a lone self-loop contributes), or
    # carries a self-loop.
    if H.degree()[rel_head] in (0, 2) or rel_head in list(nx.nodes_with_selfloops(H)):
        g = nx.Graph()
        g.add_node(null_entity, name="NULL", id=null_entity)
        g.add_edge(null_entity, null_entity, name="NULL", id=null_edge, relation=null_relation, weight=1.0)
        dfs_subgraphs[head].append(g)
    for samples in range(args.path_samples):
        for length in range(2, max_path_len + 1):
            if H.degree()[rel_head] in (0, 2) or rel_head in list(nx.nodes_with_selfloops(H)):
                continue
            else:
                try:
                    model = RandomWalkSampler(number_of_nodes=length)
                    nG = model.sample(H, rel_head)
                except Exception:
                    # Retry with a walk capped by the node's degree.
                    if H.degree()[rel_head] > 1:
                        model = RandomWalkSampler(number_of_nodes=min(H.degree()[rel_head], length))
                        nG = model.sample(H, rel_head)
                    else:
                        continue
                inv_map = {v: k for k, v in tmp.items()}
                rev_nG = nx.relabel_nodes(nG, inv_map)
                dfs_subgraphs[head].append(rev_nG)
    for path_graphs in dfs_subgraphs[head]:
        item_list = list(path_graphs.nodes())
        dfs_contexts[head] = item_list
        dfs_e2nodes[head] = item_list
        rel_list = list()
        edge_list = list()
        for e in path_graphs.edges(data=True):
            rel_list.append(e[2]['relation'])
            edge_list.append(e[2]['id'])
        dfs_e2relations[head] = rel_list
        dfs_e2edges[head] = edge_list
        tail = item_list[-1]
        if tail in tails:  # this sampled path ends at a known tail
            ht2paths[(head, tail)].add(tuple(rel_list))
    # print(ht2paths)
    '''
    defaultdict(<class 'set'>, {(8350, 2047): {(2,), (2, 0, 0)},
                                (8350, 8712): {(2,), (2, 2, 2), (2, 0, 0), (2, 2)},
                                (8350, 3893): {(0, 0, 2), (2,), (2, 0, 0), (2, 2)},
                                (8350, 5876): {(2,), (0, 0), (2, 2, 2), (2, 2)},
                                (8350, 7756): {(2,), (2, 0, 0)},
                                (8350, 8322): {(2, 0), (0,), (2, 2, 0)}})
    '''
    return ht2paths
# input: [(h1, {t1, t2 ...}), (h2, {t3 ...}), ...]
# output: {(h1, t1): paths, (h1, t2): paths, (h2, t3): paths, ...}
def count_all_paths(inputs):
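    """Multiprocessing worker: run dfs() for every (head, tails) pair in this
    shard and merge the resulting {(head, tail): relation paths} dicts."""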
e2re, max_path_len, head2tails, pid = inputs
ht2paths = {}
for i, (head, tails) in enumerate(head2tails):
ht2paths.update(dfs(head, tails, e2re, max_path_len))
# # print('pid %d: %d / %d' % (pid, i, len(head2tails)))
# print('pid %d done' % pid)
return ht2paths
def count_all_paths_with_mp(e2re, max_path_len, head2tails):
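    """Shard head2tails across all CPU cores and count paths in parallel."""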
n_cores, pool, range_list = get_params_for_mp(len(head2tails))
results = pool.map(count_all_paths, zip([e2re] * n_cores,
[max_path_len] * n_cores,
[head2tails[i[0]:i[1]] for i in range_list],
range(n_cores)))
    pool.close()
    pool.join()
    res = defaultdict(set)
for ht2paths in results:
res.update(ht2paths)
return res
def count_paths(triplets, ht2paths, train_set):
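    """Collect the candidate relation paths for each triplet; unless the
    inverse triplet is also in the training set, drop the trivial single-hop
    path consisting of the triplet's own relation."""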
res = []
for head, tail, relation in triplets:
path_set = ht2paths[(head, tail)] # Extract all the paths with (h,t)
if (tail, head, relation) in train_set:
path_list = list(path_set)
else:
path_list = list(path_set - {tuple([relation])})
res.append([list(i) for i in path_list])
return res
def get_paths(train_triplets, valid_triplets, test_triplets):
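    """Return cached train/valid/test paths if present; otherwise count them
    with multiprocessing and pickle the results for reuse."""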
directory = 'dataset/' + args.dataset + '/cache/'
length = str(args.max_path_len)
    os.makedirs(directory, exist_ok=True)
if os.path.exists(directory + 'train_dfs_' + length + '.pkl'):
print('loading paths from files ...')
train_paths = pkl.load(open(directory + 'train_dfs_' + length + '.pkl', 'rb'))
valid_paths = pkl.load(open(directory + 'val_dfs_' + length + '.pkl', 'rb'))
test_paths = pkl.load(open(directory + 'test_dfs_' + length + '.pkl', 'rb'))
else:
print('counting paths from head to tail ...')
head2tails = get_h2t(train_triplets, valid_triplets, test_triplets)
ht2paths = count_all_paths_with_mp(e2re, args.max_path_len, [(k, v) for k, v in head2tails.items()])
# print(ht2paths)
train_set = set(train_triplets)
train_paths = count_paths(train_triplets, ht2paths, train_set)
valid_paths = count_paths(valid_triplets, ht2paths, train_set)
test_paths = count_paths(test_triplets, ht2paths, train_set)
# print(train_paths)
# print(len(train_paths), len(valid_paths), len(test_paths)) # 89122 8000 8000: for context=3
print('dumping paths to files ...')
pkl.dump(train_paths, open(directory + 'train_dfs_' + length + '.pkl', 'wb'))
pkl.dump(valid_paths, open(directory + 'val_dfs_' + length + '.pkl', 'wb'))
pkl.dump(test_paths, open(directory + 'test_dfs_' + length + '.pkl', 'wb'))
# if using rnn and no path is found for the triplet, put an empty path into paths
if args.path_type == 'rnn':
for paths in train_paths + valid_paths + test_paths:
if len(paths) == 0:
paths.append([])
return train_paths, valid_paths, test_paths
def get_path_dict_and_length(train_paths, valid_paths, test_paths, null_relation, max_path_len):
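    """Assign an ID to every distinct relation path and record, per ID, the
    null-relation-padded path and its true (unpadded) length."""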
path2id = {}
id2path = []
id2length = []
n_paths = 0
for paths_of_triplet in train_paths + valid_paths + test_paths:
for path in paths_of_triplet:
path_tuple = tuple(path)
if path_tuple not in path2id:
path2id[path_tuple] = n_paths
id2length.append(len(path))
id2path.append(path + [null_relation] * (max_path_len - len(path))) # padding
n_paths += 1
return path2id, id2path, id2length
def get_sparse_feature_matrix(non_zeros, n_cols):
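    """Build a sparse multi-hot matrix whose row i has a 1.0 in every column
    listed in non_zeros[i]."""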
features = sp.lil_matrix((len(non_zeros), n_cols), dtype=np.float64)
for i in range(len(non_zeros)):
for j in non_zeros[i]:
            features[i, j] = 1.0
return features
def one_hot_path_id(train_paths, valid_paths, test_paths, path_dict):
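    """Encode each triplet's paths as bag-of-paths one-hot IDs and return one
    sparse feature matrix per split."""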
res = []
for data in (train_paths, valid_paths, test_paths):
bop_list = [] # bag of paths
for paths in data:
bop_list.append([path_dict[tuple(path)] for path in paths])
res.append(bop_list)
# print(res)
return [get_sparse_feature_matrix(bop_list, len(path_dict)) for bop_list in res]
def sample_paths(train_paths, valid_paths, test_paths, path_dict, path_samples):
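    """Sample a fixed number of path IDs per triplet (with replacement when a
    triplet has fewer than `path_samples` paths); return one int32 array of
    shape (n_triplets, path_samples) per split."""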
res = []
for data in [train_paths, valid_paths, test_paths]:
path_ids_for_data = []
for paths in data:
path_ids_for_triplet = [path_dict[tuple(path)] for path in paths]
sampled_path_ids_for_triplet = np.random.choice(
path_ids_for_triplet, size=path_samples, replace=len(path_ids_for_triplet) < path_samples)
path_ids_for_data.append(sampled_path_ids_for_triplet)
path_ids_for_data = np.array(path_ids_for_data, dtype=np.int32)
res.append(path_ids_for_data)
return res
def load_data(model_args):
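    """Entry point: load the pickled network, build the training and full
    graphs, sample node contexts and relational paths, and return
    (edge splits, path features, n_relations, neighbor_params, path_params)."""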
global args, node_id2index, edge_type2index, G, S, HH, node_mapping, bfs_subgraphs, bfs_contexts, not_in_g, \
bfs_e2nodes, bfs_e2relations, bfs_e2edges, dfs_e2nodes, dfs_e2relations, dfs_e2edges, null_entity, null_relation, null_edge
args = model_args
''' Loading networked data from pickle file. '''
network = pkl.load(open('dataset/{}.pkl'.format(args.dataset), "rb"))
node_id2index = network["node_id2index"]
edge_type2index = network["edge_type2index"]
train_edges = network["splits"]['train_edges']
val_edges = network["splits"]['val_edges']
test_edges = network["splits"]['test_edges']
''' Making the network undirected. '''
# train_edges = train_edges + ([(tup[1], tup[0], tup[2]) for tup in train_edges])
# val_edges = val_edges + ([(tup[1], tup[0], tup[2]) for tup in val_edges])
# test_edges = test_edges + ([(tup[1], tup[0], tup[2]) for tup in test_edges])
print('Reading entities and relations ...')
print('Reading train, validation, and test edges ...')
print('Processing the heterogeneous graph ...')
# print(len(entity2edges)) #9204
''' Creating a graph for sampling contexts and paths. '''
edges = [train_edges, val_edges, test_edges]
    all_edges = train_edges + val_edges + test_edges  # new list; leaves train_edges untouched
null_entity = len(node_id2index)
null_relation = len(edge_type2index)
null_edge = len(all_edges)
# print(len(train_edges), len(val_edges), len(test_edges)) # 44561 4000 4000
''' Storing train-edge related statistics. '''
build_kg(train_edges)
''' Build train & whole graph. '''
df = pd.DataFrame(all_edges, columns=['target', 'source', 'relation']) # input edge format is: h-t-r
G = nx.from_pandas_edgelist(df, 'target', 'source', ['relation'])
df1 = pd.DataFrame(train_edges, columns=['target', 'source', 'relation']) # input edge format is: h-t-r
G1 = nx.from_pandas_edgelist(df1, 'target', 'source', ['relation'])
# print(nx.number_of_isolates(G)) #147 The below print statements are for train data
# comp_list = [len(c) for c in sorted(nx.connected_components(G), key=len, reverse=True)]
# print(comp_list) #DDB: [9053, 2, 2] Rest are 147 singleton nodes
for edge_idx, triplet in enumerate(all_edges): # all_edges related attributes are stored.
head_idx, tail_idx, relation_idx = triplet
G[head_idx][tail_idx]['weight'] = 1.0
G[head_idx][tail_idx]['id'] = edge_idx
#if 'id' not in G[head_idx]: G[head_idx]['id'] = head_idx
#if 'id' not in G[tail_idx]: G[tail_idx]['id'] = tail_idx
G[head_idx][tail_idx]['relation'] = relation_idx
G[head_idx][tail_idx]['name'] = "N_NULL"
for edge_idx, triplet in enumerate(train_edges): # train_edges related attributes are stored.
head_idx, tail_idx, relation_idx = triplet
G1[head_idx][tail_idx]['weight'] = 1.0
G1[head_idx][tail_idx]['id'] = edge_idx
#if 'id' not in G1[head_idx]: G1[head_idx]['id'] = head_idx
#if 'id' not in G1[tail_idx]: G1[tail_idx]['id'] = tail_idx
G1[head_idx][tail_idx]['relation'] = relation_idx
G1[head_idx][tail_idx]['name'] = "N_NULL"
    # Give isolated (or entirely absent) nodes a NULL self-loop so the
    # samplers never start from a degree-0 node.
    isolates_G, isolates_G1 = set(nx.isolates(G)), set(nx.isolates(G1))
    for i in range(len(node_id2index) + 1):
        if i in isolates_G:
            G.add_edge(i, i, name="NULL", id=null_edge, relation=null_relation, weight=1.0)
        elif i not in G:
            G.add_node(i, name="NULL", id=null_entity)
            G.add_edge(i, i, name="NULL", id=null_edge, relation=null_relation, weight=1.0)
        if i in isolates_G1:
            G1.add_edge(i, i, name="NULL", id=null_edge, relation=null_relation, weight=1.0)
        elif i not in G1:
            G1.add_node(i, name="NULL", id=null_entity)
            G1.add_edge(i, i, name="NULL", id=null_edge, relation=null_relation, weight=1.0)
''' Sampling for all connected components of the underlying graph. '''
S = [G.subgraph(c).copy() for c in nx.connected_components(G)] #all subgraphs
    for idx, item in enumerate(S):
        tmp = {I: i for i, I in enumerate(item.nodes())}
        node_mapping.append(tmp)  # absolute -> relative node id mapping
        HH[idx] = nx.relabel_nodes(item, tmp, copy=True)  # relabelled subgraphs
S1, HH1 = list(), dict()
node_mapping1 = list()
S1 = [G1.subgraph(c).copy() for c in nx.connected_components(G1)]
    for idx, item in enumerate(S1):
        tmp = {I: i for i, I in enumerate(item.nodes())}
        node_mapping1.append(tmp)
        HH1[idx] = nx.relabel_nodes(item, tmp, copy=True)
''' Data-structures to store path-related information. '''
    n_nodes = G.number_of_nodes()
    bfs_e2nodes = {n: [] for n in range(n_nodes)}
    bfs_e2relations = {n: [] for n in range(n_nodes)}
    bfs_e2edges = {n: [] for n in range(n_nodes)}
    dfs_e2nodes = {n: [] for n in range(n_nodes)}
    dfs_e2relations = {n: [] for n in range(n_nodes)}
    dfs_e2edges = {n: [] for n in range(n_nodes)}
''' Checking for pre-existing node contexts. '''
directory = 'dataset/' + args.dataset + '/cache/'
length = str(args.neighbor_samples)
if args.use_context:
if os.path.exists(directory + 'train_bfs_' + length + '.pkl'):
print('Loading contexts from files ...')
bfs_e2edges = pkl.load(open(directory + 'train_bfs_' + length + '.pkl', 'rb'))
else:
            ''' Node context sampling. '''
            for n in tqdm(G1.nodes()):
                if n not in bfs_subgraphs: bfs_subgraphs[n] = list()
                # Locate the training-graph component that contains `n`.
                for idx, item in enumerate(S1):
                    if n in item.nodes():
                        tmp = node_mapping1[idx]
                        rel_head = tmp[n]
                        H = HH1[idx]
                        if H.degree()[rel_head] in (0, 2) or rel_head in list(nx.nodes_with_selfloops(H)):
                            # Isolated node (or lone self-loop): store a NULL singleton graph.
                            g = nx.Graph()
                            g.add_node(null_entity, name="NULL", id=null_entity)
                            g.add_edge(null_entity, null_entity, name="NULL", id=null_edge, relation=null_relation, weight=1.0)
                            bfs_subgraphs[n].append(g)
                        else:
                            try:
                                model = SnowBallSampler(number_of_nodes=args.neighbor_samples)
                                nG = model.sample(H, rel_head)
                            except Exception:
                                # Retry with a sample capped by the node's degree.
                                model = SnowBallSampler(number_of_nodes=min(H.degree()[rel_head], args.neighbor_samples))
                                nG = model.sample(H, rel_head)
                            inv_map = {v: k for k, v in tmp.items()}
                            rev_nG = nx.relabel_nodes(nG, inv_map)
                            bfs_subgraphs[n].append(rev_nG)
                for path_graphs in bfs_subgraphs[n]:
                    item_list = list(path_graphs.nodes())
                    bfs_contexts[n] = item_list
                    bfs_e2nodes[n] = item_list
                    rel_list = list()
                    edge_list = list()
                    for e in path_graphs.edges(data=True):
                        rel_list.append(e[2]['relation'])
                        edge_list.append(e[2]['id'])
                    bfs_e2relations[n] = rel_list
                    bfs_e2edges[n] = edge_list
            print('Dumping paths to files ...')
            pkl.dump(bfs_e2edges, open(directory + 'train_bfs_' + length + '.pkl', 'wb'))
        for value in bfs_e2edges.values():
            # Pad short context lists with the null edge and truncate long
            # ones (possible because nodes, not edges, are sampled).
            if len(value) < args.neighbor_samples:
                value += [null_edge] * (args.neighbor_samples - len(value))
            elif len(value) > args.neighbor_samples:
                value = value[:args.neighbor_samples]
            entity2edges.append(np.array(value).astype(np.int32))
        neighbor_params = [np.array(entity2edges).reshape((len(bfs_e2edges), args.neighbor_samples)),
                           np.array(edge2entities), np.array(edge2relation)]
else:
neighbor_params = None
    # print(bfs_e2edges)
''' {9201: [72724, 72734, 46927, 72726, 72725, 72723, 72728, 72722, 72729, 72727],
9202: [55904, 52017, 52019, 87461, 68982, 40029, 68310, 73046, 73044, 39972, 73045], 9203: [73122]} '''
# print(neighbor_params)
''' [array([[ 0, 2915, 2916, ..., 11495, 16778, 11114],
[ 28, 34, 35, ..., 42903, 42571, 44561],
[11503, 1127, 11509, ..., 5945, 1116, 5948],
...,
[36163, 36173, 10366, ..., 36161, 36168, 36166],
[19343, 15456, 15458, ..., 36485, 36483, 3411],
[44561, 44561, 44561, ..., 44561, 44561, 44561]], dtype=int32), array([[ 0, 1],
[ 2, 1],
[ 4, 1],
...,
[2458, 633],
[2260, 6810],
[9203, 9203]]), array([ 0, 0, 0, ..., 12, 0, 14])]
'''
if args.use_path:
train_paths, valid_paths, test_paths = get_paths(train_edges, val_edges, test_edges)
# print(not_in_g)
        # print(train_paths)
path2id, id2path, id2length = get_path_dict_and_length(train_paths, valid_paths, test_paths, len(edge_type2index), args.max_path_len)
if args.path_type == 'embedding':
print('transforming paths to one hot IDs ...')
paths = one_hot_path_id(train_paths, valid_paths, test_paths, path2id)
path_params = [len(path2id)]
elif args.path_type == 'rnn':
paths = sample_paths(train_paths, valid_paths, test_paths, path2id, args.path_samples)
path_params = [id2path, id2length]
else:
raise ValueError('unknown path type')
else:
paths = [None] * 3
path_params = None
# print("path2id", path2id)
'''path2id {(3, 2, 3): 0, (4, 2): 1, (2, 4): 2, (4, 4): 3, (2, 2): 4, (11, 11, 11): 5, (11, 2): 6, (2, 3): 7, (4, 12): 8, (12, 2): 9, (7, 8): 10, (6, 7): 11, (7, 7): 12, (8, 2): 13, (3, 2): 14, (2, 11): 15, (12, 12, 11): 16, (2, 2, 12): 17, (6, 6): 18, (7, 2): 19, (4, 4, 4): 20, (0, 11): 21, (3, 2, 2): 22, (5, 4): 23, (2, 2, 2): 24, (2, 12): 25, (6, 0): 26, (0, 8): 27, (12, 2, 2): 28, (0, 0, 12): 29, (12, 4): 30, (2, 11, 2): 31, (2, 0): 32, (11, 4): 33, (2, 12, 2): 34, (9, 8, 8): 35, (8, 8): 36, (2, 5): 37, (2, 2, 5): 38, (5, 0): 39, (3, 3): 40, (4, 6): 41, (2, 2, 3): 42, (2, 7): 43, (4, 5, 5): 44, (5, 5): 45, (2, 3, 4): 46, (4, 5): 47, (0, 2): 48, (11, 4, 4): 49, (6, 2): 50, (3, 0, 0): 51, (8, 12): 52, (2, 8): 53, (2,): 54, (4, 2, 2): 55, (12, 12): 56, (2, 2, 11): 57, (12, 8): 58, (2, 3, 3): 59, (5, 2): 60, (10, 2): 61, (2, 4, 2): 62, (2, 7, 2): 63, (7, 2, 2): 64, (5, 2, 2): 65, (11, 0, 0): 66, (0, 0): 67, (0, 11, 0): 68, (0, 12, 0): 69, (0, 2, 0): 70, (1, 0): 71, (0, 1): 72, (4, 0, 0): 73, (2, 0, 0): 74, (0, 4, 0): 75, (2, 10): 76, (11, 2, 2): 77, (10, 12): 78, (2, 5, 2): 79, (6, 4): 80, (8, 9): 81, (0, 9, 0): 82, (6, 8): 83, (4, 2, 4): 84, (12, 0): 85, (12, 8, 8): 86, (2, 6): 87, (12, 6): 88, (11, 11): 89, (10, 10): 90, (0, 4): 91, (9, 8): 92, (9, 9): 93, (2, 9): 94, (8, 12, 8): 95, (8, 9, 8): 96, (2, 4, 5): 97, (4, 8): 98, (1, 1): 99, (2, 2, 4): 100}
'''
# print("id2path", id2path)
'''id2path [[3, 2, 3], [4, 2, 14], [2, 4, 14], [4, 4, 14], [2, 2, 14], [11, 11, 11], [11, 2, 14], [2, 3, 14], [4, 12, 14], [12, 2, 14], [7, 8, 14], [6, 7, 14], [7, 7, 14], [8, 2, 14], [3, 2, 14], [2, 11, 14], [12, 12, 11], [2, 2, 12], [6, 6, 14], [7, 2, 14], [4, 4, 4], [0, 11, 14], [3, 2, 2], [5, 4, 14], [2, 2, 2], [2, 12, 14], [6, 0, 14], [0, 8, 14], [12, 2, 2], [0, 0, 12], [12, 4, 14], [2, 11, 2], [2, 0, 14], [11, 4, 14], [2, 12, 2], [9, 8, 8], [8, 8, 14], [2, 5, 14], [2, 2, 5], [5, 0, 14], [3, 3, 14], [4, 6, 14], [2, 2, 3], [2, 7, 14], [4, 5, 5], [5, 5, 14], [2, 3, 4], [4, 5, 14], [0, 2, 14], [11, 4, 4], [6, 2, 14], [3, 0, 0], [8, 12, 14], [2, 8, 14], [2, 14, 14], [4, 2, 2], [12, 12, 14], [2, 2, 11], [12, 8, 14], [2, 3, 3], [5, 2, 14], [10, 2, 14], [2, 4, 2], [2, 7, 2], [7, 2, 2], [5, 2, 2], [11, 0, 0], [0, 0, 14], [0, 11, 0], [0, 12, 0], [0, 2, 0], [1, 0, 14], [0, 1, 14], [4, 0, 0], [2, 0, 0], [0, 4, 0], [2, 10, 14], [11, 2, 2], [10, 12, 14], [2, 5, 2], [6, 4, 14], [8, 9, 14], [0, 9, 0], [6, 8, 14], [4, 2, 4], [12, 0, 14], [12, 8, 8], [2, 6, 14], [12, 6, 14], [11, 11, 14], [10, 10, 14], [0, 4, 14], [9, 8, 14], [9, 9, 14], [2, 9, 14], [8, 12, 8], [8, 9, 8], [2, 4, 5], [4, 8, 14], [1, 1, 14], [2, 2, 4]]
'''
# print("id2length", id2length)
'''id2length [3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 2, 2, 3, 2, 3, 2, 3, 2, 2, 2, 3, 3, 2, 3, 2, 2, 3, 3, 2, 2, 3, 2, 2, 2, 3, 2, 3, 2, 3, 2, 2, 3, 2, 3, 2, 2, 1, 3, 2, 3, 2, 3, 2, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 2, 3, 2, 3, 2, 2, 3, 2, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 3]
'''
# print(path_params) #[144], [101]
    # print(paths)
'''
[<89122x144 sparse matrix of type '<class 'numpy.float64'>'
with 7282 stored elements in List of Lists format>, <8000x144 sparse matrix of type '<class 'numpy.float64'>'
with 792 stored elements in List of Lists format>, <8000x144 sparse matrix of type '<class 'numpy.float64'>'
with 792 stored elements in List of Lists format>]
[<44561x101 sparse matrix of type '<class 'numpy.float64'>'
with 2136 stored elements in List of Lists format>, <4000x101 sparse matrix of type '<class 'numpy.float64'>'
with 234 stored elements in List of Lists format>, <4000x101 sparse matrix of type '<class 'numpy.float64'>'
with 234 stored elements in List of Lists format>]
'''
return edges, paths, len(edge_type2index), neighbor_params, path_params
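# A minimal usage sketch (hypothetical: the real `args` object comes from the
# training script's argument parser; only the attributes read above are set,
# and the dataset name is a placeholder):
#
#   from types import SimpleNamespace
#   args = SimpleNamespace(dataset='example', use_context=True, use_path=True,
#                          neighbor_samples=10, path_samples=8, max_path_len=3,
#                          path_type='embedding')
#   edges, paths, n_relations, neighbor_params, path_params = load_data(args)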