In [1]:
import sys
# Make project-local modules importable by bare name. The notebook later
# imports `data_loader` and `GCN_2`; presumably they live in this directory
# (TODO confirm). The path is relative to the notebook's working directory.
sys.path.append("../HetGNN/code_gcn/")

# Directory holding the raw trace shards read below as process{idx}.jsons.
graph_data_path_root = '../GraphData'
# Directory for processed artifacts this notebook reads and writes
# (trace_id_to_idx.json, edge_index.csv, node_feature_norm.csv).
output_root_dir = '../HetGNN/ProcessedData_clean'

In [2]:
import json
from collections import defaultdict
from tqdm import tqdm
import pandas as pd
import numpy as np

## Edge Index CSV

In [5]:
# Flatten the edge list of every trace into one long table: one row per edge,
# tagged with the id of the trace it belongs to.
edge_details = defaultdict(list)
for idx in range(9):  # shards process0.jsons .. process8.jsons
    with open(f'{graph_data_path_root}/process{idx}.jsons', 'r') as fin:
        # Iterate the file handle directly so lines are streamed one at a time;
        # readlines() would materialize the whole shard in memory first.
        # Without a known total tqdm shows a running count rather than a
        # percentage, so label each bar with its shard name.
        for line in tqdm(fin, desc=f'process{idx}'):
            trace = json.loads(line)
            # edge_index holds (src, dst) pairs and edge_attr the matching
            # per-edge type; the two sequences are walked in lockstep.
            for (src_id, dst_id), edge_type in zip(trace['edge_index'], trace['edge_attr']):
                edge_details['src_id'].append(int(src_id))
                edge_details['dst_id'].append(int(dst_id))
                edge_details['edge_type'].append(edge_type)
                edge_details['trace_id'].append(trace['trace_id'])
edge_details_df = pd.DataFrame(edge_details)
edge_details_df

100%|██████████| 14721/14721 [00:07<00:00, 1881.37it/s]
100%|██████████| 14721/14721 [00:09<00:00, 1576.59it/s]
100%|██████████| 14721/14721 [00:10<00:00, 1394.12it/s]
100%|██████████| 14721/14721 [00:11<00:00, 1315.37it/s]
100%|██████████| 14721/14721 [00:12<00:00, 1153.44it/s]
100%|██████████| 14721/14721 [00:14<00:00, 1019.04it/s]
100%|██████████| 14720/14720 [00:15<00:00, 959.88it/s] 
100%|██████████| 14720/14720 [00:17<00:00, 848.60it/s] 
100%|██████████| 14719/14719 [00:18<00:00, 793.59it/s] 


Unnamed: 0,src_id,dst_id,edge_type,trace_id
0,0,2,0,007efb9578bc4f1ab3eab03cb5188af5.38.1629255886...
1,2,3,0,007efb9578bc4f1ab3eab03cb5188af5.38.1629255886...
2,3,4,0,007efb9578bc4f1ab3eab03cb5188af5.38.1629255886...
3,4,5,0,007efb9578bc4f1ab3eab03cb5188af5.38.1629255886...
4,5,6,0,007efb9578bc4f1ab3eab03cb5188af5.38.1629255886...
...,...,...,...,...
29677741,14,15,0,ffc3e79734424b65942466e0d8d432e2.47.1628705868...
29677742,15,16,0,ffc3e79734424b65942466e0d8d432e2.47.1628705868...
29677743,16,17,0,ffc3e79734424b65942466e0d8d432e2.47.1628705868...
29677744,17,18,0,ffc3e79734424b65942466e0d8d432e2.47.1628705868...


In [7]:
# Load the lookup used below to translate each trace id into its index.
trace_idx_path = f'{output_root_dir}/trace_id_to_idx.json'
with open(trace_idx_path, 'r') as trace_idx_file:
    trace_id_to_idx = json.load(trace_idx_file)

In [9]:
# Replace every trace id with its index from trace_id_to_idx.
# Series.map with a dict does the lookup in bulk instead of calling a Python
# lambda once per row. Unlike direct dict indexing it yields NaN for unknown
# keys, so the check below restores the KeyError for unmapped ids.
# NOTE: this overwrites the column in place, so the cell is not re-runnable
# without rebuilding edge_details_df first.
trace_idx = edge_details_df['trace_id'].map(trace_id_to_idx)
unmapped = trace_idx.isna()
if unmapped.any():
    missing_ids = edge_details_df.loc[unmapped, 'trace_id'].unique()[:5]
    raise KeyError(f'trace ids missing from trace_id_to_idx (first few): {list(missing_ids)}')
edge_details_df['trace_id'] = trace_idx
edge_details_df

Unnamed: 0,src_id,dst_id,edge_type,trace_id
0,0,2,0,0
1,2,3,0,0
2,3,4,0,0
3,4,5,0,0
4,5,6,0,0
...,...,...,...,...
29677741,14,15,0,132484
29677742,15,16,0,132484
29677743,16,17,0,132484
29677744,17,18,0,132484


In [10]:
# Persist the edge table; the row index is written as well (to_csv default).
edge_details_df.to_csv(path_or_buf=f'{output_root_dir}/edge_index.csv')

In [22]:
# Sanity check: the (src_id, dst_id) pairs of the trace with index 1, as an array.
edge_details_df.loc[edge_details_df['trace_id'] == 1, ['src_id', 'dst_id']].values

array([[  0,   2],
       [  2,   3],
       [  3,   4],
       ...,
       [827, 761],
       [832,  72],
       [837,  73]])

## GCN Model

In [3]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes, so edits to project-local .py files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import torch_geometric
from data_loader import EventGraphDataset
from GCN_2 import HetGCN_2

In [5]:
# Instantiate the model with its default constructor arguments.
model = HetGCN_2()

In [6]:
# Build the dataset from the processed CSV artifacts under output_root_dir.
node_feature_csv_path = f'{output_root_dir}/node_feature_norm.csv'
edge_index_csv_path = f'{output_root_dir}/edge_index.csv'
dataset = EventGraphDataset(
    node_feature_csv=node_feature_csv_path,
    edge_index_csv=edge_index_csv_path,
    het_types=False,
    unzip=False,
)

reading node features..
reading edge index..
done


In [7]:
# Peek at the first sample; the recorded output shows it is a pair of tensors.
dataset[0]

(tensor([[ 2.3552e-04,  2.6773e-04, -1.8384e-05,  ...,  0.0000e+00,
           0.0000e+00,  2.6262e-04],
         [ 2.3552e-04,  2.6773e-04, -1.8384e-05,  ...,  1.0313e-04,
           0.0000e+00,  2.6278e-04],
         [ 8.0982e-05,  2.6773e-04, -1.8384e-05,  ...,  2.0626e-04,
           0.0000e+00,  7.9132e-05],
         ...,
         [ 3.8716e-06,  1.2357e-04,  2.3899e-04,  ...,  2.0626e-04,
           4.9475e-04,  3.7832e-06],
         [ 2.2149e-04,  1.2357e-04,  1.1030e-04,  ...,  2.5782e-04,
           4.9622e-04,  2.2116e-04],
         [ 2.2149e-04,  1.2357e-04,  1.1030e-04,  ...,  2.5782e-04,
           4.9768e-04,  2.2116e-04]], device='cuda:0'),
 tensor([[  0,   2,   2,  ..., 557, 557, 558],
         [558, 559, 559,  ..., 700, 837,  73]], device='cuda:0'))

In [8]:
# Fetch the sample once and unpack it, rather than indexing the dataset twice
# (each dataset[0] re-runs the dataset's __getitem__).
# Assumes the sample is a 2-tuple, as the displayed dataset[0] output shows:
# first the node-feature tensor, then the edge-index tensor.
node_features, edge_index = dataset[0]
edge_index.shape, node_features.shape

(torch.Size([2, 962]), torch.Size([841, 7]))

In [10]:
# Move the model to the device recorded on the model object itself.
# NOTE(review): `device` is read as an attribute of the HetGCN_2 instance,
# presumably set in its constructor -- confirm in GCN_2.py.
model = model.to(model.device)

In [11]:
# Smoke-test a forward pass on the first sample. The sample is fetched once
# and unpacked, rather than indexing the dataset twice (each dataset[0]
# re-runs the dataset's __getitem__).
# Assumes the sample is a 2-tuple, as the displayed dataset[0] output shows:
# first the node-feature tensor, then the edge-index tensor.
node_features, edge_index = dataset[0]
model(node_features, edge_index)

tensor([[-1.1074e-04,  3.4899e-05, -1.9909e-04,  ..., -6.8256e-05,
         -3.5533e-05,  1.4146e-04],
        [-1.3763e-04,  4.7052e-05, -2.0909e-04,  ..., -7.1569e-05,
         -4.3488e-05,  1.2540e-04],
        [-4.8396e-05,  6.4910e-05, -1.5033e-04,  ..., -6.2435e-05,
          1.8504e-05,  2.3289e-07],
        ...,
        [-1.4363e-04,  1.0350e-04, -1.3349e-04,  ..., -7.3318e-05,
         -7.2670e-07, -1.0341e-04],
        [-2.7034e-04,  6.4225e-05, -2.0524e-04,  ..., -9.4051e-05,
         -9.2819e-05,  7.3371e-05],
        [-2.1153e-04,  9.0652e-05, -1.7076e-04,  ..., -8.8840e-05,
         -2.5935e-05, -7.7717e-05]], device='cuda:0', grad_fn=<AddBackward0>)