## Load and sort data

In [None]:
import sys

# Put the parent directory on the import path BEFORE importing the project
# package below; inserting it after that import (as was done previously)
# comes too late for the import to benefit from it.
sys.path.insert(0, "../")

from dataAnalysis.DataAnalysis import DataAnalysis
import pandas as pd
import cudf
import torch

# Read the raw table (first row is the header) and hand it to the project's
# DataAnalysis helper, which later provides the training/testing splits.
data = pd.read_csv(r"../extdata/sbcdata.csv", header=0)
data_analysis = DataAnalysis(data)

In [None]:
# Stack the training and testing splits into a single frame.
train_and_test = pd.concat(
    (data_analysis.get_training_data(), data_analysis.get_testing_data())
)
max_Id = train_and_test["Id"].unique().max()

# Shift the Ids of the "gw" testing split by the largest Id seen so far, so
# they land above the Ids already present, then append the split.
# NOTE(review): if a gw Id can be 0, the shifted Id equals max_Id and collides
# with an existing one — confirm that gw Ids start at 1.
gw_data = data_analysis.get_gw_testing_data().copy(deep=True)
gw_data = gw_data.assign(Id=gw_data["Id"] + max_Id)
data = pd.concat((train_and_test, gw_data))

# Move to cuDF and order the rows by Id; the index is then renumbered 0..n-1.
data = cudf.from_pandas(data).sort_values("Id")
unique_ids = data["Id"].unique()
data = data.reset_index(drop=True)
# NOTE(review): reset_index(drop=True) does not create an "index" column, so
# this pop requires one to already exist in the source frames — confirm.
data.pop("index")

## Directed graph

In [None]:
import numpy as np
import cupy as cp


def _group_edges(num_nodes, offset):
    """Return the (source, target) index arrays for one group of rows.

    The group is treated as ``num_nodes`` consecutive nodes numbered
    ``offset .. offset + num_nodes - 1``. The edges are, in this order:

    1. one self edge ``(k, k)`` per node;
    2. for every node ``i`` (ascending), an edge from each earlier node
       ``0 .. i-1`` to ``i``.

    That is ``num_nodes * (num_nodes + 1) / 2`` edges in total.

    Args:
        num_nodes: Number of rows (nodes) in the group.
        offset: Global row index of the group's first node.

    Returns:
        Tuple ``(sources, targets)`` of int64 NumPy arrays of equal length.
    """
    nodes = torch.arange(num_nodes, dtype=torch.long) + offset
    # tril_indices(..., offset=-1) lists every (row, col) pair with row > col,
    # ordered by row and then by column: exactly "target i, sources 0..i-1"
    # for i = 1..num_nodes-1, without a Python-level loop.
    later, earlier = torch.tril_indices(num_nodes, num_nodes, offset=-1) + offset
    sources = torch.cat((nodes, earlier))
    targets = torch.cat((nodes, later))
    return sources.numpy(), targets.numpy()


# Per-group pieces are collected in lists and concatenated once at the end.
# Concatenating onto a growing array inside the loop copies everything
# accumulated so far on every iteration, which is quadratic in the edge count.
# The empty int32 seed arrays mirror the original initial values.
source_chunks = [np.array([], dtype=cp.int32)]
target_chunks = [np.array([], dtype=cp.int32)]

for j, (Id, group) in enumerate(data.groupby("Id"), start=1):
    indices = group.index
    # Assumes the rows of a group occupy a contiguous index range that starts
    # at indices[0] (rows were sorted by "Id" and re-indexed beforehand).
    # NOTE(review): confirm the group index is in ascending order.
    offset = int(indices[0])
    num_nodes = len(indices)

    group_sources, group_targets = _group_edges(num_nodes, offset)
    source_chunks.append(group_sources)
    target_chunks.append(group_targets)

    # Progress report every 1000 groups.
    if j % 1000 == 0:
        print(f"{str(j / unique_ids.shape[0] * 100)} %")

source_edge_index = np.concatenate(source_chunks)
target_edge_index = np.concatenate(target_chunks)

# Row 0 holds the sources and row 1 the targets; the CSV is written
# transposed, i.e. one edge per line.
edge_index = np.asarray([source_edge_index, target_edge_index])
edge_index = torch.tensor(edge_index)
pd.DataFrame(edge_index.numpy().transpose()).to_csv("directed_edge_index.csv", index=False)

## Reverse directed graph

In [None]:
# Swap the two rows of edge_index (sources <-> targets) so that every edge
# points the opposite way. Indexing with [1, 0] returns a new tensor and
# leaves edge_index itself untouched.
index = torch.LongTensor([1, 0])
rev_edge_index = edge_index[index]

# CSV export of the reversed edges is currently commented out:
# pd.DataFrame(rev_edge_index.numpy()).to_csv("reverse_directed_edge_index.csv", index=False)

## Undirected graph

In [None]:
from torch_geometric.utils import to_undirected

# Build the undirected version of the directed edge list and save it as CSV.
# NOTE(review): unlike the directed export, this frame is written without a
# transpose, so the CSV has 2 rows and one column per edge — confirm that
# downstream readers expect this layout.
undirected_edge_index = to_undirected(edge_index)
undirected_frame = pd.DataFrame(undirected_edge_index.numpy())
undirected_frame.to_csv("undirected_edge_index.csv", index=False)