In [7]:
%reload_ext autoreload
%autoreload 2

In [8]:
import pandas as pd
import networkx as nx
import numpy as np

import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import DCRNN
from torch_geometric_temporal.signal import (
    temporal_signal_split,
    StaticGraphTemporalSignal,
)

from tqdm import tqdm

from functools import partial
import os
import torch.optim as optim
import pickle

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

In [9]:
df = pd.read_csv('datasets/trade_data.csv')[['From', 'To', 'Period', 'Value']]
aggregates=df
aggregates = aggregates.rename(columns={'From':'source','To':'target','Period':'timestamp'})
aggregates['weight'] = (aggregates.Value-aggregates.Value.mean())/aggregates.Value.std()
# create node indices
translator = dict(zip(a:=pd.concat([aggregates.source, aggregates.target]).unique(), range(len(a))))
aggregates['id_source'] = aggregates.source.map(lambda x: translator[x])
aggregates['id_target'] = aggregates.target.map(lambda x: translator[x])
# Generate network
network = nx.from_pandas_edgelist(aggregates, source='id_source', target='id_target', edge_attr='weight', create_using=nx.DiGraph)

# Transform to line graph
line_graph = nx.line_graph(network)
# create final node indices
line_node_list = list(line_graph.nodes())
line_translator = dict(zip(a:=list(set(line_node_list)), range(len(a))))


In [10]:
# Create edge list and edge weights
# Edge weights are average number of emails of connecting node

edges = [[],[]]
edge_weights = []
for source, target in tqdm(list(line_graph.edges)):
    edges[0].append(line_translator[source])
    edges[1].append(line_translator[target])
    edge_weights.append(aggregates[((aggregates.id_source==source[1])|(aggregates.id_target==source[1]))].weight.mean())

edges = np.array(edges)
edge_weights = np.array(edge_weights)

100%|██████████| 222157/222157 [04:10<00:00, 888.62it/s] 


In [11]:
# Generate target arrays
numyears = aggregates.timestamp.nunique()
years = sorted(aggregates.timestamp.unique().tolist())
aggregates = aggregates.set_index(['id_source', 'id_target', 'timestamp'])
line_retranslator = {k:v for v, k in line_translator.items()}

targets = []
for year in tqdm(years):
    daily_targets = []
    for node in range(len(line_node_list)):
        source, target = line_retranslator[node]
        try:
            daily_targets.append(aggregates.loc[(source, target, year)].weight)
        except KeyError:
            daily_targets.append(0)
    targets.append(daily_targets)

# Generate features (trade in last 12 months)
feat_num = 12
features = []

for year in tqdm(range(numyears-feat_num)):
    daily_features = []
    for node in range(len(line_node_list)):
        node_feat = []
        for feature_i in range(feat_num):
            node_feat.append(targets[year+feature_i][node])
        daily_features.append(node_feat)
    features.append(daily_features)

# To array
targets = np.array(targets[feat_num:])
features = np.array(features)

# Create data iterator
data = StaticGraphTemporalSignal(edges, edge_weights, features, targets)

100%|██████████| 119/119 [00:22<00:00,  5.40it/s]
100%|██████████| 107/107 [00:00<00:00, 120.38it/s]


In [12]:
pickle.dump(data, open('data.pckl', 'wb'))