# #1: Import Packages

In [None]:
# General 
import sys, numpy as np, pandas as pd, math, matplotlib.pyplot as plt, datetime, copy, os

# Pytorch, pytorch Geometric
import torch, torch_geometric
from torch_geometric.data import HeteroData

# Sklearn
import sklearn

sys.path.insert(1, '/home/ec2-user/SageMaker/repos/fredriks-thesis/python')
import helper_functions, graph_preprocessing, nn_models, hetero_models, graphSage_models, div_models

# #2: Settings

In [None]:
# Directory for model files (judging by its name; it is not referenced in the
# cells visible here).
model_file_path = (
    "/home/ec2-user/SageMaker/repos/fredriks-thesis/"
    "notebooks/09_model_performance_script/models/"
)

# Run configuration:
#   dataset -- size tag that is formatted into the graph/centrality file names
#   seed    -- value handed to torch.manual_seed
settings = dict(dataset=1e4, seed=0)

# #3: Load Dataset and Data Preprocessing

In [None]:
# Location of the serialized heterograph. The file name carries the dataset
# size from the settings (1e4 is formatted as "10000").
filepath = '/home/ec2-user/SageMaker/s3/exploration-876679093433-ew1-initiative-pop-amlanalysis/data/fredriks-thesis/heterographs_01/'
filename = "heterograph_externalnodes_{:.0f}.pt".format(settings['dataset'])

# NOTE(review): torch.load unpickles the file, so only load trusted files.
data = torch.load(filepath+filename)

# Removing the attribute globalRiskScore: column 4 of the 'ind' features and
# column 3 of the 'org' features are cut out by concatenating the columns on
# either side of them.
data['ind'].x = torch.cat((data['ind'].x[:, :4], data['ind'].x[:, 5:]), 1)
# Open-ended slice on purpose: the upper bound used to be taken from
# data['ind'].x (already one column narrower at this point), which could
# silently drop trailing 'org' feature columns.
data['org'].x = torch.cat((data['org'].x[:, :3], data['org'].x[:, 4:]), 1)
#data['ind'].attr_names.remove('globalRiskScore')
#data['org'].attr_names.remove('globalRiskScore')

# Seed torch's random number generator from the run settings.
torch.manual_seed(settings['seed'])

# Store the feature width (number of columns of x) on every node type.
for node_type in ('ind', 'org', 'ext'):
    data[node_type].num_features = data[node_type].x.shape[1]

# Preprocessing pipeline; each step takes the graph and returns it, and the
# order is kept as in the original notebook:
#   1. reverse_edges            -- adds reversed edges
#   2. apply_log_to_txns        -- log of node/edge transaction amounts
#   3. normalize_node_features  -- normalizes node features
#   4. scaling_edge_attr        -- scales edge attributes to [0.01, 1]
# (step descriptions follow the notebook's own notes; the helpers live in
# graph_preprocessing)
for preprocessing_step in (
    graph_preprocessing.reverse_edges,
    graph_preprocessing.apply_log_to_txns,
    graph_preprocessing.normalize_node_features,
    graph_preprocessing.scaling_edge_attr,
):
    data = preprocessing_step(data)


# Role edges get a single dummy feature: a float32 column of ones with one
# row per edge.
for role_edge_type in (('ind', 'role', 'org'), ('org', 'rev_role', 'ind')):
    role_edge_count = data[role_edge_type].edge_index.shape[1]
    data[role_edge_type].edge_attr = torch.ones([role_edge_count, 1], dtype = torch.float32)

# Pick the first CUDA device when available, otherwise the CPU. Only the
# device object is created here; nothing is moved onto it in this cell.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Create homogeneous graphs from input

In [None]:
# Number of nodes per node type (rows of the feature matrix).
num_ind, num_org, num_ext = (
    data[node_type].x.shape[0] for node_type in ('ind', 'org', 'ext')
)

########################################################################
# Homogeneous transaction graphs
########################################################################
# One graph per node type, built from the txn edges within that type. Node
# features are a placeholder vector of ones (one entry per node); edge
# indices and edge attributes are taken over unchanged.
ind_txn_store = data[('ind', 'txn', 'ind')]
org_txn_store = data[('org', 'txn', 'org')]

data_homo_ind = torch_geometric.data.Data(
    x=torch.ones(num_ind),
    edge_index=ind_txn_store.edge_index,
    edge_attr=ind_txn_store.edge_attr,
)
data_homo_org = torch_geometric.data.Data(
    x=torch.ones(num_org),
    edge_index=org_txn_store.edge_index,
    edge_attr=org_txn_store.edge_attr,
)

In [None]:
import networkx as nx

# NOTE(review): stopwatch() without arguments presumably returns a start
# time and stopwatch(start) reports the elapsed time -- confirm in
# helper_functions.
start_time_total = helper_functions.stopwatch()


def _copy_edge_weights(nx_graph, pyg_graph, attr_column=1):
    """Store one edge-attribute column of ``pyg_graph`` as 'weight' in ``nx_graph``.

    Args:
        nx_graph: networkx graph that already contains every edge listed in
            ``pyg_graph.edge_index``; it is modified in place.
        pyg_graph: torch_geometric ``Data`` object with ``edge_index``
            (row 0 = source, row 1 = target) and a 2-D ``edge_attr``.
        attr_column: column of ``edge_attr`` to use as the weight.

    If the same (source, target) pair occurs several times, the last
    occurrence determines the stored weight.
    """
    # Convert the tensors to Python lists once instead of indexing into them
    # on every iteration.
    sources = pyg_graph.edge_index[0].tolist()
    targets = pyg_graph.edge_index[1].tolist()
    weights = pyg_graph.edge_attr[:, attr_column].tolist()
    for source, target, weight in zip(sources, targets, weights):
        nx_graph[source][target]['weight'] = float(weight)


# Transform the homogeneous graphs to directed networkX graphs
nx_data_homo_ind = torch_geometric.utils.to_networkx(data_homo_ind, to_undirected=False)
nx_data_homo_org = torch_geometric.utils.to_networkx(data_homo_org, to_undirected=False)

# Add edge weights (edge attribute column 1) to ind and org
_copy_edge_weights(nx_data_homo_ind, data_homo_ind)
_copy_edge_weights(nx_data_homo_org, data_homo_org)


helper_functions.stopwatch(start_time_total)
helper_functions.sound_alert()

# Eigenvector centrality

In [None]:
start_time_total = helper_functions.stopwatch()

# Both graphs use the same solver settings: a generous iteration cap and the
# 'weight' edge attribute as edge weight.
eigenvector_options = {"max_iter": 100000, "weight": "weight"}

# Weighted eigenvector centrality of the ind graph (dict: node -> score).
eigenvector_ind = nx.eigenvector_centrality(nx_data_homo_ind, **eigenvector_options)
helper_functions.stopwatch(start_time_total)

# Same for the org graph. The second stopwatch call reuses the same start
# time, so it covers both computations.
eigenvector_org = nx.eigenvector_centrality(nx_data_homo_org, **eigenvector_options)
helper_functions.stopwatch(start_time_total)

helper_functions.sound_alert()

# Betweenness centrality

In [None]:
# Fraction of the nodes used as sample size k for betweenness centrality.
frac_ind = 0.0004
frac_org = 0.002

# Cost constant for the runtime estimate below.
# NOTE(review): looks like a measurement of 11 s on a 3356-node run with
# k equal to the node count -- confirm.
one_calc = 11/math.pow(3356,2)

# Sample sizes, rounded up to whole nodes.
k_ind = math.ceil(num_ind*frac_ind)
k_org = math.ceil(num_org*frac_org)

# Estimated runtime: cost constant * k * number of nodes.
estimate_time_ind = one_calc*k_ind*num_ind
ind_report = "k_ind: {}, Estimate time ind: {:.4f}s"
print(ind_report.format(k_ind, estimate_time_ind))

estimate_time_org = one_calc*k_org*num_org
org_report = "k_org: {}, Estimate time org: {:.4f}s"
print(org_report.format(k_org,estimate_time_org))

In [None]:
start_time_total = helper_functions.stopwatch()

# Approximate weighted betweenness centrality of the ind graph from k_ind
# sampled nodes. The sample is drawn at random, so the seed from the run
# settings is passed along to make the result repeatable between runs.
betweenness_ind = nx.betweenness_centrality(
    nx_data_homo_ind,
    k=k_ind,
    weight="weight",
    seed=settings['seed'],
)

helper_functions.stopwatch(start_time_total)
helper_functions.sound_alert()

In [None]:
start_time_total = helper_functions.stopwatch()

# Approximate weighted betweenness centrality of the org graph from k_org
# sampled nodes. The sample is drawn at random, so the seed from the run
# settings is passed along to make the result repeatable between runs.
betweenness_org = nx.betweenness_centrality(
    nx_data_homo_org,
    k=k_org,
    weight="weight",
    seed=settings['seed'],
)

helper_functions.stopwatch(start_time_total)
helper_functions.sound_alert()

In [None]:
def _scores_to_column(scores):
    """Turn a ``{node_id: score}`` dict into a tensor of shape (num_nodes, 1).

    Rows are ordered by ascending node id, so row i belongs to the node with
    the i-th smallest id regardless of the dict's insertion order.
    """
    ordered_scores = [scores[node] for node in sorted(scores)]
    return torch.tensor(ordered_scores).reshape(len(ordered_scores), 1)


# NOTE: the centrality dicts are rebound to tensors below, so this cell
# cannot be re-run without recomputing the centralities first.

# ind: column 0 = betweenness, column 1 = eigenvector centrality
betweenness_ind = _scores_to_column(betweenness_ind)
eigenvector_ind = _scores_to_column(eigenvector_ind)
ind_tensor = torch.cat((betweenness_ind,eigenvector_ind), dim = 1)
my_filename_ind = "centralities_ind_{:.0f}.pt".format(settings['dataset'])
torch.save(ind_tensor, filepath+my_filename_ind)

# org: column 0 = betweenness, column 1 = eigenvector centrality
betweenness_org = _scores_to_column(betweenness_org)
eigenvector_org = _scores_to_column(eigenvector_org)
org_tensor = torch.cat((betweenness_org,eigenvector_org), dim = 1)
my_filename_org = "centralities_org_{:.0f}.pt".format(settings['dataset'])
torch.save(org_tensor, filepath+my_filename_org)