# #1: Import Packages

In [1]:
# General 
import sys, numpy as np, pandas as pd, math, matplotlib.pyplot as plt, datetime, copy, os

# PyTorch, PyTorch Geometric
import torch, torch_geometric
from torch_geometric.data import HeteroData

# Sklearn
import sklearn

sys.path.insert(1, '/home/ec2-user/SageMaker/repos/fredriks-thesis/python')
import helper_functions, graph_preprocessing, nn_models, hetero_models, graphSage_models, div_models

# #2: Settings

In [2]:
# Path prefix for model files (not referenced by the cells visible in this notebook section).
model_file_path = "/home/ec2-user/SageMaker/repos/fredriks-thesis/notebooks/09_model_performance_script/models/"

# Run configuration:
#   'dataset' - numeric size tag, formatted with "{:.0f}" into the data file names
#   'seed'    - value handed to torch.manual_seed
settings = dict(dataset=1e6, seed=0)

# #3: Load Dataset and Data Preprocessing

In [3]:
filepath = '/home/ec2-user/SageMaker/s3/exploration-876679093433-ew1-initiative-pop-amlanalysis/data/fredriks-thesis/heterographs_01/'
filename = "heterograph_externalnodes_{:.0f}.pt".format(settings['dataset'])

# NOTE(review): torch.load unpickles the file, so only load graphs from a trusted location.
data = torch.load(filepath+filename)

# Removing the attribute globalRiskScore (the slices drop column 4 of 'ind' and column 3 of 'org').
# Open-ended slices keep every remaining column of the tensor being sliced. The 'org'
# slice used to be bounded by data['ind'].x.shape[1] (already reduced by the line above),
# which silently dropped trailing 'org' columns whenever 'org' had more columns than that bound.
data['ind'].x = torch.cat((data['ind'].x[:,0:4], data['ind'].x[:,5:]), 1)
data['org'].x = torch.cat((data['org'].x[:,0:3], data['org'].x[:,4:]), 1)
#data['ind'].attr_names.remove('globalRiskScore')
#data['org'].attr_names.remove('globalRiskScore')

torch.manual_seed(settings['seed']) # Setting torch random state seed

# Create num_features variables (column counts after the removal above)
data['ind'].num_features = data['ind'].x.shape[1]
data['org'].num_features = data['org'].x.shape[1]
data['ext'].num_features = data['ext'].x.shape[1]

# Reversing all edges
data = graph_preprocessing.reverse_edges(data)
# Applying log to node feature transaction amounts and edge feature transaction amounts:
data = graph_preprocessing.apply_log_to_txns(data)
# Normalizing node features
data = graph_preprocessing.normalize_node_features(data)
# Scaling edge_attributes to be in range [0.01,1]
data = graph_preprocessing.scaling_edge_attr(data)


# Adding dummy-features for role-edges; ones for all edges (one float32 column per edge)
data[('ind', 'role', 'org')].edge_attr = torch.ones([data[('ind', 'role', 'org')].edge_index.shape[1],1], dtype = torch.float32)
data[('org', 'rev_role', 'ind')].edge_attr = torch.ones([data[('org', 'rev_role', 'ind')].edge_index.shape[1],1], dtype = torch.float32)

# Define device; the transfer of data to the device is left disabled below
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#data = data.to(device)

# Degree features

In [4]:
from torch_geometric import utils
def hetero_degree(edge_index, N_in):
    """Count the incoming edges of every destination node for one edge type.

    Args:
        edge_index: Two index rows (source row first, destination row second);
            only the destination row is used.
        N_in: Number of destination nodes, forwarded to ``utils.degree`` as
            the node count.

    Returns:
        The float tensor produced by ``utils.degree`` for the destination row.
    """
    _sources, targets = edge_index
    return utils.degree(targets, N_in, dtype=torch.float)

from torch_scatter import scatter_sum
def hetero_degree_weighted(edge_index, edge_weight, N_in):
    """Sum the edge weights over the incoming edges of every destination node.

    Args:
        edge_index: Two index rows (source row first, destination row second);
            only the destination row is used.
        edge_weight: One weight per edge, summed per destination index.
        N_in: Number of destination nodes, forwarded to ``scatter_sum`` as
            ``dim_size``.

    Returns:
        The tensor produced by ``scatter_sum`` for the destination row.
    """
    _sources, targets = edge_index
    return scatter_sum(src=edge_weight, index=targets, dim_size=N_in)

In [5]:
# Start the timer for the degree computations in this cell.
start_time = helper_functions.stopwatch()

# Destination-node counts, used as N_in below.
num_ind_nodes = data['ind'].x.shape[0]
num_org_nodes = data['org'].x.shape[0]

# Unweighted in-degrees for the ind/org transaction edge types.
ind_txn_ind_degree = hetero_degree(
    edge_index=data[('ind', 'txn', 'ind')].edge_index,
    N_in=num_ind_nodes,
)
org_txn_org_degree = hetero_degree(
    edge_index=data[('org', 'txn', 'org')].edge_index,
    N_in=num_org_nodes,
)
ind_txn_org_degree = hetero_degree(
    edge_index=data[('ind', 'txn', 'org')].edge_index,
    N_in=num_org_nodes,
)
org_txn_ind_degree = hetero_degree(
    edge_index=data[('org', 'txn', 'ind')].edge_index,
    N_in=num_ind_nodes,
)

# Unweighted in-degree of 'org' nodes over the role edges.
ind_role_org_degree = hetero_degree(
    edge_index=data[('ind', 'role', 'org')].edge_index,
    N_in=num_org_nodes,
)

# Weighted in-degrees: the first edge-attribute column is used as the edge weight.
ind_txn_ind_degree_weighted = hetero_degree_weighted(
    edge_index=data[('ind', 'txn', 'ind')].edge_index,
    edge_weight=data[('ind', 'txn', 'ind')].edge_attr[:, 0],
    N_in=num_ind_nodes,
)
org_txn_org_degree_weighted = hetero_degree_weighted(
    edge_index=data[('org', 'txn', 'org')].edge_index,
    edge_weight=data[('org', 'txn', 'org')].edge_attr[:, 0],
    N_in=num_org_nodes,
)
ind_txn_org_degree_weighted = hetero_degree_weighted(
    edge_index=data[('ind', 'txn', 'org')].edge_index,
    edge_weight=data[('ind', 'txn', 'org')].edge_attr[:, 0],
    N_in=num_org_nodes,
)
org_txn_ind_degree_weighted = hetero_degree_weighted(
    edge_index=data[('org', 'txn', 'ind')].edge_index,
    edge_weight=data[('org', 'txn', 'ind')].edge_attr[:, 0],
    N_in=num_ind_nodes,
)

# Report the elapsed time and signal completion.
helper_functions.stopwatch(start_time)
helper_functions.sound_alert()

Done in: 0 min 0 sek


In [6]:
# Degree features for every edge type, grouped by destination node type.
#
# Naming: deg_<src>_itxn_<dst> is computed on ('<src>', 'txn', '<dst>') edges and
# deg_<src>_otxn_<dst> on ('<src>', 'rev_txn', '<dst>') edges; the wdeg_* variants
# sum the first edge-attribute column instead of counting edges.
# NOTE(review): 'rev_txn' edges are presumably the reversed copies added by
# graph_preprocessing.reverse_edges, which would make the otxn features
# out-degrees in the original graph - confirm.

def _in_degree(edge_type):
    """Unweighted in-degree of the destination nodes of `edge_type`.

    `edge_type` is a (source type, relation, destination type) tuple; the
    destination type (last element) selects the node count passed as N_in.
    """
    return hetero_degree(
        edge_index = data[edge_type].edge_index,
        N_in = data[edge_type[-1]].x.shape[0],
    )


def _weighted_in_degree(edge_type):
    """Weighted in-degree of the destination nodes of `edge_type`.

    Uses the first column of the edge type's `edge_attr` as the edge weight;
    the destination type (last element of the tuple) selects N_in.
    """
    store = data[edge_type]
    return hetero_degree_weighted(
        edge_index = store.edge_index,
        edge_weight = store.edge_attr[:,0],
        N_in = data[edge_type[-1]].x.shape[0],
    )


# --- Destination: 'ind' ---
deg_ind_itxn_ind = _in_degree(('ind', 'txn', 'ind'))
deg_org_itxn_ind = _in_degree(('org', 'txn', 'ind'))
deg_ext_itxn_ind = _in_degree(('ext', 'txn', 'ind'))
deg_ind_otxn_ind = _in_degree(('ind', 'rev_txn', 'ind'))
deg_org_otxn_ind = _in_degree(('org', 'rev_txn', 'ind'))
deg_ext_otxn_ind = _in_degree(('ext', 'rev_txn', 'ind'))
deg_org_role_ind = _in_degree(('org', 'rev_role', 'ind'))

wdeg_ind_itxn_ind = _weighted_in_degree(('ind', 'txn', 'ind'))
wdeg_org_itxn_ind = _weighted_in_degree(('org', 'txn', 'ind'))
wdeg_ext_itxn_ind = _weighted_in_degree(('ext', 'txn', 'ind'))
wdeg_ind_otxn_ind = _weighted_in_degree(('ind', 'rev_txn', 'ind'))
wdeg_org_otxn_ind = _weighted_in_degree(('org', 'rev_txn', 'ind'))
wdeg_ext_otxn_ind = _weighted_in_degree(('ext', 'rev_txn', 'ind'))

# --- Destination: 'org' ---
deg_ind_itxn_org = _in_degree(('ind', 'txn', 'org'))
deg_org_itxn_org = _in_degree(('org', 'txn', 'org'))
deg_ext_itxn_org = _in_degree(('ext', 'txn', 'org'))
deg_ind_otxn_org = _in_degree(('ind', 'rev_txn', 'org'))
deg_org_otxn_org = _in_degree(('org', 'rev_txn', 'org'))
deg_ext_otxn_org = _in_degree(('ext', 'rev_txn', 'org'))
deg_ind_role_org = _in_degree(('ind', 'role', 'org'))

wdeg_ind_itxn_org = _weighted_in_degree(('ind', 'txn', 'org'))
wdeg_org_itxn_org = _weighted_in_degree(('org', 'txn', 'org'))
wdeg_ext_itxn_org = _weighted_in_degree(('ext', 'txn', 'org'))
wdeg_ind_otxn_org = _weighted_in_degree(('ind', 'rev_txn', 'org'))
wdeg_org_otxn_org = _weighted_in_degree(('org', 'rev_txn', 'org'))
wdeg_ext_otxn_org = _weighted_in_degree(('ext', 'rev_txn', 'org'))

# --- Destination: 'ext' ---
deg_ind_itxn_ext = _in_degree(('ind', 'txn', 'ext'))
deg_org_itxn_ext = _in_degree(('org', 'txn', 'ext'))
deg_ind_otxn_ext = _in_degree(('ind', 'rev_txn', 'ext'))
deg_org_otxn_ext = _in_degree(('org', 'rev_txn', 'ext'))

wdeg_ind_itxn_ext = _weighted_in_degree(('ind', 'txn', 'ext'))
wdeg_org_itxn_ext = _weighted_in_degree(('org', 'txn', 'ext'))
wdeg_ind_otxn_ext = _weighted_in_degree(('ind', 'rev_txn', 'ext'))
wdeg_org_otxn_ext = _weighted_in_degree(('org', 'rev_txn', 'ext'))

In [7]:
# Assemble one degree-feature matrix per node type: every 1-D degree vector is
# viewed as an (n, 1) column and the columns are concatenated along dim 1.

# Node counts per type.
n_ind = data['ind'].x.shape[0]
n_org = data['org'].x.shape[0]
n_ext = data['ext'].x.shape[0]

# Column order for 'ind': 7 unweighted degrees followed by 6 weighted degrees.
ind_degree_columns = [
    deg_ind_itxn_ind,
    deg_org_itxn_ind,
    deg_ext_itxn_ind,
    deg_ind_otxn_ind,
    deg_org_otxn_ind,
    deg_ext_otxn_ind,
    deg_org_role_ind,
    wdeg_ind_itxn_ind,
    wdeg_org_itxn_ind,
    wdeg_ext_itxn_ind,
    wdeg_ind_otxn_ind,
    wdeg_org_otxn_ind,
    wdeg_ext_otxn_ind,
]
degree_features_ind = torch.cat(
    [column.view((n_ind, 1)) for column in ind_degree_columns], dim=1
)

# Column order for 'org': 7 unweighted degrees followed by 6 weighted degrees.
org_degree_columns = [
    deg_ind_itxn_org,
    deg_org_itxn_org,
    deg_ext_itxn_org,
    deg_ind_otxn_org,
    deg_org_otxn_org,
    deg_ext_otxn_org,
    deg_ind_role_org,
    wdeg_ind_itxn_org,
    wdeg_org_itxn_org,
    wdeg_ext_itxn_org,
    wdeg_ind_otxn_org,
    wdeg_org_otxn_org,
    wdeg_ext_otxn_org,
]
degree_features_org = torch.cat(
    [column.view((n_org, 1)) for column in org_degree_columns], dim=1
)

# Column order for 'ext': 4 unweighted degrees followed by 4 weighted degrees.
ext_degree_columns = [
    deg_ind_itxn_ext,
    deg_org_itxn_ext,
    deg_ind_otxn_ext,
    deg_org_otxn_ext,
    wdeg_ind_itxn_ext,
    wdeg_org_itxn_ext,
    wdeg_ind_otxn_ext,
    wdeg_org_otxn_ext,
]
degree_features_ext = torch.cat(
    [column.view((n_ext, 1)) for column in ext_degree_columns], dim=1
)

In [8]:
# File names carry the dataset size tag from settings['dataset'].
my_filename_ind = "degfeatures_ind_{:.0f}.pt".format(settings['dataset'])
my_filename_org = "degfeatures_org_{:.0f}.pt".format(settings['dataset'])
my_filename_ext = "degfeatures_ext_{:.0f}.pt".format(settings['dataset'])

# Write each feature matrix under `filepath`, in the order ind, org, ext.
for _features, _target_name in (
    (degree_features_ind, my_filename_ind),
    (degree_features_org, my_filename_org),
    (degree_features_ext, my_filename_ext),
):
    torch.save(_features, filepath + _target_name)

In [9]:
# Show the assembled 'ind' degree-feature matrix as the cell output.
degree_features_ind

tensor([[0.0000, 1.0000, 0.0000,  ..., 0.0000, 0.0100, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0100],
        [0.0000, 1.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.0000,  ..., 0.0000, 0.0000, 0.0100],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0100]])