# Imports

In [None]:
from flib.preprocess.feature_engineering import cal_features
import sys
import os
import json
import random
import numpy as np
import torch
import multiprocessing as mp
from flib.federated_learning.modules import LogisticRegressor
from flib.federated_learning.criterions import ClassBalancedLoss
from flib.federated_learning.client import Client
from flib.federated_learning.server import Server
import multiprocessing as mp

# Functions

In [None]:
def set_random_seed(seed: int = 1):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (default generator plus the CUDA generators), then configures cuDNN
    for deterministic execution.

    Args:
        seed: Value passed to each seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # NOTE: These two flags are what make repeated runs use deterministic
    #       cuDNN behaviour; they are currently enabled. Comment them out
    #       if exact run-to-run repeatability is not required.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Set seed and multiprocessing context

In [None]:
# Make the run reproducible before any data or model is created.
set_random_seed(42)

# Use the 'spawn' start method for worker processes.
# force=True lets this cell be re-executed in the same kernel: without it,
# a second call raises RuntimeError because the context is already set.
mp.set_start_method('spawn', force=True)

# Generate data

In [None]:
import subprocess

# NOTE: machine-specific absolute path to the repository root; adjust it
#       when running on another machine.
pwd = '/home/edvin/Desktop/flib/'
config_path = pwd + 'flib/AMLsim/paramFiles/10K_accts/conf.json'

# Both simulator steps run from the AMLsim directory (relative to the
# notebook's working directory).
amlsim_dir = '../flib/AMLsim'

# Step 1: run the transaction graph generator script with the config file.
# Step 2: run the amlsim.AMLSim main class through Maven with the same config.
# Arguments are passed as lists (no shell), and check=True raises
# CalledProcessError when a step exits with a non-zero status, so a failed
# step is not silently followed by reading stale or missing output.
subprocess.run(
    ['python3', 'scripts/transaction_graph_generator.py', config_path],
    cwd=amlsim_dir,
    check=True,
)
subprocess.run(
    ['mvn', 'exec:java', '-Dexec.mainClass=amlsim.AMLSim', f'-Dexec.args={config_path}'],
    cwd=amlsim_dir,
    check=True,
)

# Derive the transaction log location from the simulator configuration.
with open(config_path, 'r') as f:
    config = json.load(f)
tx_log_path = os.path.join(
    config['output']['directory'],
    config['general']['simulation_name'],
    config['output']['transaction_log'],
)

print(f'txs log: {tx_log_path}')

# Feature engineering

In [None]:
# Transaction log to process, given relative to the AMLsim directory.
# NOTE(review): this hard-coded value replaces any tx_log_path assigned in an
# earlier cell - confirm that is intended.
tx_log_path = 'outputs/10K_accts/tx_log.csv'

# Compute the feature tables from the transaction log; the result is
# consumed as an iterable of (train, test) pairs by the next cell.
dfs = cal_features(
    os.path.join('../flib/AMLsim/', tx_log_path),
    windows=(3, 10),
    overlap=0.9,
    include_edges=False,
)

In [None]:
# Columns removed from the node tables before they are handed to the clients.
dropped_columns = ['account', 'bank']

# Each entry of `dfs` is ((train_nodes, train_edges), (test_nodes, test_edges));
# only the node tables are kept.
datasets = []
for (train_node_df, train_edge_df), (test_node_df, test_edge_df) in dfs:
    # Show the row labelled 0 of the training node table as a quick sanity check.
    display(train_node_df.loc[0:0])
    datasets.append((
        train_node_df.drop(columns=dropped_columns),
        test_node_df.drop(columns=dropped_columns),
    ))
    

In [None]:
# hyperparameters

# -- federated training schedule (forwarded to server.run) --
n_rounds = 301
eval_every = 30
n_rounds_no_aggregation = 0

# -- server settings --
n_workers = 4
log_predictions = True

# -- model, optimizer and loss classes handed to every client --
Module = LogisticRegressor
Optimizer = torch.optim.SGD
Criterion = ClassBalancedLoss
optimizer_params = {'momentum': 0.0, 'dampening': 0.0, 'weight_decay': 0.0}
criterion_params = {'beta': 0.9999, 'loss_type': 'sigmoid'}

# -- local training settings handed to every client --
lr = 0.001
n_epochs = 1
batch_size = 128

# Make sure the directory that receives the log file exists.
os.makedirs('results/10K_accts', exist_ok=True)
    
# init clients
# Use the first CUDA device when one is available and fall back to the CPU
# otherwise, so the notebook does not fail on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# One client per (train, test) dataset pair; no validation set is supplied.
clients = [
    Client(
        name=f'client_{i}',
        device=device,
        trainset=trainset,
        valset=None,
        testset=testset,
        Module=Module,
        Optimizer=Optimizer,
        Criterion=Criterion,
        optimizer_params=optimizer_params,
        criterion_params=criterion_params,
        lr=lr,
        n_epochs=n_epochs,
        batch_size=batch_size,
    )
    for i, (trainset, testset) in enumerate(datasets)
]
    
# init server
# Model dimensions are derived from the first client's training table.
# NOTE(review): assumes the last column holds the label and every other
# column is an input feature - confirm against cal_features' output.
reference_train_df = datasets[0][0]
label_column = reference_train_df.columns[-1]
input_dim = len(reference_train_df.columns) - 1
output_dim = len(reference_train_df[label_column].unique())

# The server receives the state dict of a freshly constructed module.
module = Module(input_dim=input_dim, output_dim=output_dim)
model = module.state_dict()
server = Server(
    clients=clients,
    model=model,
    n_workers=n_workers,
    log_predictions=log_predictions,
    log_file='results/10K_accts/log',
)
    
# train
print('running experiment: 10K_accts')

# Run the federated training with the schedule configured above and keep
# whatever server.run returns.
avg_losses = server.run(
    n_rounds=n_rounds,
    eval_every=eval_every,
    n_rounds_no_aggregation=n_rounds_no_aggregation,
)
print()