In [1]:

import os.path
import wget
import bz2
import shutil
import numpy as np

In [2]:
os.environ["PROTOBUF_PYTHON_IMPLEMENTATION"] = "python"
os.environ["CUDA_LAUNCH_BLOCKING"]="1"

In [2]:

import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, mean_squared_error, log_loss
from sklearn.model_selection import KFold

In [2]:
import torch

In [5]:
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

2023-09-13 20:53:11.192749: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [3]:
from utils.data_utils import load_data_cross_validation, load_movielens, load_creditcardfraud
from model.fl_model import VerticalFLModel
from model.single_party_model import SingleParty
from model.split_nn_model import SplitNNModel

In [4]:
from model.models import FC
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [8]:
# from ydata_profiling import ProfileReport
# import pandas as pd

In [9]:
## Profiling
# df = pd.read_csv("data/creditcard/creditcard.csv")
# profile = ProfileReport(df, title="Pandas Profiling Report")

In [10]:
# profile

In [4]:
xs_train_val, y_train_val, xs_test, y_test = load_creditcardfraud("data/creditcard/creditcard.csv", use_cache = False,
                                                        test_rate = 0.1)

Loading creditcardfraud from file


In [5]:
xs_train_val[0].shape

(256326, 23)

In [7]:
xs_train_val[1].shape

(256326, 6)

In [8]:
xs_test[0].shape

(28481, 23)

In [9]:
xs_test[1].shape

(28481, 6)

In [5]:
xs_train_val[0]

array([[ 1.6727735 , -0.02492336,  0.11050692, ...,  0.32611802,
         0.08338555,  0.08263728],
       [ 0.1097971 , -0.3073768 , -0.56113055, ..., -0.08961086,
        -0.15334963,  0.07125348],
       [ 1.16946849,  0.3376317 , -1.13809214, ...,  0.68097497,
         0.19069961,  0.20737273],
       ...,
       [-0.28861898, -0.24892616,  1.01542692, ...,  0.19425057,
        -0.39809271,  0.20028062],
       [-0.82400081,  0.41135224,  0.18816536, ..., -0.31271814,
         0.17021189, -0.22083509],
       [ 0.30089772,  0.17409996,  0.05117377, ..., -0.95504273,
         0.09243825,  1.41551824]])

In [17]:
device = "cuda:0"

xs_train_val, y_train_val, xs_test, y_test = load_creditcardfraud("data/creditcard/creditcard.csv", use_cache = False,
                                                        test_rate = 0.1)

x_train_val = np.concatenate(xs_train_val, axis=1)
x_test = np.concatenate(xs_test, axis=1)
rmse_list = []
acc_list = []
f1_list = []
kfold = KFold(n_splits=5, shuffle=True, random_state=0).split(y_train_val)

def predict(model, Xs):
    Xs_tensor = torch.from_numpy(Xs).float().to(device) 
    y_score = model.to(device)(Xs_tensor).detach().cpu().numpy()
    y_pred = np.where(y_score > 0.5, 1, 0)
    return y_pred, y_score


for i, (train_idx, val_idx) in enumerate(kfold):
    print("Cross Validation Fold {}".format(i))
    xs_train = x_train_val[train_idx] 
    y_train = y_train_val[train_idx]
    xs_val = x_train_val[val_idx] 
    y_val = y_train_val[val_idx]
    Xs_tensor = torch.from_numpy(xs_train).float()
    y_tensor = torch.from_numpy(y_train).float()
    dataset = TensorDataset(Xs_tensor, y_tensor)
    data_loader = DataLoader(dataset, batch_size=128, shuffle=True, num_workers=0)
    best_val_acc = 0.0
    best_val_f1 = 0.0
    best_val_auc = 0.0
    best_val_rmse = np.inf
    best_test_acc = 0.0
    best_test_rmse = np.inf

    
    num_features = x_train_val.shape[1]
    print(num_features)

    hidden_layers = [16, 32,64]
    output_dim = 1
    test_freq = 1
    model = FC(num_features, hidden_layers, output_size=output_dim,
                                             activation='sigmoid')
    
    model.to(device)
    
        
    loss_fn = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=3e-5,
                       weight_decay=1e-5)
    
    for ep in range(100):
        start_epoch_time = datetime.now()
        model.train()
        total_loss = 0.0
        num_batches = 0
        for _, data in enumerate(data_loader):
            Xs_i = data[0].to(device)

            y_i = data[-1].to(device)

            optimizer.zero_grad()
            y_pred = model(Xs_i)
            # forward propagation communication size in bytes

            loss = loss_fn(y_pred.view(-1), y_i)
            

            total_loss += loss.item()
            num_batches += 1
            loss.backward()
            optimizer.step()


        print("Epoch {}: training loss {}"
              .format(ep + 1, total_loss / num_batches))


        # test model
        if xs_val is not None and y_val is not None and (ep + 1) % test_freq == 0:
            model.eval()
            with torch.no_grad():    
                y_pred_train, y_score_train = predict(model,xs_train)
                y_pred_val, y_score_val = predict(model,xs_val)

                train_acc = accuracy_score(y_train, y_pred_train)
                val_acc = accuracy_score(y_val, y_pred_val)

                train_f1 = f1_score(y_train, y_pred_train) 
                val_f1 = f1_score(y_val, y_pred_val) 
                
                train_auc = roc_auc_score(y_train, y_score_train)
                val_auc = roc_auc_score(y_val, y_score_val)
                
                if val_f1 > best_val_f1:
                    best_val_f1 = val_f1
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                if val_auc > best_val_auc:
                    best_val_auc = val_auc
                print(
                    "Epoch {}: train accuracy {}, val accuracy {}".format(ep + 1, train_acc, val_acc))
                print("Epoch {}: train f1 {}, val f1 {}".format(ep + 1, train_f1, val_f1))
                print(" Epoch {}: best val acc {}, best val f1 {}".format(ep + 1, best_val_acc,
                                                                                   best_val_f1))
                print("Epoch {}: train auc {}, test auc {}".format(ep + 1, train_auc, val_auc))
    break

Loading creditcardfraud from file
Cross Validation Fold 0
29
Epoch 1: training loss 0.3939728724095722
Epoch 1: train accuracy 0.9981566370818297, val accuracy 0.9982054383021886
Epoch 1: train f1 0.0, val f1 0.0
 Epoch 1: best val acc 0.9982054383021886, best val f1 0.0
Epoch 1: train auc 0.1604683370239208, test auc 0.17888521005062014
Epoch 2: training loss 0.04611795657347476
Epoch 2: train accuracy 0.9981566370818297, val accuracy 0.9982054383021886
Epoch 2: train f1 0.0, val f1 0.0
 Epoch 2: best val acc 0.9982054383021886, best val f1 0.0
Epoch 2: train auc 0.40801912544786856, test auc 0.4550214230731978
Epoch 3: training loss 0.018334642083152282
Epoch 3: train accuracy 0.9981566370818297, val accuracy 0.9982054383021886
Epoch 3: train f1 0.0, val f1 0.0
 Epoch 3: best val acc 0.9982054383021886, best val f1 0.0
Epoch 3: train auc 0.7412736812696262, test auc 0.775321749665676
Epoch 4: training loss 0.011981304881771671
Epoch 4: train accuracy 0.9981566370818297, val accuracy 

In [14]:
x_train_val = np.concatenate(xs_train_val, axis=1)
x_test = np.concatenate(xs_test, axis=1)
rmse_list = []
acc_list = []
f1_list = []
kfold = KFold(n_splits=5, shuffle=True, random_state=0).split(y_train_val)

In [15]:
sum(y_test)/len(y_test)*100

0.0772444787753239

In [15]:
# SecureBoost without DP (use XGBoost instead since SecureBoost is lossless)

In [16]:
for i, (train_idx, val_idx) in enumerate(kfold):
    print("Cross Validation Fold {}".format(i))
    x_train = x_train_val[train_idx]
    y_train = y_train_val[train_idx]
    x_val = x_train_val[val_idx]
    y_val = y_train_val[val_idx]
    xg_cls = xgb.XGBClassifier(learning_rate=0.1,
                              max_depth=6,
                              n_estimators=200,
                              reg_alpha=10,
                              verbosity=2)
    xg_cls.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_val, y_val)], eval_metric='auc')
    y_pred = xg_cls.predict(x_test)
    f1 = f1_score(y_test, y_pred)
    f1_list.append(f1)
print("Finished training.")
print("-------------------------------------------------")
print("f1=" + str(f1_list))

Cross Validation Fold 0




[11:58:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 0 pruned nodes, max_depth=5
[0]	validation_0-auc:0.89011	validation_1-auc:0.86946
[11:58:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 0 pruned nodes, max_depth=6
[1]	validation_0-auc:0.89014	validation_1-auc:0.86949
[11:58:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 0 pruned nodes, max_depth=6
[2]	validation_0-auc:0.89938	validation_1-auc:0.87490
[11:58:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 0 pruned nodes, max_depth=5
[3]	validation_0-auc:0.89939	validation_1-auc:0.87490
[11:58:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 0 pruned nodes, max_depth=6
[4]	validation_0-auc:0.90731	validation_1-auc:0.89119
[11:58:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 0 pruned nodes, max_depth=6
[5]	validation_0-auc:0.90731	validation_1-auc:0.8911



[11:59:19] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 18 extra nodes, 0 pruned nodes, max_depth=4
[1]	validation_0-auc:0.90887	validation_1-auc:0.88996
[11:59:19] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 0 pruned nodes, max_depth=6
[2]	validation_0-auc:0.90888	validation_1-auc:0.88997
[11:59:19] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=6
[3]	validation_0-auc:0.92730	validation_1-auc:0.90091
[11:59:19] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 0 pruned nodes, max_depth=6
[4]	validation_0-auc:0.92731	validation_1-auc:0.90091
[11:59:20] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 0 pruned nodes, max_depth=5
[5]	validation_0-auc:0.92731	validation_1-auc:0.90091
[11:59:20] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 0 pruned nodes, max_depth=5
[6]	validation_0-auc:0.92731	validation_1-auc:0.9009



[11:59:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 18 extra nodes, 0 pruned nodes, max_depth=5
[0]	validation_0-auc:0.88820	validation_1-auc:0.88815
[11:59:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 0 pruned nodes, max_depth=5
[1]	validation_0-auc:0.88820	validation_1-auc:0.88815
[11:59:50] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 0 pruned nodes, max_depth=6
[2]	validation_0-auc:0.92273	validation_1-auc:0.92000
[11:59:50] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=6
[3]	validation_0-auc:0.92273	validation_1-auc:0.92001
[11:59:50] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 0 pruned nodes, max_depth=6
[4]	validation_0-auc:0.92273	validation_1-auc:0.92001
[11:59:51] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 0 pruned nodes, max_depth=6
[5]	validation_0-auc:0.92274	validation_1-auc:0.9200



[12:00:20] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 0 pruned nodes, max_depth=6
[0]	validation_0-auc:0.88364	validation_1-auc:0.90355
[12:00:20] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 0 pruned nodes, max_depth=6
[1]	validation_0-auc:0.88364	validation_1-auc:0.90355
[12:00:21] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 0 pruned nodes, max_depth=5
[2]	validation_0-auc:0.88364	validation_1-auc:0.90356
[12:00:21] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 0 pruned nodes, max_depth=6
[3]	validation_0-auc:0.90041	validation_1-auc:0.92763
[12:00:21] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 0 pruned nodes, max_depth=6
[4]	validation_0-auc:0.90041	validation_1-auc:0.92763
[12:00:22] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 0 pruned nodes, max_depth=5
[5]	validation_0-auc:0.90041	validation_1-auc:0.9276



[12:00:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 0 pruned nodes, max_depth=5
[0]	validation_0-auc:0.88604	validation_1-auc:0.88627
[12:00:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 0 pruned nodes, max_depth=5
[1]	validation_0-auc:0.88743	validation_1-auc:0.89081
[12:00:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 0 pruned nodes, max_depth=5
[2]	validation_0-auc:0.90546	validation_1-auc:0.90441
[12:00:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=6
[3]	validation_0-auc:0.90546	validation_1-auc:0.90442
[12:00:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 16 extra nodes, 0 pruned nodes, max_depth=4
[4]	validation_0-auc:0.90546	validation_1-auc:0.90443
[12:00:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=4
[5]	validation_0-auc:0.90545	validation_1-auc:0.9044

In [17]:
print("Finished training.")
print("-------------------------------------------------")
print("f1=" + str(f1_list))

Finished training.
-------------------------------------------------
f1=[0.7499999999999999, 0.6829268292682926, 0.7894736842105263, 0.7894736842105263, 0.7368421052631579]


In [None]:
sum(y_pred)

In [8]:
# SplitNN
num_parties = 2
xs_train_val, y_train_val, xs_test, y_test = load_creditcardfraud("data/creditcard/creditcard.csv", use_cache = False,
                                                        test_rate = 0.1)

Loading creditcardfraud from file


In [9]:
xs_train_val[1].shape

(256326, 15)

In [10]:
kfold = KFold(n_splits=5, shuffle=True, random_state=0).split(y_train_val)

In [11]:
print("Overall percentage: ", sum(y_train_val)/len(y_train_val))
for i, (train_idx, val_idx) in enumerate(kfold):
    y_train = y_train_val[train_idx]
    y_val = y_train_val[val_idx]
    print(f"Fold {i} -----")
    print("Percentage of fraud in train: ", sum(y_train)/len(y_train))
    print("Percentage of fraud in val: ", sum(y_val)/ len(y_val))

Overall percentage:  0.0018336025217886597
Fold 0 -----
Percentage of fraud in train:  0.0018433629181702916
Percentage of fraud in val:  0.001794561697811415
Fold 1 -----
Percentage of fraud in train:  0.0018482305265262531
Percentage of fraud in val:  0.001775090217497318
Fold 2 -----
Percentage of fraud in train:  0.0018336007334402934
Percentage of fraud in val:  0.0018336096752170096
Fold 3 -----
Percentage of fraud in train:  0.0018872433080888125
Percentage of fraud in val:  0.0016190383302448065
Fold 4 -----
Percentage of fraud in train:  0.0017555751703151746
Percentage of fraud in val:  0.0021457134497220327


In [12]:
def calculate_cls_weight(label):
    labels_tensor = torch.tensor(label, dtype=torch.float32)

    class_frequencies = torch.bincount(labels_tensor.long()) / len(labels_tensor)

    # Calculate class weights as the inverse of class frequencies
    class_weights = 1.0 / class_frequencies

    # Normalize the class weights to sum to 1
    class_weights /= class_weights.sum()
    
    return class_weights

In [13]:
def calculate_cls_weight_bin(label):
    labels_tensor = torch.tensor(label, dtype=torch.float32)
    num_positive_samples = (labels_tensor == 1).sum().item()
    num_negative_samples = (labels_tensor == 0).sum().item()
    pos_weight = torch.tensor(num_negative_samples / num_positive_samples, dtype=torch.float32)
    return pos_weight

In [14]:
calculate_cls_weight_bin(y_train_val)

tensor(544.3745)

In [15]:
acc_list = []
f1_list = []
kfold = KFold(n_splits=5, shuffle=True, random_state=0).split(y_train_val)

In [16]:
for i, (train_idx, val_idx) in enumerate(kfold):
    print("Cross Validation Fold {}".format(i))
    xs_train = [data[train_idx] for data in xs_train_val]
    y_train = y_train_val[train_idx]
    xs_val = [data[val_idx] for data in xs_train_val]
    y_val = y_train_val[val_idx]
    start = datetime.now()
    name = "splitnn_creditcard_party_{}_fold_{}".format(num_parties, i)
    writer = SummaryWriter("runs/{}".format(name))
    # ncf_counts = [counts[:2], counts[2:]]
    embed_dims = [[32, 32], [1, 4, 10, 4, 15, 5]]
    # cls_weight = calculate_cls_weight_bin(y_train)
    cls_weight = None
    aggregate_model = SplitNNModel(
        num_parties=num_parties,
        name=name,
        num_epochs=100,
        local_hidden_layers=[32, 16],
        local_output_size=3,
        lr=3e-5,
        agg_hidden_layers=[10],
        batch_size=128,
        weight_decay=1e-5,
        writer=writer,
        device='cuda:{}'.format("0"),
        update_target_freq=1,
        task='binary_classification',
        test_batch_size=1000,
        test_freq=1,
        cuda_parallel=False,
        model_type='fc',
        optimizer='adam',
        privacy=None,
        batches_per_lot=5,
        epsilon=1,
        delta=1.0 / xs_train[0].shape[0],
        num_workers=0,
        cls_weight = cls_weight,
        model_path="saved_model/"
    )
    _, _, rmse, _ = aggregate_model.train(xs_train, y_train, xs_val, y_val, xs_test, y_test, use_cache=False)
    y_pred_test, y_score_test = aggregate_model.predict(xs_test)
    test_f1 = f1_score(y_test, y_pred_test)
    f1_list.append(test_f1)
    print(aggregate_model.params)
    print("-------------------------------------------------")
    time_min = (datetime.now() - start).seconds / 60
    print("Time(min) {}: ".format(time_min))
print("Best f1={}".format(f1_list))

Cross Validation Fold 0
Start training SplitNN
[SplitNN] Epoch 1: training loss 0.5790474514290458
[Final] Epoch 1: train accuracy 0.9981566370818297, val accuracy 0.9982054383021886
[Final] Epoch 1: train f1 0.0, val f1 0.0
[Final] Epoch 1: best val acc 0.9982054383021886, best val f1 0.0
[Final] Epoch 1: train auc 0.05153126550831283, test auc 0.05188160258011456
Epoch 1 duration 11 sec
[SplitNN] Epoch 2: training loss 0.22489827544343002
[Final] Epoch 2: train accuracy 0.9981566370818297, val accuracy 0.9982054383021886
[Final] Epoch 2: train f1 0.0, val f1 0.0
[Final] Epoch 2: best val acc 0.9982054383021886, best val f1 0.0
[Final] Epoch 2: train auc 0.06883099446197324, test auc 0.06768881871058843
Epoch 2 duration 10 sec
[SplitNN] Epoch 3: training loss 0.04266267091050949
[Final] Epoch 3: train accuracy 0.9981566370818297, val accuracy 0.9982054383021886
[Final] Epoch 3: train f1 0.0, val f1 0.0
[Final] Epoch 3: best val acc 0.9982054383021886, best val f1 0.0
[Final] Epoch 3: 

KeyboardInterrupt: 

In [21]:
pos_weight = torch.randint(1, 100, (1,)).float()
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
x = torch.randn(10,1, requires_grad=True)
y = torch.randint(0, 2, (10,1)).float()

loss = criterion(x, y)

In [17]:
pos_weight

tensor([36.])

In [19]:
# FedOnce
num_parties = 2
xs_train_val, y_train_val, xs_test, y_test = load_creditcardfraud("data/creditcard/creditcard.csv", use_cache = False,
                                                        test_rate = 0.1)

Loading creditcardfraud from file


In [20]:
kfold = KFold(n_splits=5, shuffle=True, random_state=0).split(y_train_val)

In [21]:
score_summary = []

In [22]:
def calculate_cls_weight_bin(label):
    labels_tensor = torch.tensor(label, dtype=torch.float32)
    num_positive_samples = (labels_tensor == 1).sum().item()
    num_negative_samples = (labels_tensor == 0).sum().item()
    pos_weight = torch.tensor(num_negative_samples / num_positive_samples, dtype=torch.float32)
    return pos_weight

In [None]:
for party_id in range(num_parties):
    kfold = KFold(n_splits=5, shuffle=True, random_state=0).split(y_train_val)
    acc_list = []
    for i, (train_idx, val_idx) in enumerate(kfold):
        print("Cross Validation Fold {}".format(i))
        xs_train = [data[train_idx] for data in xs_train_val]
        y_train = y_train_val[train_idx]
        xs_val = [data[val_idx] for data in xs_train_val]
        y_val = y_train_val[val_idx]
        start = datetime.now()
        name = "fedonce_creditcard_party_{}_active_{}_fold_{}".format(num_parties, party_id, i)
        writer = SummaryWriter("runs/{}".format(name))
        class_weights = calculate_cls_weight_bin(y_train)
        aggregate_model = VerticalFLModel(
            num_parties=num_parties,
            active_party_id=party_id,
            name=name,
            full_name=name,
            num_epochs=1 if party_id == 0 else 1,
            num_local_rounds=1 if party_id == 0 else 1,
            local_lr=1e-4,
            local_hidden_layers=[32, 16] if party_id == 0 else [128],
            local_batch_size=64,
            local_weight_decay=1e-5,
            local_output_size=3 if party_id == 0 else 64,
            num_agg_rounds=1,
            agg_lr=1e-4 if party_id == 0 else 5e-4,
            agg_hidden_layers=[10] if party_id == 0 else [32, 32],
            agg_batch_size=64,
            agg_weight_decay=2e-4 if party_id == 0 else 1e-5,
            writer=writer,
            device='cuda:{}'.format("0"),
            update_target_freq=1,
            task='binary_classification',
            n_classes = 2,
            test_batch_size=1000,
            test_freq=1,
            cuda_parallel=False,
            n_channels=1,
            model_type='fc',
            optimizer='adam',
            privacy=None,
            num_workers=0,
            cls_weight = class_weights)

        _, _, rmse, _ = aggregate_model.train(xs_train, y_train, xs_val, y_val, use_cache=False)
        y_test_score = aggregate_model.predict_agg(xs_test).astype(np.int64)
        y_test = y_test.astype(np.int64)
        test_acc = f1_score(y_test, y_test_score)
        acc_list.append(test_acc)
        print(aggregate_model.params)
        time_min = (datetime.now() - start).seconds / 60
        print("Time(min) {}: ".format(time_min))
        break
    score_summary.append(acc_list)
    print("F1 for active party {}".format(party_id) + str(acc_list))
    print("-------------------------------------------------")

for i, result in enumerate(score_summary):
    print("Party {}: F1={}".format(i, result))

Cross Validation Fold 0
Initializing and local labels
Finished initializing
Start training local models
[Local] Party 1, Epoch 1: training loss 0.3286517994889941
Epoch 1 duration 11 sec
Local training finished, time = 11 sec
Adding noise 0.0 to predicted labels
Start training aggregation model
[Aggregating] Epoch 1: training loss 1.3806181097410144
[Final] Epoch 1: train accuracy 0.99811274748854, test accuracy 0.9982054383021886
[Final] Epoch 1: train f1 0.0, test f1 0.0
[Final] Epoch 1: train auc 0.5839286251411739, test f1 0.6069209599356917
[Final] Epoch 1: best test acc 0.9982054383021886, best test f1 0.0, best test auc 0.6069209599356917
Epoch 1 duration 17 sec
repr_noise=0.0
out_norm=1.0
out_privacy=None
full_name=fedonce_creditcard_party_2_active_0_fold_0
ncf_embed_dims=None
ncf_counts=None
inter_party_comp_method=None
num_workers=0
momentum=0
batches_per_lot=1
grad_norm_C=1.0
delta=0.0001
epsilon=1
privacy=None
optimizer=adam
model_type=fc
n_channels=1
cuda_parallel=False
te

In [None]:
score_summary

In [15]:
num_parties = 1

xs_train_val, y_train_val, xs_test, y_test = load_creditcardfraud("data/creditcard/creditcard.csv",num_parties = num_parties, use_cache = False,
                                                        test_rate = 0.1)
x_train_val = np.concatenate(xs_train_val, axis=1)
x_test = np.concatenate(xs_test, axis=1)
print(x_train_val.shape)
print(x_test.shape)
kfold = KFold(n_splits=5, shuffle=True, random_state=0).split(y_train_val)
f1_list = []
for i, (train_idx, val_idx) in enumerate(kfold):
    print("Cross Validation Fold {}".format(i))
    x_train = x_train_val[train_idx]
    y_train = y_train_val[train_idx]
    x_val = x_train_val[val_idx]
    y_val = y_train_val[val_idx]
    name = "combine_movielens_fold_{}".format(i)
    writer = SummaryWriter("runs/{}".format(name))
    single_model = SingleParty(
        party_id=0,
        num_epochs=100,
        lr=1e-4,
        hidden_layers=[64, 32],
        batch_size=128,
        weight_decay=1e-4,
        writer=writer,
        device='cuda:0',
        task="binary_classification",
        test_batch_size=1000,
        test_freq=1,
        model_type='fc',
        optimizer='adam',
        cuda_parallel=False,
        n_channels=1,
        n_classes = 1
    )
    _, _, _, _ = single_model.train(x_train, y_train, x_val, y_val, xs_test, y_test)
    y_pred_test, y_score_test = single_model.predict(x_test)
    test_f1 = f1_score(y_test, y_pred_test)
    f1_list.append(test_f1)
    print(single_model.params)
    break
print("-------------------------------------------------")
print("Best accuracy={}".format(f1_list))

Loading creditcardfraud from file
(256326, 29)
(28481, 29)
Cross Validation Fold 0
Start training
Party 0, Epoch 1: training loss 0.13178486255382404
[Final] Epoch 1: train accuracy 0.9981712669462597, test accuracy 0.9982249444075996
[Final] Epoch 1: train f1 0.015748031496062992, test f1 0.02150537634408602
[Final] Epoch 1: train auc 0.8042236339876092, test auc 0.8002471533608269
[Final] Epoch 1: best test acc 0.9982249444075996, best test f1 0.02150537634408602, best test auc 0.8002471533608269
Epoch 1 duration 5 sec
Party 0, Epoch 2: training loss 0.007576693399427354
[Final] Epoch 2: train accuracy 0.9990978250268214, test accuracy 0.9991027191510943
[Final] Epoch 2: train f1 0.7104851330203442, test f1 0.7124999999999999
[Final] Epoch 2: train auc 0.9307286463570357, test auc 0.9330794467638968
[Final] Epoch 2: best test acc 0.9991027191510943, best test f1 0.7124999999999999, best test auc 0.9330794467638968
Epoch 2 duration 5 sec
Party 0, Epoch 3: training loss 0.0045627826164

In [4]:
import numpy as np
from sklearn.model_selection import KFold

In [25]:
arr = np.arange(1, 1001)

In [39]:
kfold = KFold(n_splits=5, shuffle=True, random_state=0).split(arr)

In [38]:
a = iter(kfold)

In [34]:
a.__next__()[0]

array([  0,   2,   3,   4,   6,   7,   9,  10,  11,  12,  13,  15,  16,
        17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  28,  29,  32,
        33,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  46,  47,
        48,  49,  50,  51,  52,  53,  56,  57,  58,  59,  61,  62,  63,
        64,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  98,  99, 100, 102, 104, 105, 106, 107,
       109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
       123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135,
       136, 137, 138, 139, 140, 143, 146, 147, 148, 149, 151, 152, 153,
       154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166,
       167, 168, 169, 170, 171, 172, 173, 174, 176, 177, 178, 179, 180,
       181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193,
       194, 195, 197, 198, 199, 200, 201, 203, 205, 206, 207, 20

In [40]:
b = iter(kfold)

In [41]:
b.__next__()[0] == a.__next__()[0]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,