# PPFL
(run after HFL training has completed)

In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler

from PPFL import *

In [2]:
# Client 5's local data, used below as the training set.
# 'fraud_bool' is the label column; every other column is a feature.
client5 = pd.read_csv('./client5.csv')
y_5 = client5['fraud_bool']
X_5 = client5.drop(columns=['fraud_bool'])

# Held-out evaluation data with the same column layout.
# NOTE(review): "under" presumably means under-sampled -- confirm with the data owner.
test_under = pd.read_csv('./client_test_under.csv')
y_test_under = test_under['fraud_bool']
X_test_under = test_under.drop(columns=['fraud_bool'])

In [3]:
# Integer codes for the categorical (string) columns. The same mapping is
# applied to the training and the evaluation frame.
object_col = {'payment_type':{'AE':0, 'AD':1, 'AC':2, 'AA':3, 'AB':4},
                'employment_status':{'CE':0, 'CA':1, 'CB':2, 'CC':3, 'CG':4, 'CD':5, 'CF':6},
                'housing_status':{'BE':0, 'BF':1, 'BC':2, 'BG':3, 'BA':4, 'BD':5, 'BB':6},
                'source':{'INTERNET':0, 'TELEAPP':1},
                'device_os':{'other':0, 'windows':1, 'x11':2, 'linux':3, 'macintosh':4}}

# Build the feature frames from the raw CSV frames inside this cell.
# X_5 / X_test_under are rebound to NumPy arrays at the bottom, so reading
# them here would make a second run of the cell fail on `X_5[col]`.
train_features = client5.drop('fraud_bool', axis=1)
test_features = test_under.drop('fraud_bool', axis=1)

for col, vals in object_col.items():
    train_features[col] = train_features[col].replace(vals)
    test_features[col] = test_features[col].replace(vals)

# Fit the scaler on the training features only, then reuse its statistics
# for the evaluation features.
scaler = StandardScaler()
X_5 = scaler.fit_transform(train_features)
X_test_under = scaler.transform(test_features)


# model training

In [4]:
# Two independent FL_net instances with different input widths.
# NOTE(review): 20 and 11 equal the lengths of common_idx and specific_idx
# defined further down -- presumably they must stay in sync; confirm.
hfl_input_dim = 20
vfl_input_dim = 11

HFL_model = FL_net(input_dim=hfl_input_dim)
VFL_model = FL_net(input_dim=vfl_input_dim)

In [5]:
import pickle

# NOTE(security): pickle.load can execute arbitrary code while deserializing.
# Only load 'weight.pickle' when it comes from a trusted source.
with open('weight.pickle', 'rb') as f:
    weight = pickle.load(f)

# Drop the 'module.' fragment from the checkpoint keys so they line up with
# HFL_model's own parameter names.
# NOTE(review): the prefix presumably comes from a DataParallel wrapper -- confirm.
hfl_state = {k.replace('module.', ''): v for k, v in weight.items()}

# A checkpoint key the model does not know is an error (the previous
# per-key copy raised KeyError for it as well).
unknown_keys = sorted(set(hfl_state) - set(HFL_model.state_dict()))
if unknown_keys:
    raise KeyError(f"weight.pickle has keys not present in HFL_model: {unknown_keys}")

# load_state_dict checks tensor shapes, whereas a raw `.copy_()` would
# silently broadcast a wrongly-shaped tensor. strict=False keeps the old
# tolerance for a checkpoint that covers only part of the model.
load_result = HFL_model.load_state_dict(hfl_state, strict=False)
if load_result.missing_keys:
    print('HFL_model entries not found in weight.pickle (left unchanged):',
          load_result.missing_keys)

In [6]:
# Feature-column positions handed to the PPFL wrapper. The two lists are
# disjoint and together cover positions 0..30.
# NOTE(review): presumably common_idx feeds HFL_model and specific_idx feeds
# VFL_model -- confirm against PPFL.PPFL_net.
common_idx = [2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 17, 19, 20, 22, 23, 24, 25, 26, 28, 30]
specific_idx = [0, 1, 8, 9, 10, 11, 16, 18, 21, 27, 29]

# Prefer GPU 2 as before. 'cuda:2' is not a valid device on a host with
# fewer than three GPUs, so fall back to the first GPU there, and to the
# CPU when CUDA is unavailable.
if torch.cuda.is_available():
    gpu_index = 2 if torch.cuda.device_count() > 2 else 0
    device = torch.device(f'cuda:{gpu_index}')
else:
    device = torch.device('cpu')

# Look the class up on the module rather than through the bare name:
# the instance is stored under the name `PPFL_net` (later cells rely on
# that), which hides the star-imported class. Going through the module
# keeps this cell re-runnable.
import PPFL as _ppfl_module
PPFL_net = _ppfl_module.PPFL_net(HFL_model, VFL_model, common_idx=common_idx, specific_idx=specific_idx, device=device)

In [7]:
from torch.utils.data import DataLoader, Dataset, TensorDataset

# Training split: scaled features as a default-dtype float tensor,
# labels as a LongTensor (the dtype nn.CrossEntropyLoss takes for class indices).
train_inputs = torch.Tensor(X_5)
train_labels = torch.LongTensor(y_5.values)
DS5 = TensorDataset(train_inputs, train_labels)

# Evaluation split, converted the same way.
eval_inputs = torch.Tensor(X_test_under)
eval_labels = torch.LongTensor(y_test_under.values)
DS_test_under = TensorDataset(eval_inputs, eval_labels)

# Both loaders use DataLoader's default ordering (no shuffling).
# NOTE(review): consider shuffle=True for the training loader -- confirm intent.
loader5 = DataLoader(dataset=DS5, batch_size=128)
loader_test_under = DataLoader(dataset=DS_test_under, batch_size=128)

In [None]:
# Train PPFL_net on client 5's data and report train/validation accuracy
# and F1 every few epochs.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(PPFL_net.parameters(), lr=0.01, weight_decay=0.0004)

n_epochs = 100
report_every = 3  # metrics are printed on epochs 0, 3, 6, ...

for epoch in range(n_epochs):
    PPFL_net.train()

    # Collect per-batch arrays and concatenate once per epoch; calling
    # np.append inside the loop recopies the whole array on every batch.
    tr_pred_batches = []
    tr_real_batches = []
    for xx, yy in loader5:
        xx = xx.to(device)
        yy = yy.to(device)
        optimizer.zero_grad()
        y_pred = PPFL_net(xx)
        # Predicted class = index of the largest output along dim 1.
        y_prediction = torch.argmax(y_pred, dim=1).detach().cpu().numpy()
        loss = criterion(y_pred, yy)
        loss.backward()
        optimizer.step()
        tr_pred_batches.append(y_prediction)
        tr_real_batches.append(yy.detach().cpu().numpy())
    tr_preds = np.concatenate(tr_pred_batches)
    tr_reals = np.concatenate(tr_real_batches)
    if epoch % report_every == 0:
        print('\nepoch: ', epoch)
        print('tr accuracy: ', accuracy_score(tr_reals, tr_preds))
        print('tr f1: ', f1_score(tr_reals, tr_preds))

    # Validation pass. No loss is computed here because nothing consumes it.
    PPFL_net.eval()
    val_pred_batches = []
    val_real_batches = []
    with torch.no_grad():
        for xx_val, yy_val in loader_test_under:
            xx_val = xx_val.to(device)
            yy_val = yy_val.to(device)
            y_pred = PPFL_net(xx_val)
            y_prediction = torch.argmax(y_pred, dim=1).detach().cpu().numpy()
            val_pred_batches.append(y_prediction)
            val_real_batches.append(yy_val.detach().cpu().numpy())
    # Keep the results under the names val_preds / val_reals as NumPy arrays.
    val_preds = np.concatenate(val_pred_batches)
    val_reals = np.concatenate(val_real_batches)
    if epoch % report_every == 0:
        print('\nval accuracy: ', accuracy_score(val_reals, val_preds))
        print('val f1: ', f1_score(val_reals, val_preds))

In [9]:
# Final evaluation of the trained PPFL_net on the held-out loader.
PPFL_net.eval()

# Start from empty accumulators so the metrics cover exactly one pass over
# loader_test_under, independent of arrays left behind by earlier cells.
val_pred_batches = []
val_real_batches = []
with torch.no_grad():
    for xx_val, yy_val in loader_test_under:
        xx_val = xx_val.to(device)
        yy_val = yy_val.to(device)
        y_pred = PPFL_net(xx_val)
        # Predicted class = index of the largest output along dim 1.
        y_prediction = torch.argmax(y_pred, dim=1).detach().cpu().numpy()
        val_pred_batches.append(y_prediction)
        val_real_batches.append(yy_val.detach().cpu().numpy())

# One concatenation at the end instead of np.append per batch.
val_preds = np.concatenate(val_pred_batches)
val_reals = np.concatenate(val_real_batches)
print('\nval accuracy: ', accuracy_score(val_reals, val_preds))
print('val f1: ', f1_score(val_reals, val_preds))


val accuracy:  0.76186158960411
val f1:  0.7596095179987798
