Датасет - https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud

Задача — классифицировать fraud-транзакции. В качестве основной метрики возьмём AUC (Area Under the ROC Curve). Грубо говоря, AUC отражает качество классификационной модели: чем ближе значение к 1, тем модель лучше (значение 0.5 соответствует случайному угадыванию).

Для сравнения федеративного и обычного подходов к обучению модели возьмём трёх клиентов: сначала обучим модели локально на каждом клиенте (без обмена весами), а затем федеративно (с обменом и агрегацией весов). После этого сравним средние значения AUC по всем трём клиентам.

# Пример локального обучения

In [1]:
from client.dataset import get_train_test_datasets
from client.net import Net

import torch
from torch.optim import AdamW

from sklearn.metrics import roc_auc_score

import numpy as np

In [2]:
# Experiment settings shared by the cells below.
clients_n: int = 3    # number of clients; client indices run from 1 to clients_n
batch_size: int = 16  # mini-batch size of the local train/test DataLoaders
epochs: int = 15      # number of local training epochs per client

In [3]:
# Final test ROC AUC of every locally trained client, appended in client order.
single_train_roc_auc_scores = np.empty(0)
# Starting weights of every client's model, keyed by 'client_<index>'.
init_parameters: dict = {}

In [4]:
# Baseline: train a separate model on each client's own data (no weight exchange)
# and record the test ROC AUC measured after the last epoch.
for clients_i in range(1, clients_n + 1):
    dataset_path = f'client/anti_fraud_dataset/client_{clients_i}/client_anti_fraud_dataset.csv'

    train_set, test_set = get_train_test_datasets(dataset_path)

    train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)

    # The feature count is read from the first training sample.
    model = Net(n_features=train_set[0]['transaction'].shape[0])
    # Keep the untrained weights under the client's key for later reuse.
    # NOTE(review): if get_weights() returns live references rather than copies,
    # these "initial" weights will change during training - confirm it copies.
    init_parameters[f'client_{clients_i}'] = model.get_weights()
    optimizer = AdamW(params=model.parameters(), lr=0.0001)
    loss_fn = torch.nn.BCELoss()

    # Stays 0 only when epochs == 0; otherwise overwritten after every epoch.
    test_roc_auc_score = 0

    for epoch in range(epochs):
        # ---- training pass ----
        train_epoch_loss = 0.0
        model.train()

        for data in train_dataloader:
            transactions, batch_labels = data['transaction'], data['label']
            # (batch, features) -> (batch, 1, features), the layout fed to the model.
            transactions = transactions.reshape(transactions.shape[0], 1, transactions.shape[1])

            optimizer.zero_grad()

            output = model(transactions)

            loss = loss_fn(output, batch_labels)
            loss.backward()
            optimizer.step()

            train_epoch_loss += loss.item()

        # Mean loss per batch; tracked for inspection, not printed.
        train_epoch_loss /= len(train_dataloader)

        # ---- evaluation pass ----
        test_epoch_loss = 0.0
        model.eval()

        # Gather per-batch arrays and join them once instead of re-copying the
        # accumulated array with np.hstack on every batch.
        output_chunks = []
        label_chunks = []

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            for data in test_dataloader:
                transactions, label = data['transaction'], data['label']
                transactions = transactions.reshape(transactions.shape[0], 1, transactions.shape[1])
                output = model(transactions)

                test_epoch_loss += loss_fn(output, label).item()
                output_chunks.append(output.detach().numpy().reshape(-1))
                label_chunks.append(label.reshape(-1).numpy())

        test_epoch_loss /= len(test_dataloader)
        outputs = np.concatenate(output_chunks)
        labels = np.concatenate(label_chunks)
        test_roc_auc_score = roc_auc_score(labels, outputs)

    # Only the AUC from the final epoch is kept for this client.
    single_train_roc_auc_scores = np.append(single_train_roc_auc_scores, test_roc_auc_score)

In [5]:
# Print the final local-training AUC of each client; clients are numbered from 1.
for client_number, client_auc in enumerate(single_train_roc_auc_scores, start=1):
    print(f"Client {client_number} AUC = {client_auc}")

Client 1 AUC = 0.95625
Client 2 AUC = 0.980625
Client 3 AUC = 0.9911448450347882


In [6]:
# Average of the per-client AUCs obtained with purely local training.
single_train_mean_auc = np.mean(single_train_roc_auc_scores)
print(f"Среднее значение ROC AUC при раздельном обучении - {single_train_mean_auc}")

Среднее значение ROC AUC при раздельном обучении - 0.9760066150115961


# Пример федеративного обучения

In [7]:
import os
import shutil

from federated_learning.core.client.client_manager import ClientManager
from federated_learning.core.server.server import Server
from federated_learning.core.utils.typing import Config
from federated_learning.examples.anti_fraud_fl.client.client import create_client

from federated_learning.strategy.fed_avg import FedAvgStrategy

In [8]:
# Number of rounds passed to Server.fit below.
COMMUNICATION_ROUNDS: int = 15

In [9]:
def on_fit_config_fn(server_round) -> Config:
    """Return the configuration used for a fit round.

    The same settings are returned for every round: ``server_round`` is
    accepted but not consulted. A new dict is built on each call.
    """
    return {'batch_size': 16}


def on_evaluate_config_fn(server_round) -> Config:
    """Return the configuration used for an evaluation round.

    The same settings are returned for every round: ``server_round`` is
    accepted but not consulted. A new dict is built on each call.
    """
    return {'batch_size': 16}

In [10]:
# Register one client per dataset, each with its own folders for global and
# local weights under a freshly created "weights" directory.
client_manager = ClientManager()
weights_path = "weights"
if os.path.exists(weights_path):
    # Let a failed removal raise here: silently ignoring it would only defer the
    # failure to the directory creation below, with a less helpful error.
    shutil.rmtree(weights_path)
os.mkdir(weights_path)
for client_i in range(1, clients_n + 1):
    client_id = f'client_{client_i}'
    client_folder_path = os.path.join(weights_path, client_id)
    global_weights_path = os.path.join(client_folder_path, "global_weights")
    local_weights_path = os.path.join(client_folder_path, "local_weights")
    # makedirs also creates the per-client parent folder on the first call.
    os.makedirs(global_weights_path)
    os.makedirs(local_weights_path)

    client = create_client(
        id=client_id,
        dataset_path=f"client/anti_fraud_dataset/client_{client_i}/client_anti_fraud_dataset.csv",
        global_weights_save_folder_path=global_weights_path,
        local_weights_save_folder_path=local_weights_path,
        # Weights stored for this client by the local-training cell.
        init_weights=init_parameters[client_id],
    )
    client_manager.add_client(client)

In [11]:
# Strategy object; the two callbacks supply the per-round fit/evaluate configs.
strategy = FedAvgStrategy(
    on_fit_config_fn=on_fit_config_fn,
    on_evaluate_config_fn=on_evaluate_config_fn,
)

In [12]:
# Build the server from the registered clients and the chosen strategy.
server = Server(client_manager, strategy)

In [13]:
# Run the federated training for COMMUNICATION_ROUNDS rounds. The cells below
# treat the result as a list of (client, result) pairs, where the client has an
# `id` and the result exposes `metrics['test_roc_auc_score']`.
test_results = server.fit(COMMUNICATION_ROUNDS)

In [14]:
# Per-client test ROC AUC from the federated run; starts empty and is appended to.
federated_train_roc_auc_scores = np.empty(0)

In [15]:
# Sort in place by the id of the first element of each entry (the client).
# NOTE(review): ids look like 'client_<n>' strings, so ordering would be
# lexicographic ('client_10' before 'client_2') - confirm if clients_n grows.
test_results.sort(key=lambda client_and_result: client_and_result[0].id)

In [16]:
 for client_results in test_results:
    client_id = client_results[0].id
    client_result = client_results[1]
    federated_train_roc_auc_scores = np.append(federated_train_roc_auc_scores, client_result.metrics['test_roc_auc_score'])
    print(f"Client {client_id} AUC = {client_result.metrics['test_roc_auc_score']}")

Client client_1 AUC = 0.9854103343465046
Client client_2 AUC = 0.9878787878787879
Client client_3 AUC = 0.9958258795468098


In [17]:
# Average of the per-client AUCs obtained with federated training.
federated_train_mean_auc = np.mean(federated_train_roc_auc_scores)
print(f"Среднее значение ROC AUC при федеративном обучении - {federated_train_mean_auc}")

Среднее значение ROC AUC при федеративном обучении - 0.9897050005907007
