In [1]:
import os, sys
sys.path.append('../../src')

from datetime import datetime
import pandas as pd

import visualization.visualize as vis
from data.load_dataset import load_dataset
from models.train_model import get_dataloaders, set_and_train_model
from models.model import get_model, evaluate_model, save_model
from federated.federated_learning import get_federated_avg_model

## Federated training: running all sites (databases) and rounds

In [2]:
# Run configuration.
# Root folder under which the federated models are stored.
base_path = '../../models/federated_models/'
# Value passed as num_epochs to set_and_train_model for every local training run.
num_epochs = 30

In [3]:
# Federated training driver.
# For every round: each site loads the previous round's federated model, trains
# it on its own data, and the resulting local models are combined by
# get_federated_avg_model (which also receives the local training-set sizes).
# The combined model is then evaluated on every site's 'test' split and saved
# under this round's name, which the next round loads as its starting point.

# Unique id for this execution: used as the run folder name and as the prefix of
# every saved model name. The date/time components are deliberately left
# unpadded so the existing naming scheme is unchanged.
exec_time = datetime.now()
exec_id = f'{exec_time.year}-{exec_time.month}-{exec_time.day}_{exec_time.hour}h{exec_time.minute}m{exec_time.second}s'

# NOTE(review): base_path is rebound to the run folder here (later cells read
# it), so re-running this cell without first re-running the configuration cell
# nests a new run folder inside the previous one.
base_path = os.path.join(base_path, f'{exec_id}/')
if not os.path.isdir(base_path):
    # makedirs (unlike mkdir) also creates missing parent folders, so the cell
    # works when the models root does not exist yet; exist_ok avoids a failure
    # if the folder appears between the check above and this call.
    os.makedirs(base_path, exist_ok=True)
    print(base_path, 'folder created.')

# One record of evaluation metrics per (round, site) pair.
training_stats = []
round_number_list = [1,2,3,4,5]
site_number_list = [1,2,3,4,5]
for round_number in round_number_list:
    # Name under which this round's federated model is saved, and the name of
    # the previous round's model that every site starts from.
    model_id = f'fed_model_round-{round_number}'
    model_id_old = f'fed_model_round-{round_number-1}'
    model_name = f'{exec_id}_{model_id}'
    model_name_old = f'{exec_id}_{model_id_old}'
    local_models = []
    local_train_db_sizes = []

    # --- Local training: one model per site, all starting from the same checkpoint name ---
    for site_number in site_number_list:
        print(f'\n\nTraining round_number {round_number} site_number {site_number}:')
        df = load_dataset(site_number)
        dataloaders, db_sizes = get_dataloaders(df, round_number, test_all_rounds=True)

        ### Visualize a few images
        # Let's visualize a few training images so as to understand the data augmentations.
        # vis.plot_samples(dataloaders['train'], title=f'site_number {site_number} round_number {round_number} samples')

        # Train the model
        # In round 1 there is no round-0 checkpoint, so get_model reports
        # "File Not Found" and starts from its default initialization.
        # NOTE(review): if that default initialization is random, each site
        # starts round 1 from different weights before averaging -- confirm.
        model = get_model(model_name=model_name_old, base_path=base_path)
        model, model_hist = set_and_train_model(model, dataloaders, num_epochs=num_epochs)
        # vis.plot_model_hist(model_hist, f'Baseline Model site_number: {site_number} round_number: {round_number}')

        local_models.append(model)
        local_train_db_sizes.append(db_sizes['train'])

    # --- Aggregation: combine the local models into this round's federated model ---
    fed_model = get_federated_avg_model(local_models, local_train_db_sizes)

    # --- Evaluation: score the federated model on every site's test split ---
    for site_number in site_number_list:
        print(f'\nEvaluating round_number {round_number} site_number {site_number}:')
        df = load_dataset(site_number)
        # NOTE(review): training builds its loaders with test_all_rounds=True
        # while evaluation uses the default -- confirm the difference is intended.
        dataloaders, _ = get_dataloaders(df, round_number)

        # Evaluate model predictions
        model_stats = evaluate_model(fed_model, dataloaders, split='test')
        # Tag the metrics with their origin so the final report can be grouped.
        model_stats['round_number'] = round_number
        model_stats['site_number'] = site_number
        training_stats.append(model_stats)

    # Saving current model:
    save_model(fed_model, model_name, base_path=base_path)

../../models/federated_models/2022-11-11_8h20m51s/ folder created.


Training round_number 1 site_number 1:
train_set size: 5576
valid_set size: 1634
test_set size: 4260
total: 11470

File Not Found: ../../models/federated_models/2022-11-11_8h20m51s/2022-11-11_8h20m51s_fed_model_round-0.pth.
Model will start with default initialization.
Cuda available: True. Model sent to device: cuda:0.

Epoch 0/29
----------
train Loss: 0.6550 Acc: 0.6227
valid Loss: 0.6200 Acc: 0.6665
2m 46s/epoch

Epoch 1/29
----------
train Loss: 0.6275 Acc: 0.6628
valid Loss: 0.7147 Acc: 0.6193
2m 39s/epoch

Epoch 2/29
----------
train Loss: 0.5968 Acc: 0.6827
valid Loss: 0.6151 Acc: 0.6591
2m 40s/epoch

Epoch 3/29
----------
train Loss: 0.5622 Acc: 0.7109
valid Loss: 0.6515 Acc: 0.6487
2m 46s/epoch

Epoch 4/29
----------
train Loss: 0.5153 Acc: 0.7446
valid Loss: 0.7287 Acc: 0.6157
2m 47s/epoch

Epoch 5/29
----------
train Loss: 0.4826 Acc: 0.7747
valid Loss: 0.7488 Acc: 0.6536
2m 49s/epoch

Epoch 6/29
---------

In [4]:
# Turn the collected per-round, per-site evaluation records into a table and
# write it as a CSV file into base_path, then display the table.
stats_csv_path = f'{base_path}{exec_id}_training_stats.csv'
training_stats = pd.DataFrame(training_stats)
training_stats.to_csv(stats_csv_path, index=False)

training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,site_number
0,0.673691,0.650275,0.464187,0.687755,0.554276,"[[782, 153], [389, 337]]",1,1
1,0.67617,0.662053,0.473256,0.732014,0.574859,"[[850, 149], [453, 407]]",1,2
2,0.674774,0.653508,0.492829,0.670213,0.567994,"[[815, 186], [389, 378]]",1,3
3,0.672062,0.654797,0.474801,0.70334,0.566904,"[[763, 151], [396, 358]]",1,4
4,0.6655,0.644511,0.491309,0.648318,0.558998,"[[907, 230], [439, 424]]",1,5
5,0.688427,0.685232,0.658021,0.643797,0.650831,"[[939, 379], [356, 685]]",2,1
6,0.698819,0.697244,0.673986,0.677995,0.675985,"[[977, 379], [386, 798]]",2,2
7,0.692182,0.69285,0.698427,0.637131,0.666372,"[[945, 430], [326, 755]]",2,3
8,0.702146,0.700542,0.680304,0.675165,0.677725,"[[888, 344], [336, 715]]",2,4
9,0.699064,0.700359,0.711129,0.642751,0.675214,"[[1073, 483], [353, 869]]",2,5


In [5]:
# Read the statistics report back from its CSV file and display it.
report_csv_path = f'{base_path}{exec_id}_training_stats.csv'
training_stats = pd.read_csv(report_csv_path)
training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,site_number
0,0.673691,0.650275,0.464187,0.687755,0.554276,[[782 153]\n [389 337]],1,1
1,0.67617,0.662053,0.473256,0.732014,0.574859,[[850 149]\n [453 407]],1,2
2,0.674774,0.653508,0.492829,0.670213,0.567994,[[815 186]\n [389 378]],1,3
3,0.672062,0.654797,0.474801,0.70334,0.566904,[[763 151]\n [396 358]],1,4
4,0.6655,0.644511,0.491309,0.648318,0.558998,[[907 230]\n [439 424]],1,5
5,0.688427,0.685232,0.658021,0.643797,0.650831,[[939 379]\n [356 685]],2,1
6,0.698819,0.697244,0.673986,0.677995,0.675985,[[977 379]\n [386 798]],2,2
7,0.692182,0.69285,0.698427,0.637131,0.666372,[[945 430]\n [326 755]],2,3
8,0.702146,0.700542,0.680304,0.675165,0.677725,[[888 344]\n [336 715]],2,4
9,0.699064,0.700359,0.711129,0.642751,0.675214,[[1073 483]\n [ 353 869]],2,5
