In [1]:
import os, sys
sys.path.append('../../src')

from datetime import datetime
import pandas as pd

import visualization.visualize as vis
from data.load_dataset import load_dataset
from models.train_model import get_dataloaders, set_and_train_model
from models.model import get_model, evaluate_model, save_model
from federated.federated_learning import get_federated_avg_model

## Running all databases and rounds

In [2]:
num_epochs = 30
base_path='../../models/federated_models/'

In [3]:
exec_time = datetime.now()
exec_id = f'{exec_time.year}-{exec_time.month}-{exec_time.day}_{exec_time.hour}h{exec_time.minute}m{exec_time.second}s'
base_path = os.path.join(base_path, f'{exec_id}/')
if not os.path.isdir(base_path):
    os.mkdir(base_path)
    print(base_path, 'folder created.')

training_stats = []
round_number_list = [1,2,3,4,5]
site_number_list = [1,2,3,4,5]
for round_number in round_number_list:
    model_id = f'fed_model_round-{round_number}'
    model_id_old = f'fed_model_round-{round_number-1}'
    model_name = f'{exec_id}_{model_id}'
    model_name_old = f'{exec_id}_{model_id_old}'
    local_models = []
    local_train_db_sizes = []

    for site_number in site_number_list:
        print(f'\n\nTraining round_number {round_number} site_number {site_number}:')
        df = load_dataset(site_number)
        dataloaders, db_sizes = get_dataloaders(df, round_number)

        ### Visualize a few images
        # Let's visualize a few training images so as to understand the data augmentations.
        # vis.plot_samples(dataloaders['train'], title=f'site_number {site_number} round_number {round_number} samples')        
        
        # Train the model
        model = get_model(model_name=model_name_old, base_path=base_path)
        model, model_hist = set_and_train_model(model, dataloaders, num_epochs=num_epochs)
        # vis.plot_model_hist(model_hist, f'Baseline Model site_number: {site_number} round_number: {round_number}')
        
        local_models.append(model)
        local_train_db_sizes.append(db_sizes['train'])
    
    fed_model = get_federated_avg_model(local_models, local_train_db_sizes)

    for site_number in site_number_list:
        print(f'\nEvaluating round_number {round_number} site_number {site_number}:')
        df = load_dataset(site_number)
        dataloaders, _ = get_dataloaders(df, round_number)

        # Evaluate model predictions
        model_stats = evaluate_model(fed_model, dataloaders, split='test')
        model_stats['round_number'] = round_number; model_stats['site_number'] = site_number
        training_stats.append(model_stats)

    # Saving current model:
    save_model(model, model_name, base_path=base_path)

../../models/federated_models/2022-11-2_14h30m28s/ folder created.


Training round_number 1 site_number 1:
train_set size: 5576
valid_set size: 1634
test_set size: 1661
total: 8871

File Not Found: ../../models/federated_models/2022-11-2_14h30m28s/2022-11-2_14h30m28s_fed_model_round-0.pth.
Model will start with default initialization.
Cuda available: True. Model sent to device: cuda:0.

Epoch 0/29
----------
train Loss: 0.6443 Acc: 0.6329
valid Loss: 0.6273 Acc: 0.6573
3m 1s/epoch

Epoch 1/29
----------
train Loss: 0.6108 Acc: 0.6738
valid Loss: 0.6267 Acc: 0.6628
2m 57s/epoch

Epoch 2/29
----------
train Loss: 0.5842 Acc: 0.6967
valid Loss: 0.6233 Acc: 0.6579
2m 52s/epoch

Epoch 3/29
----------
train Loss: 0.5497 Acc: 0.7227
valid Loss: 0.6780 Acc: 0.6389
2m 48s/epoch

Epoch 4/29
----------
train Loss: 0.5287 Acc: 0.7387
valid Loss: 0.6613 Acc: 0.6389
2m 44s/epoch

Epoch 5/29
----------
train Loss: 0.4859 Acc: 0.7660
valid Loss: 0.7388 Acc: 0.6236
2m 46s/epoch

Epoch 6/29
----------


In [4]:
# Saving statistics report:
training_stats = pd.DataFrame(training_stats)
training_stats.to_csv(f'{base_path}{exec_id}_training_stats.csv', index=False)

training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,site_number
0,0.678507,0.664406,0.552342,0.657377,0.600299,"[[726, 209], [325, 401]]",1,1
1,0.68908,0.682074,0.588372,0.693151,0.636478,"[[775, 224], [354, 506]]",1,2
2,0.669683,0.660594,0.591917,0.626207,0.608579,"[[730, 271], [313, 454]]",1,3
3,0.680456,0.672322,0.587533,0.666165,0.624383,"[[692, 222], [311, 443]]",1,4
4,0.68,0.66969,0.594438,0.638854,0.615846,"[[847, 290], [350, 513]]",1,5
5,0.673167,0.656231,0.512008,0.669598,0.580294,"[[1055, 263], [508, 533]]",2,1
6,0.684646,0.67631,0.553209,0.70658,0.620559,"[[1084, 272], [529, 655]]",2,2
7,0.686482,0.672133,0.552266,0.676104,0.607943,"[[1089, 286], [484, 597]]",2,3
8,0.69032,0.680148,0.551855,0.710784,0.621318,"[[996, 236], [471, 580]]",2,4
9,0.687185,0.675175,0.575286,0.667616,0.618022,"[[1206, 350], [519, 703]]",2,5


In [5]:
training_stats = pd.read_csv(f'{base_path}{exec_id}_training_stats.csv')
training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,site_number
0,0.678507,0.664406,0.552342,0.657377,0.600299,[[726 209]\n [325 401]],1,1
1,0.68908,0.682074,0.588372,0.693151,0.636478,[[775 224]\n [354 506]],1,2
2,0.669683,0.660594,0.591917,0.626207,0.608579,[[730 271]\n [313 454]],1,3
3,0.680456,0.672322,0.587533,0.666165,0.624383,[[692 222]\n [311 443]],1,4
4,0.68,0.66969,0.594438,0.638854,0.615846,[[847 290]\n [350 513]],1,5
5,0.673167,0.656231,0.512008,0.669598,0.580294,[[1055 263]\n [ 508 533]],2,1
6,0.684646,0.67631,0.553209,0.70658,0.620559,[[1084 272]\n [ 529 655]],2,2
7,0.686482,0.672133,0.552266,0.676104,0.607943,[[1089 286]\n [ 484 597]],2,3
8,0.69032,0.680148,0.551855,0.710784,0.621318,[[996 236]\n [471 580]],2,4
9,0.687185,0.675175,0.575286,0.667616,0.618022,[[1206 350]\n [ 519 703]],2,5
