In [1]:
import os, sys
sys.path.append('../../src')

from datetime import datetime
import pandas as pd

import visualization.visualize as vis
from data.load_dataset import load_dataset
from models.train_model import get_dataloaders, set_and_train_model
from models.model import get_model, evaluate_model, save_model

## Running all databases and rounds

In [2]:
num_epochs = 5
base_path='../../models/federated_models/'

In [3]:
def get_federated_model(local_models):
    key_select = list(local_models.keys())[0]
    return local_models[key_select]


In [4]:
exec_time = datetime.now()
exec_id = f'{exec_time.year}-{exec_time.month}-{exec_time.day}_{exec_time.hour}h{exec_time.minute}m{exec_time.second}s'

training_stats = []
for round_number in [1,2,3,4,5]:
    model_id = f'fed_model_round{round_number}'
    model_id_old = f'fed_model_round{round_number-1}'
    model_name = f'{exec_id}_{model_id}'
    model_name_old = f'{exec_id}_{model_id_old}'
    local_models = {}

    for db_number in [1,2,3,4,5]:
        print(f'\n\nTraining round_number {round_number} db_number {db_number}:')
        df = load_dataset(db_number)
        dataloaders = get_dataloaders(df.sample(1000), round_number)

        ### Visualize a few images
        # Let's visualize a few training images so as to understand the data augmentations.
        # vis.plot_samples(dataloaders['train'], title=f'db_number {db_number} round_number {round_number} samples')        
        
        # Train the model
        model = get_model(model_name=model_name_old, base_path=base_path)
        model, model_hist = set_and_train_model(model, dataloaders, num_epochs=num_epochs)
        # vis.plot_model_hist(model_hist, f'Baseline Model db_number: {db_number} round_number: {round_number}')
        
        local_models[model_name] = model
    
    fed_model = get_federated_model(local_models)

    for db_number in [1,2,3,4,5]:
        print(f'\nEvaluating round_number {round_number} db_number {db_number}:')
        df = load_dataset(db_number)
        dataloaders = get_dataloaders(df.sample(1000), round_number)

        # Evaluate model predictions
        model_stats = evaluate_model(fed_model, dataloaders, split='test')
        model_stats['round_number'] = round_number; model_stats['db_number'] = db_number
        training_stats.append(model_stats)

    # Saving current model:
    save_model(model, model_name, base_path=base_path)



Training round_number 1 db_number 1:
train_set size: 228
valid_set size: 90
test_set size: 81
total: 399

File Not Found: 2022-10-6_0h10m24s_fed_model_round0. Model will start with default initialization.
Cuda available: True. Model sento to device: cuda:0.

Epoch 0/4
----------
train Loss: 0.7005 Acc: 0.5088
valid Loss: 0.6840 Acc: 0.5444
0m 9s/epoch

Epoch 1/4
----------
train Loss: 0.6440 Acc: 0.6184
valid Loss: 0.6540 Acc: 0.6000
0m 6s/epoch

Epoch 2/4
----------
train Loss: 0.5878 Acc: 0.6623
valid Loss: 0.6224 Acc: 0.6667
0m 6s/epoch

Epoch 3/4
----------
train Loss: 0.5658 Acc: 0.7412
valid Loss: 0.6833 Acc: 0.5000
0m 6s/epoch

Epoch 4/4
----------
train Loss: 0.4941 Acc: 0.7632
valid Loss: 0.6857 Acc: 0.6222
0m 7s/epoch
Training complete in 0m 37s
Best val Acc: 0.666667


Training round_number 1 db_number 2:
train_set size: 235
valid_set size: 76
test_set size: 84
total: 395

File Not Found: 2022-10-6_0h10m24s_fed_model_round0. Model will start with default initialization.
Cu

In [8]:
# Saving statistics report:
training_stats = pd.DataFrame(training_stats)
training_stats.to_csv(f'{base_path}{exec_id}_training_stats.csv', index=False)

training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,db_number
0,0.666667,0.591667,0.266667,0.666667,0.380952,"[[44, 4], [22, 8]]",1,1
1,0.590361,0.575872,0.175,0.875,0.291667,"[[42, 1], [33, 7]]",1,2
2,0.541667,0.585759,0.268293,0.785714,0.4,"[[28, 3], [30, 11]]",1,3
3,0.68,0.646104,0.363636,0.8,0.5,"[[39, 3], [21, 12]]",1,4
4,0.517647,0.497222,0.15,0.461538,0.226415,"[[38, 7], [34, 6]]",1,5
5,0.632353,0.624226,0.568966,0.568966,0.568966,"[[53, 25], [25, 33]]",2,1
6,0.608696,0.597527,0.480769,0.581395,0.526316,"[[45, 18], [27, 25]]",2,2
7,0.694915,0.672451,0.577778,0.604651,0.590909,"[[56, 17], [19, 26]]",2,3
8,0.673077,0.637354,0.513514,0.542857,0.527778,"[[51, 16], [18, 19]]",2,4
9,0.672,0.655529,0.54717,0.630435,0.585859,"[[55, 17], [24, 29]]",2,5


In [9]:
training_stats = pd.read_csv(f'{base_path}{exec_id}_training_stats.csv')
training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,db_number
0,0.666667,0.591667,0.266667,0.666667,0.380952,[[44 4]\n [22 8]],1,1
1,0.590361,0.575872,0.175,0.875,0.291667,[[42 1]\n [33 7]],1,2
2,0.541667,0.585759,0.268293,0.785714,0.4,[[28 3]\n [30 11]],1,3
3,0.68,0.646104,0.363636,0.8,0.5,[[39 3]\n [21 12]],1,4
4,0.517647,0.497222,0.15,0.461538,0.226415,[[38 7]\n [34 6]],1,5
5,0.632353,0.624226,0.568966,0.568966,0.568966,[[53 25]\n [25 33]],2,1
6,0.608696,0.597527,0.480769,0.581395,0.526316,[[45 18]\n [27 25]],2,2
7,0.694915,0.672451,0.577778,0.604651,0.590909,[[56 17]\n [19 26]],2,3
8,0.673077,0.637354,0.513514,0.542857,0.527778,[[51 16]\n [18 19]],2,4
9,0.672,0.655529,0.54717,0.630435,0.585859,[[55 17]\n [24 29]],2,5
