In [None]:
import os, sys
sys.path.append('../../src')

from datetime import datetime
import pandas as pd

import visualization.visualize as vis
from data.load_dataset import load_dataset
from models.train_model import get_dataloaders, set_and_train_model
from models.model import get_model, evaluate_model, save_model
from federated.federated_learning import get_federated_avg_model

## Running all databases and rounds

In [None]:
# Run configuration -- adjust these before executing the training cells.
# Root folder for saved models; the training cell appends a per-execution
# sub-folder to it (note the trailing slash, which later path building relies on).
base_path = '../../models/federated_models/'
# Number of epochs handed to set_and_train_model for every local training run.
num_epochs = 30

In [None]:
# Build a unique identifier for this execution from the current timestamp and
# create a dedicated output folder for it below the configured base path.
exec_time = datetime.now()
exec_id = f'{exec_time.year}-{exec_time.month}-{exec_time.day}_{exec_time.hour}h{exec_time.minute}m{exec_time.second}s'
# NOTE: base_path is rebound here, so re-running this cell without first
# re-running the configuration cell nests the new run folder inside the old one.
base_path = os.path.join(base_path, f'{exec_id}/')
if not os.path.isdir(base_path):
    # makedirs also creates missing parent folders, which os.mkdir would not.
    os.makedirs(base_path)
    print(base_path, 'folder created.')

# One record per (round, site) evaluation; turned into a DataFrame in a later cell.
training_stats = []
round_number_list = [1,2,3,4,5]
site_number_list = [1,2,3,4,5]
for round_number in round_number_list:
    # Names under which this round's model is saved and the previous round's
    # model is looked up.
    model_id = f'fed_model_round-{round_number}'
    model_id_old = f'fed_model_round-{round_number-1}'
    model_name = f'{exec_id}_{model_id}'
    model_name_old = f'{exec_id}_{model_id_old}'
    local_models = []
    local_train_db_sizes = []

    # --- Local training: every site trains its own copy of the previous model ---
    for site_number in site_number_list:
        print(f'\n\nTraining round_number {round_number} site_number {site_number}:')
        df = load_dataset(site_number)
        dataloaders, db_sizes = get_dataloaders(df, round_number, test_all_rounds=True)

        # Optional: visualize a few training images to inspect the data augmentations.
        # vis.plot_samples(dataloaders['train'], title=f'site_number {site_number} round_number {round_number} samples')

        # Start from the model saved by the previous round.
        # NOTE(review): for round 1 this asks for a "round-0" model that this cell
        # never saves; presumably get_model falls back to a fresh model -- confirm.
        model = get_model(model_name=model_name_old, base_path=base_path)
        model, model_hist = set_and_train_model(model, dataloaders, num_epochs=num_epochs)
        # vis.plot_model_hist(model_hist, f'Baseline Model site_number: {site_number} round_number: {round_number}')

        local_models.append(model)
        local_train_db_sizes.append(db_sizes['train'])

    # --- Aggregation: combine the local models into a single federated model ---
    # The training-set sizes are passed alongside the models (presumably used as
    # averaging weights -- verify against get_federated_avg_model).
    fed_model = get_federated_avg_model(local_models, local_train_db_sizes)

    # --- Evaluation: score the federated model on each site's test split ---
    for site_number in site_number_list:
        print(f'\nEvaluating round_number {round_number} site_number {site_number}:')
        df = load_dataset(site_number)
        # NOTE(review): training builds its loaders with test_all_rounds=True while
        # evaluation uses the default -- confirm this difference is intended.
        dataloaders, _ = get_dataloaders(df, round_number)

        model_stats = evaluate_model(fed_model, dataloaders, split='test')
        model_stats['round_number'] = round_number
        model_stats['site_number'] = site_number
        training_stats.append(model_stats)

    # Save the federated model (not the last site's local model) so that the
    # next round starts from the aggregated weights.
    save_model(fed_model, model_name, base_path=base_path)

In [None]:
# Persist the collected per-round / per-site evaluation records as a CSV report
# inside this execution's output folder, then display them.
stats_csv_path = f'{base_path}{exec_id}_training_stats.csv'
training_stats = pd.DataFrame(training_stats)
training_stats.to_csv(stats_csv_path, index=False)

training_stats

In [None]:
# Reload the statistics report written for this execution and display it.
stats_csv_path = f'{base_path}{exec_id}_training_stats.csv'
training_stats = pd.read_csv(stats_csv_path)
training_stats