In [1]:
import os, sys
sys.path.append('../../src')

from datetime import datetime
import pandas as pd

import visualization.visualize as vis
from data.load_dataset import load_dataset
from models.train_model import get_dataloaders, set_and_train_model
from models.model import get_model, evaluate_model, save_model

## Running all databases and rounds

In [2]:
num_epochs = 5
base_path = '../../models/local_models/'

In [3]:
exec_time = datetime.now()
exec_id = f'{exec_time.year}-{exec_time.month}-{exec_time.day}_{exec_time.hour}h{exec_time.minute}m{exec_time.second}s'
base_path = os.path.join(base_path, f'{exec_id}/')
if not os.path.isdir(base_path):
    os.mkdir(base_path)
    print(base_path, 'folder created.')

training_stats = []
round_number_list = [1,2,3,4,5]
site_number_list = [1,2,3,4,5]
for round_number in round_number_list:
    for site_number in site_number_list:

        print(f'\n\nTraining round_number {round_number} site_number {site_number}:')
        model_id = f'local_model_round-{round_number}_site-{site_number}'
        model_id_old = f'local_model_round-{round_number-1}_site-{site_number}'
        model_name = f'{exec_id}_{model_id}'
        model_name_old = f'{exec_id}_{model_id_old}'
        
        df = load_dataset(site_number)
        dataloaders, db_sizes = get_dataloaders(df.sample(1000), round_number)

        ### Visualize a few images
        # Let's visualize a few training images so as to understand the data augmentations.
        # vis.plot_samples(dataloaders['train'], title=f'site_number {site_number} round_number {round_number} samples')        
        
        # Train the model
        model = get_model(model_name=model_name_old, base_path=base_path)
        model, model_hist = set_and_train_model(model, dataloaders, num_epochs=num_epochs)
        # vis.plot_model_hist(model_hist, f'Baseline Model site_number: {site_number} round_number: {round_number}')
        
        # Evaluate model predictions
        model_stats = evaluate_model(model, dataloaders, split='test')
        model_stats['round_number'] = round_number; model_stats['site_number'] = site_number
        training_stats.append(model_stats)
        # Saving current model:
        save_model(model, model_name, base_path=base_path)

../../models/local_models/2022-10-30_10h45m45s/ folder created.


Training round_number 1 site_number 1:
train_set size: 267
valid_set size: 66
test_set size: 73
total: 406

File Not Found: ../../models/local_models/2022-10-30_10h45m45s/2022-10-30_10h45m45s_local_model_round-0_site-1.pth.
Model will start with default initialization.
Cuda available: True. Model sent to device: cuda:0.

Epoch 0/4
----------
train Loss: 0.6897 Acc: 0.5730
valid Loss: 0.6681 Acc: 0.6212
0m 10s/epoch

Epoch 1/4
----------
train Loss: 0.6024 Acc: 0.7041
valid Loss: 0.6803 Acc: 0.5909
0m 8s/epoch

Epoch 2/4
----------
train Loss: 0.5183 Acc: 0.7903
valid Loss: 0.7063 Acc: 0.6364
0m 8s/epoch

Epoch 3/4
----------
train Loss: 0.4397 Acc: 0.8090
valid Loss: 0.7183 Acc: 0.5606
0m 8s/epoch

Epoch 4/4
----------
train Loss: 0.3736 Acc: 0.8727
valid Loss: 0.7454 Acc: 0.6212
0m 8s/epoch
Training complete in 0m 42s
Best val Acc: 0.636364

Classification report for test set:
              precision    recall  f1-score

In [4]:
# Saving statistics report:
training_stats = pd.DataFrame(training_stats)
training_stats.to_csv(f'{base_path}{exec_id}_training_stats.csv', index=False)

training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,site_number
0,0.630137,0.59031,0.366667,0.578947,0.44898,"[[35, 8], [19, 11]]",1,1
1,0.547619,0.535365,0.432432,0.484848,0.457143,"[[30, 17], [21, 16]]",1,2
2,0.659091,0.653747,0.418605,0.782609,0.545455,"[[40, 5], [25, 18]]",1,3
3,0.541667,0.593825,0.325581,0.777778,0.459016,"[[25, 4], [29, 14]]",1,4
4,0.671642,0.628704,0.407407,0.647059,0.5,"[[34, 6], [16, 11]]",1,5
5,0.598131,0.602273,0.454545,0.657895,0.537634,"[[39, 13], [30, 25]]",2,1
6,0.66,0.643115,0.361702,0.809524,0.5,"[[49, 4], [30, 17]]",2,2
7,0.692308,0.66565,0.511628,0.666667,0.578947,"[[50, 11], [21, 22]]",2,3
8,0.62037,0.637077,0.765957,0.545455,0.637168,"[[31, 30], [11, 36]]",2,4
9,0.644628,0.616638,0.469388,0.575,0.516854,"[[55, 17], [26, 23]]",2,5


In [5]:
training_stats = pd.read_csv(f'{base_path}{exec_id}_training_stats.csv')
training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,site_number
0,0.630137,0.59031,0.366667,0.578947,0.44898,[[35 8]\n [19 11]],1,1
1,0.547619,0.535365,0.432432,0.484848,0.457143,[[30 17]\n [21 16]],1,2
2,0.659091,0.653747,0.418605,0.782609,0.545455,[[40 5]\n [25 18]],1,3
3,0.541667,0.593825,0.325581,0.777778,0.459016,[[25 4]\n [29 14]],1,4
4,0.671642,0.628704,0.407407,0.647059,0.5,[[34 6]\n [16 11]],1,5
5,0.598131,0.602273,0.454545,0.657895,0.537634,[[39 13]\n [30 25]],2,1
6,0.66,0.643115,0.361702,0.809524,0.5,[[49 4]\n [30 17]],2,2
7,0.692308,0.66565,0.511628,0.666667,0.578947,[[50 11]\n [21 22]],2,3
8,0.62037,0.637077,0.765957,0.545455,0.637168,[[31 30]\n [11 36]],2,4
9,0.644628,0.616638,0.469388,0.575,0.516854,[[55 17]\n [26 23]],2,5
