In [1]:
import os, sys
sys.path.append('../../src')

from datetime import datetime
import pandas as pd

import visualization.visualize as vis
from data.load_dataset import load_dataset
from models.train_model import get_dataloaders, set_and_train_model
from models.model import get_model, evaluate_model, save_model

## Running all databases and rounds

In [2]:
num_epochs = 30
base_path = '../../models/local_models/'

In [3]:
exec_time = datetime.now()
exec_id = f'{exec_time.year}-{exec_time.month}-{exec_time.day}_{exec_time.hour}h{exec_time.minute}m{exec_time.second}s'
base_path = os.path.join(base_path, f'{exec_id}/')
if not os.path.isdir(base_path):
    os.mkdir(base_path)
    print(base_path, 'folder created.')

training_stats = []
round_number_list = [1,2,3,4,5]
site_number_list = [1,2,3,4,5]
for round_number in round_number_list:
    for site_number in site_number_list:

        print(f'\n\nTraining round_number {round_number} site_number {site_number}:')
        model_id = f'local_model_round-{round_number}_site-{site_number}'
        model_id_old = f'local_model_round-{round_number-1}_site-{site_number}'
        model_name = f'{exec_id}_{model_id}'
        model_name_old = f'{exec_id}_{model_id_old}'
        
        df = load_dataset(site_number)
        dataloaders, db_sizes = get_dataloaders(df, round_number, test_all_rounds=True)

        ### Visualize a few images
        # Let's visualize a few training images so as to understand the data augmentations.
        # vis.plot_samples(dataloaders['train'], title=f'site_number {site_number} round_number {round_number} samples')        
        
        # Train the model
        model = get_model(model_name=model_name_old, base_path=base_path)
        model, model_hist = set_and_train_model(model, dataloaders, num_epochs=num_epochs)
        # vis.plot_model_hist(model_hist, f'Baseline Model site_number: {site_number} round_number: {round_number}')
        
        # Evaluate model predictions
        model_stats = evaluate_model(model, dataloaders, split='test')
        model_stats['round_number'] = round_number; model_stats['site_number'] = site_number
        training_stats.append(model_stats)
        # Saving current model:
        save_model(model, model_name, base_path=base_path)

../../models/local_models/2022-11-10_23h2m50s/ folder created.


Training round_number 1 site_number 1:
train_set size: 5576
valid_set size: 1634
test_set size: 4260
total: 11470

File Not Found: ../../models/local_models/2022-11-10_23h2m50s/2022-11-10_23h2m50s_local_model_round-0_site-1.pth.
Model will start with default initialization.
Cuda available: True. Model sent to device: cuda:0.

Epoch 0/29
----------
train Loss: 0.6597 Acc: 0.6263
valid Loss: 0.6531 Acc: 0.6334
4m 17s/epoch

Epoch 1/29
----------
train Loss: 0.6212 Acc: 0.6585
valid Loss: 0.6147 Acc: 0.6707
3m 37s/epoch

Epoch 2/29
----------
train Loss: 0.5881 Acc: 0.6964
valid Loss: 0.6311 Acc: 0.6628
3m 35s/epoch

Epoch 3/29
----------
train Loss: 0.5500 Acc: 0.7174
valid Loss: 0.6276 Acc: 0.6689
3m 34s/epoch

Epoch 4/29
----------
train Loss: 0.5156 Acc: 0.7444
valid Loss: 0.7046 Acc: 0.6444
3m 33s/epoch

Epoch 5/29
----------
train Loss: 0.4818 Acc: 0.7724
valid Loss: 0.7093 Acc: 0.6414
3m 36s/epoch

Epoch 6/29
--------

In [4]:
# Saving statistics report:
training_stats = pd.DataFrame(training_stats)
training_stats.to_csv(f'{base_path}{exec_id}_training_stats.csv', index=False)

training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,site_number
0,0.670188,0.672109,0.692268,0.621622,0.655045,"[[1521, 812], [593, 1334]]",1,1
1,0.668338,0.662197,0.563721,0.675209,0.614449,"[[1853, 583], [938, 1212]]",1,2
2,0.669797,0.674374,0.74522,0.622853,0.678564,"[[1437, 944], [533, 1559]]",1,3
3,0.671983,0.667797,0.590337,0.675302,0.629968,"[[1650, 564], [814, 1173]]",1,4
4,0.676871,0.671369,0.578477,0.685484,0.627451,"[[2023, 624], [991, 1360]]",1,5
5,0.676761,0.666325,0.556824,0.672306,0.60914,"[[1810, 523], [854, 1073]]",2,1
6,0.666594,0.663722,0.617674,0.65258,0.634648,"[[1729, 707], [822, 1328]]",2,2
7,0.674268,0.671931,0.635755,0.65679,0.646102,"[[1686, 695], [762, 1330]]",2,3
8,0.671745,0.665765,0.555108,0.690238,0.615342,"[[1719, 495], [884, 1103]]",2,4
9,0.676271,0.67418,0.638877,0.661383,0.649935,"[[1878, 769], [849, 1502]]",2,5


In [5]:
training_stats = pd.read_csv(f'{base_path}{exec_id}_training_stats.csv')
training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,site_number
0,0.670188,0.672109,0.692268,0.621622,0.655045,[[1521 812]\n [ 593 1334]],1,1
1,0.668338,0.662197,0.563721,0.675209,0.614449,[[1853 583]\n [ 938 1212]],1,2
2,0.669797,0.674374,0.74522,0.622853,0.678564,[[1437 944]\n [ 533 1559]],1,3
3,0.671983,0.667797,0.590337,0.675302,0.629968,[[1650 564]\n [ 814 1173]],1,4
4,0.676871,0.671369,0.578477,0.685484,0.627451,[[2023 624]\n [ 991 1360]],1,5
5,0.676761,0.666325,0.556824,0.672306,0.60914,[[1810 523]\n [ 854 1073]],2,1
6,0.666594,0.663722,0.617674,0.65258,0.634648,[[1729 707]\n [ 822 1328]],2,2
7,0.674268,0.671931,0.635755,0.65679,0.646102,[[1686 695]\n [ 762 1330]],2,3
8,0.671745,0.665765,0.555108,0.690238,0.615342,[[1719 495]\n [ 884 1103]],2,4
9,0.676271,0.67418,0.638877,0.661383,0.649935,[[1878 769]\n [ 849 1502]],2,5
