In [1]:
import os, sys
sys.path.append('../../src')

from datetime import datetime
import pandas as pd

import visualization.visualize as vis
from data.load_dataset import load_dataset
from models.train_model import get_dataloaders, set_and_train_model
from models.model import get_model, evaluate_model, save_model

## Running all databases and rounds

In [2]:
num_epochs = 30
base_path = '../../models/local_models/'

In [3]:
exec_time = datetime.now()
exec_id = f'{exec_time.year}-{exec_time.month}-{exec_time.day}_{exec_time.hour}h{exec_time.minute}m{exec_time.second}s'
base_path = os.path.join(base_path, f'{exec_id}/')
if not os.path.isdir(base_path):
    os.mkdir(base_path)
    print(base_path, 'folder created.')

training_stats = []
round_number_list = [1,2,3,4,5]
site_number_list = [1,2,3,4,5]
for round_number in round_number_list:
    for site_number in site_number_list:

        print(f'\n\nTraining round_number {round_number} site_number {site_number}:')
        model_id = f'local_model_round-{round_number}_site-{site_number}'
        model_id_old = f'local_model_round-{round_number-1}_site-{site_number}'
        model_name = f'{exec_id}_{model_id}'
        model_name_old = f'{exec_id}_{model_id_old}'
        
        df = load_dataset(site_number)
        dataloaders, db_sizes = get_dataloaders(df, round_number)

        ### Visualize a few images
        # Let's visualize a few training images so as to understand the data augmentations.
        # vis.plot_samples(dataloaders['train'], title=f'site_number {site_number} round_number {round_number} samples')        
        
        # Train the model
        model = get_model(model_name=model_name_old, base_path=base_path)
        model, model_hist = set_and_train_model(model, dataloaders, num_epochs=num_epochs)
        # vis.plot_model_hist(model_hist, f'Baseline Model site_number: {site_number} round_number: {round_number}')
        
        # Evaluate model predictions
        model_stats = evaluate_model(model, dataloaders, split='test')
        model_stats['round_number'] = round_number; model_stats['site_number'] = site_number
        training_stats.append(model_stats)
        # Saving current model:
        save_model(model, model_name, base_path=base_path)

../../models/local_models/2022-11-2_11h33m39s/ folder created.


Training round_number 1 site_number 1:
train_set size: 5576
valid_set size: 1634
test_set size: 1661
total: 8871

File Not Found: ../../models/local_models/2022-11-2_11h33m39s/2022-11-2_11h33m39s_local_model_round-0_site-1.pth.
Model will start with default initialization.
Cuda available: True. Model sent to device: cuda:0.

Epoch 0/29
----------
train Loss: 0.6552 Acc: 0.6246
valid Loss: 0.6197 Acc: 0.6707
3m 29s/epoch

Epoch 1/29
----------
train Loss: 0.6166 Acc: 0.6677
valid Loss: 0.6207 Acc: 0.6640
3m 14s/epoch

Epoch 2/29
----------
train Loss: 0.5921 Acc: 0.6879
valid Loss: 0.6379 Acc: 0.6536
3m 14s/epoch

Epoch 3/29
----------
train Loss: 0.5501 Acc: 0.7235
valid Loss: 0.6584 Acc: 0.6285
3m 19s/epoch

Epoch 4/29
----------
train Loss: 0.5258 Acc: 0.7387
valid Loss: 0.6563 Acc: 0.6542
3m 11s/epoch

Epoch 5/29
----------
train Loss: 0.4775 Acc: 0.7713
valid Loss: 0.7013 Acc: 0.6230
3m 12s/epoch

Epoch 6/29
---------

In [4]:
# Saving statistics report:
training_stats = pd.DataFrame(training_stats)
training_stats.to_csv(f'{base_path}{exec_id}_training_stats.csv', index=False)

training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,site_number
0,0.649609,0.625036,0.429752,0.65,0.517413,"[[767, 168], [414, 312]]",1,1
1,0.671329,0.66216,0.539535,0.683358,0.602989,"[[784, 215], [396, 464]]",1,2
2,0.664593,0.653508,0.569752,0.624286,0.595774,"[[738, 263], [330, 437]]",1,3
3,0.657674,0.647007,0.535809,0.6464,0.585932,"[[693, 221], [350, 404]]",1,4
4,0.664,0.663158,0.65701,0.601273,0.627907,"[[761, 376], [296, 567]]",1,5
5,0.665112,0.655383,0.572526,0.633369,0.601413,"[[973, 345], [445, 596]]",2,1
6,0.661811,0.653531,0.53125,0.674169,0.594237,"[[1052, 304], [555, 629]]",2,2
7,0.677932,0.668848,0.592969,0.646169,0.618427,"[[1024, 351], [440, 641]]",2,3
8,0.660096,0.649,0.509039,0.672956,0.579632,"[[972, 260], [516, 535]]",2,4
9,0.672066,0.663699,0.594108,0.636284,0.614473,"[[1141, 415], [496, 726]]",2,5


In [5]:
training_stats = pd.read_csv(f'{base_path}{exec_id}_training_stats.csv')
training_stats

Unnamed: 0,accuracy_score,balanced_accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,round_number,site_number
0,0.649609,0.625036,0.429752,0.65,0.517413,[[767 168]\n [414 312]],1,1
1,0.671329,0.66216,0.539535,0.683358,0.602989,[[784 215]\n [396 464]],1,2
2,0.664593,0.653508,0.569752,0.624286,0.595774,[[738 263]\n [330 437]],1,3
3,0.657674,0.647007,0.535809,0.6464,0.585932,[[693 221]\n [350 404]],1,4
4,0.664,0.663158,0.65701,0.601273,0.627907,[[761 376]\n [296 567]],1,5
5,0.665112,0.655383,0.572526,0.633369,0.601413,[[973 345]\n [445 596]],2,1
6,0.661811,0.653531,0.53125,0.674169,0.594237,[[1052 304]\n [ 555 629]],2,2
7,0.677932,0.668848,0.592969,0.646169,0.618427,[[1024 351]\n [ 440 641]],2,3
8,0.660096,0.649,0.509039,0.672956,0.579632,[[972 260]\n [516 535]],2,4
9,0.672066,0.663699,0.594108,0.636284,0.614473,[[1141 415]\n [ 496 726]],2,5
