In [1]:
import os
import sys
import logging
import pickle
import torch
import pandas as pd
import torch.nn as nn
import numpy as np
import pipe

import scripts.data_loader as data_loader
import src.training as training
import src.evaluation as evaluation
import src.kfold.kfold_training_testing as kfold

from dataclasses import dataclass, asdict
from src.types import *
from src.models.main_model import OB_05Model
from src.kfold.kfold_training_config import KFoldTrainingConfig
from scripts.visualization.model_evaluation import TrainingVisualizations, TestingVisualizations

# Directory that receives the artifacts of this k-fold run.
output_dir = "../output/kfold_model"
# exist_ok=True keeps the cell safe to re-run and removes the window between
# "does it exist?" and "create it" that the separate os.path.exists check left open.
os.makedirs(output_dir, exist_ok=True)

In [2]:
# Split the raw data into per-fold sub-datasets. The argument (2) is presumably
# the number of folds -- the training log below reports "Fold 1/2". TODO confirm
# against data_loader.KFold.split_into_n_sub_datasets.
raw_dataset_per_fold = data_loader.KFold.split_into_n_sub_datasets(2)

# Logger that receives the training/evaluation output (stdout here; a file
# handler could be attached instead).
logger = logging.getLogger("logger")
logger.setLevel(logging.INFO)

# logging.getLogger returns the same object for the same name on every call, so
# re-running this cell used to attach one more stdout handler each time and every
# message was then printed once per handler. Reuse the existing handler instead.
console_handler = next(
    (
        handler
        for handler in logger.handlers
        if isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is sys.stdout
    ),
    None,
)
if console_handler is None:
    console_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(console_handler)

# Model, loss, optimizer and LR scheduler that are handed to the k-fold run.
# NOTE(review): a single model/optimizer/scheduler instance is passed for all
# folds; whether they are re-initialised between folds depends on
# kfold.kfold_cross_validation -- confirm, otherwise later folds start from
# weights that have already seen their validation data.
model = OB_05Model()

initial_learning_rate = 0.0001
criterion = nn.CrossEntropyLoss()
# Reach the optimizers through the explicitly imported `torch` package. The bare
# name `optim` was never imported in this notebook and only resolved if the
# wildcard `from src.types import *` happened to leak it.
optimizer = torch.optim.Adam(model.parameters(), lr=initial_learning_rate, weight_decay=5e-2)
# 'min' mode: multiply the learning rate by `factor` once the monitored value has
# stopped decreasing for more than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=5)

# Everything the cross-validation run needs, bundled into one config object.
training_config = KFoldTrainingConfig(
    # Naming and output
    model_name="kfold_model",
    output_dir=output_dir,
    output_logger=logger,

    # Data: the per-fold datasets and the class labels of the training set
    folds=raw_dataset_per_fold,
    classes=data_loader.get_trainset().classes,

    # Training schedule
    epochs_per_fold=2,
    initial_learning_rate=initial_learning_rate,
    patience=5,

    # Training components
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler
)

# K-fold cross-validation

In [3]:
# Run the k-fold cross-validation described by `training_config`.
# Later cells iterate the result as (training_logger, evaluation_results) pairs,
# presumably one pair per fold -- confirm against kfold_cross_validation.
results_per_fold = kfold.kfold_cross_validation(training_config)


Fold 1/2;f Epoch 1/2:
	Training precision: 0.2922
	Training recall: 0.2928
	Training accuracy: 0.6509
	Training f1-score: 0.2901

	Validation precision: 0.5137
	Validation recall: 0.5576
	Validation accuracy: 0.7500
	Validation f1-score: 0.4674
	Learning rate for param group "0": 0.0001

Fold 1/2;f Epoch 2/2:
	Training precision: 0.4286
	Training recall: 0.4314
	Training accuracy: 0.7180
	Training f1-score: 0.4288

	Validation precision: 0.4884
	Validation recall: nan
	Validation accuracy: 0.7356
	Validation f1-score: nan
	Learning rate for param group "0": 0.0001


  recalls_per_class = true_positives / (true_positives + false_negatives)



Testing batch #1:
	MACRO precision: 0.4000
	MACRO recall: nan
	MACRO f1_score: nan

	MICRO precision: 0.3438
	MICRO recall: 0.3438
	MICRO f1_score: 0.3438

	Accuracy: 0.3438



Testing batch #2:
	MACRO precision: 0.3887
	MACRO recall: nan
	MACRO f1_score: nan

	MICRO precision: 0.3750
	MICRO recall: 0.3750
	MICRO f1_score: 0.3750

	Accuracy: 0.3750



Testing batch #3:
	MACRO precision: 0.4037
	MACRO recall: nan
	MACRO f1_score: nan

	MICRO precision: 0.3542
	MICRO recall: 0.3542
	MICRO f1_score: 0.3542

	Accuracy: 0.3542




  recall = true_positives / (true_positives + false_negatives)



Testing batch #4:
	MACRO precision: 0.4076
	MACRO recall: nan
	MACRO f1_score: nan

	MICRO precision: 0.3594
	MICRO recall: 0.3594
	MICRO f1_score: 0.3594

	Accuracy: 0.3594



Testing batch #5:
	MACRO precision: 0.4267
	MACRO recall: nan
	MACRO f1_score: nan

	MICRO precision: 0.3875
	MICRO recall: 0.3875
	MICRO f1_score: 0.3875

	Accuracy: 0.3875



Testing batch #6:
	MACRO precision: 0.4146
	MACRO recall: nan
	MACRO f1_score: nan

	MICRO precision: 0.3750
	MICRO recall: 0.3750
	MICRO f1_score: 0.3750

	Accuracy: 0.3750



Testing batch #7:
	MACRO precision: 0.4214
	MACRO recall: nan
	MACRO f1_score: nan

	MICRO precision: 0.4062
	MICRO recall: 0.4062
	MICRO f1_score: 0.4062

	Accuracy: 0.4062



Testing batch #8:
	MACRO precision: 0.4325
	MACRO recall: nan
	MACRO f1_score: nan

	MICRO precision: 0.4375
	MICRO recall: 0.4375
	MICRO f1_score: 0.4375

	Accuracy: 0.4375



Testing batch #9:
	MACRO precision: 0.4419
	MACRO recall: 0.6410
	MACRO f1_score: 0.3386

	MICRO precision: 0.4375

In [4]:
# Print the metrics table of every fold. Each entry of `results_per_fold` is a
# pair whose second element holds the evaluation results; the first is not needed here.
for fold_result in results_per_fold:
    df = fold_result[1].get_metrics_table_as_df()
    print(df)

            macro                       micro                              
metrics precision   recall f1_score precision    recall  f1_score  accuracy
model     0.46034  0.64649  0.36029  0.475367  0.475367  0.475367  0.475367
            macro                         micro                              
metrics precision    recall  f1_score precision    recall  f1_score  accuracy
model    0.582338  0.619674  0.537786  0.575993  0.575993  0.575993  0.575993


In [None]:
# Build one table with a row of macro/micro precision, recall and F1 plus the
# accuracy for every fold, followed by an "average" row with the per-column means.
# NOTE(review): cm_macro / cm_micro are not imported explicitly in this notebook;
# presumably they arrive through `from src.types import *` -- confirm.
tuples = [("macro", "precision"), ("macro", "recall"), ("macro", "f1_score"), ("micro", "precision"),
          ("micro", "recall"), ("micro", "f1_score"), ("", "accuracy")]
columns = pd.MultiIndex.from_tuples(tuples, names=["", "fold"])

# One row per fold, in the column order given by `tuples`.
data = []
for (training_logger, evaluation_results) in results_per_fold:
    macro_precision, macro_recall, macro_f1_score, macro_accuracy = cm_macro.calculate_overall_metrics(
        evaluation_results.confusion_matrix)
    micro_precision, micro_recall, micro_f1_score, micro_accuracy = cm_micro.calculate_overall_metrics(
        evaluation_results.confusion_matrix)
    accuracy = (macro_accuracy + micro_accuracy) / 2  # should be the same for both
    data.append(
        [macro_precision, macro_recall, macro_f1_score, micro_precision, micro_recall, micro_f1_score, accuracy]
    )

fold_numbers = list(range(1, len(results_per_fold) + 1))
temp_df = pd.DataFrame(data, index=pd.Index(fold_numbers), columns=columns)

# Average the per-fold rows built above. This used to read `df`, i.e. whatever
# table an earlier cell had left in the kernel, so the "average" row described a
# different frame (or raised NameError on a fresh kernel) while `temp_df` was
# never used. np.mean is kept so a NaN metric still yields a NaN average.
averages = [np.mean(np.array(temp_df[key])) for key in tuples]
data.append(averages)

indices = fold_numbers + ["average"]
df = pd.DataFrame(data,
                  index=pd.Index(indices),
                  columns=columns)

df.style

In [None]:
# Peek at the first (index label, row Series) pair of the table, shown as a list.
[*next(df.iterrows())]

In [None]:
# Scratch cell: recompute the per-column averages of the fold rows and store them
# in `df` under the "average" label.

import pipe  # redundant with the import in the first cell; harmless

# Leave out an "average" row that is already present (the table-building cell
# adds one), so re-running this cell never averages a previous average.
fold_rows = df.drop(index="average", errors="ignore")

averages = list(tuples
                | pipe.select(lambda key: np.array(fold_rows[key]))
                | pipe.select(lambda arr: np.mean(arr)))

as_row = dict(zip(tuples, averages))

# Write to the fixed "average" label (the same label the table-building cell
# uses) so the assignment is idempotent; `df.loc[len(df)] = ...` appended one
# more row on every execution. The Series is aligned to the columns by key.
df.loc["average"] = pd.Series(as_row)

In [None]:
# Display the metrics table as it stands after the previous cell modified it.
df

In [6]:
import src.types as tps

# Keep only the second element of every entry in `results_per_fold` (the
# evaluation results) and let EvaluationResults render them as one table.
evaluation_results_list = [fold_result[1] for fold_result in results_per_fold]
df = tps.EvaluationResults.format_evaluation_results_as_df(evaluation_results_list)
df.style

Unnamed: 0_level_0,macro,macro,macro,micro,micro,micro,Unnamed: 7_level_0
fold,precision,recall,f1_score,precision,recall,f1_score,accuracy
1,0.867004,0.854943,0.856533,0.865801,0.865801,0.865801,0.865801
2,0.918653,0.918364,0.918254,0.917749,0.917749,0.917749,0.917749
3,0.946538,0.943592,0.944766,0.948052,0.948052,0.948052,0.948052
4,0.959801,0.951377,0.954191,0.95671,0.95671,0.95671,0.95671
5,0.946826,0.949132,0.947902,0.952381,0.952381,0.952381,0.952381
6,0.973766,0.973311,0.973506,0.974026,0.974026,0.974026,0.974026
7,0.969203,0.967237,0.967636,0.969697,0.969697,0.969697,0.969697
8,0.971255,0.969523,0.970123,0.969697,0.969697,0.969697,0.969697
9,0.933553,0.934906,0.9323,0.935065,0.935065,0.935065,0.935065
10,0.963533,0.950664,0.955399,0.961864,0.961864,0.961864,0.961864


In [5]:
import pickle
import src.types as tps

# Reload previously saved cross-validation results from disk and render the
# per-fold metrics table from them.
# SECURITY: pickle.load can execute arbitrary code while unpickling -- only open
# result files that this project wrote itself.
with open('../output/kfold_model/results.pkl', 'rb') as results_file:
    results_per_fold = pickle.load(results_file)

# Keep only the second element of every entry (the evaluation results).
evaluation_results_list = [fold_result[1] for fold_result in results_per_fold]
df = tps.EvaluationResults.format_evaluation_results_as_df(evaluation_results_list)
df.style

Unnamed: 0_level_0,macro,macro,macro,micro,micro,micro,Unnamed: 7_level_0
fold,precision,recall,f1_score,precision,recall,f1_score,accuracy
1,0.867004,0.854943,0.856533,0.865801,0.865801,0.865801,0.865801
2,0.918653,0.918364,0.918254,0.917749,0.917749,0.917749,0.917749
3,0.946538,0.943592,0.944766,0.948052,0.948052,0.948052,0.948052
4,0.959801,0.951377,0.954191,0.95671,0.95671,0.95671,0.95671
5,0.946826,0.949132,0.947902,0.952381,0.952381,0.952381,0.952381
6,0.973766,0.973311,0.973506,0.974026,0.974026,0.974026,0.974026
7,0.969203,0.967237,0.967636,0.969697,0.969697,0.969697,0.969697
8,0.971255,0.969523,0.970123,0.969697,0.969697,0.969697,0.969697
9,0.933553,0.934906,0.9323,0.935065,0.935065,0.935065,0.935065
10,0.963533,0.950664,0.955399,0.961864,0.961864,0.961864,0.961864
