In [None]:
import logging
import os
import sys
from dataclasses import dataclass, asdict

import numpy as np
import pandas as pd
import pipe
import torch.nn as nn
import torch.optim as optim

import src.data_loader as data_loader
import src.kfold.kfold_training_testing as kfold

# The star import stays after the explicit imports above so that any name it
# re-exports keeps taking precedence, exactly as before.
from src.types import *
from src.models.main_model import OB_05Model
from src.kfold.kfold_training_config import KFoldTrainingConfig
from src.visualization.model_evaluation import TrainingVisualizations, TestingVisualizations

# Directory handed to the training configuration as its output location.
output_dir = "../output/kfold_model"
# exist_ok=True makes the cell safe to re-run and removes the gap between
# checking for the directory and creating it.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Getting the datasets per fold
raw_dataset_per_fold = data_loader.KFold.split_into_n_sub_datasets(2)

# Logger for training output (stdout here; a file handler could be used instead).
logger = logging.getLogger("logger")
logger.setLevel(logging.INFO)
console_handler = logging.StreamHandler(sys.stdout)
# logging.getLogger returns the same logger object on every call, so re-running
# this cell would stack one more stdout handler each time and every message
# would be emitted several times. Attach a handler only if none targets stdout yet.
stdout_handler_attached = any(
    isinstance(handler, logging.StreamHandler) and getattr(handler, "stream", None) is sys.stdout
    for handler in logger.handlers
)
if not stdout_handler_attached:
    logger.addHandler(console_handler)

model = OB_05Model()

# Optimisation setup: Adam with weight decay, plus a scheduler that scales the
# learning rate by `factor` once the value passed to scheduler.step() has not
# improved (mode 'min') for `patience` consecutive steps.
initial_learning_rate = 0.0001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=initial_learning_rate, weight_decay=5e-2)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=5)

# Everything the k-fold routine receives, bundled in one config object.
training_config = KFoldTrainingConfig(
    model_name="kfold_model",
    output_dir=output_dir,
    output_logger=logger,

    folds=raw_dataset_per_fold,
    classes=data_loader.get_trainset().classes,

    epochs_per_fold=2,
    initial_learning_rate=initial_learning_rate,
    patience=5,

    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler
)

# K-fold

In [None]:
# Run k-fold cross-validation with training_config. The result is iterated by
# later cells as (training_logger, evaluation_results) pairs — presumably one
# pair per fold.
results_per_fold = kfold.kfold_cross_validation(training_config)

In [None]:
# Print the metrics table of each entry in results_per_fold, one after another.
for fold_result in results_per_fold:
    training_logger, evaluation_results = fold_result
    df = evaluation_results.get_metrics_table_as_df()
    print(df)

In [None]:
# Build a summary table with one row per fold (macro/micro precision, recall,
# F1 and accuracy) followed by an "average" row holding the column-wise mean.
tuples = [("macro", "precision"), ("macro", "recall"), ("macro", "f1_score"), ("micro", "precision"),
          ("micro", "recall"), ("micro", "f1_score"), ("", "accuracy")]
columns = pd.MultiIndex.from_tuples(tuples, names=["", "fold"])

data = []
for (training_logger, evaluation_results) in results_per_fold:
    macro_precision, macro_recall, macro_f1_score, macro_accuracy = utils.cm_macro.calculate_overall_metrics(
        evaluation_results.confusion_matrix)
    micro_precision, micro_recall, micro_f1_score, micro_accuracy = utils.cm_micro.calculate_overall_metrics(
        evaluation_results.confusion_matrix)
    accuracy = (macro_accuracy + micro_accuracy) / 2  # should be the same for both
    data.append(
        [macro_precision, macro_recall, macro_f1_score, micro_precision, micro_recall, micro_f1_score, accuracy]
    )

fold_numbers = list(range(1, len(data) + 1))
temp_df = pd.DataFrame(data, index=pd.Index(fold_numbers), columns=columns)

# Average over temp_df, the per-fold frame built just above. `df` must not be
# read here: it is only assigned at the end of this cell, so before that point
# it still holds whatever table an earlier cell left behind.
# The resulting Series follows the column order, i.e. the order of `tuples`.
averages = temp_df.mean(axis=0).tolist()
data.append(averages)

indices = fold_numbers + ["average"]
df = pd.DataFrame(data, index=pd.Index(indices), columns=columns)

df.style

In [None]:
# Show the first (index label, row) pair yielded by df.iterrows() as a list.
[*next(df.iterrows())]

In [None]:
# Imported in this cell; the pipe-based cells further down rely on this name.
import pipe

# Mean of every metric column currently present in df, in the order of `tuples`.
averages = [np.mean(np.array(df[metric_key])) for metric_key in tuples]

# Pair each column key with its mean and append the mapping to df as a new row
# whose label is the current number of rows.
as_row = {metric_key: mean_value for metric_key, mean_value in zip(tuples, averages)}

df.loc[len(df)] = as_row

In [None]:
# Display df, which at this point includes the row appended through df.loc in
# the preceding cell.
df

In [None]:
import src.types as tps

# Keep the second element (the evaluation results) of every entry in
# results_per_fold and hand the list to the project's DataFrame formatter.
evaluation_results_list = [fold_result[1] for fold_result in results_per_fold]
df = tps.EvaluationResults.format_evaluation_results_as_df(evaluation_results_list)
df.style

In [None]:
import pickle
import src.types as tps

# Deserialize results_per_fold from a results file on disk (presumably written
# by an earlier training run — confirm where results.pkl is produced).
# NOTE(security): pickle.load can execute arbitrary code while unpickling; only
# load files that this project wrote itself.
with open('../output/kfold_model/results.pkl', 'rb') as f:
    results_per_fold = pickle.load(f)

# A plain comprehension keeps this cell runnable on its own: it needs nothing
# beyond the two imports at its top, rather than a `pipe` import made elsewhere.
evaluation_results_list = [fold_result[1] for fold_result in results_per_fold]
df = tps.EvaluationResults.format_evaluation_results_as_df(evaluation_results_list)
df.style