# Evaluation/Testing

In [8]:
import pickle
import torch
import pandas as pd
import torch.nn as nn
import numpy as np

import scripts.data_loader as data_loader
import scripts.model.training as training
import scripts.model.evaluation as evaluation

from dataclasses import dataclass, asdict
from scripts.model.types import * 
from models.model1 import OB_05Model


cm = ConfusionMatrx
cm_macro = ConfusionMatrx.Macro
cm_micro = ConfusionMatrx.Micro


with open("models/training_logger.pkl", "rb") as file:
    training_logger = pickle.load(file)

print(training_logger)

TrainingLogger(training_confusion_matrix_history=[array([[257, 101,  51,  28],
       [177,  84,  47,  35],
       [ 50,  16, 227, 138],
       [ 58,  23, 164, 164]]), array([[289, 127,   5,  16],
       [169, 145,  12,  17],
       [  3,  12, 298, 118],
       [ 10,  12, 110, 277]]), array([[320, 105,   5,   7],
       [168, 148,  13,  14],
       [  2,   5, 346,  78],
       [  9,   6,  95, 299]]), array([[323,  99,   6,   9],
       [163, 154,   9,  17],
       [  4,  10, 339,  78],
       [  8,   5,  79, 317]]), array([[318, 110,   1,   8],
       [144, 179,  10,  10],
       [  3,   4, 349,  75],
       [  5,   9,  56, 339]]), array([[332,  92,   4,   9],
       [132, 189,  12,  10],
       [  1,   4, 369,  57],
       [  7,   2,  43, 357]]), array([[329,  95,   4,   9],
       [125, 197,   9,  12],
       [  3,   2, 384,  42],
       [  2,  11,  43, 353]]), array([[341,  85,   2,   9],
       [ 95, 229,   7,  12],
       [  3,   4, 386,  38],
       [  8,   6,  42, 353]]), array(

In [2]:
loaded_testing_dataset = torch.load('models/testing_dataset.pth')
testing_set_loader = data_loader.create_data_loader(loaded_testing_dataset)

print(next(iter(loaded_testing_dataset)))

(tensor([[[ 0.7595,  0.4922,  0.4922,  0.3777,  0.4031,  0.4540,  0.2886,
           0.3268,  0.1740,  0.1867,  0.0340, -0.0806, -0.1187, -0.0933,
          -0.1060, -0.1060, -0.1824, -0.1187,  0.0722,  0.5559,  0.9759,
           0.4922,  0.3904,  0.7086,  0.7850,  0.8486,  0.9123,  0.9759],
         [ 0.4540,  0.4922,  0.6577,  0.3268,  0.4795,  0.6322,  0.4668,
           0.3395,  0.3904,  0.4413,  0.2631,  0.0595, -0.0678,  0.0085,
          -0.0169, -0.0933, -0.0678,  0.0976,  0.3395,  0.5940,  0.9123,
           0.5304,  0.1613,  0.5304,  0.8232,  0.8995,  0.9632,  1.0013],
         [ 0.2758,  0.4159,  0.4159,  0.3522,  0.7722,  1.0141,  0.7341,
           0.5431,  0.5559,  0.7086,  0.5304,  0.1867,  0.1231,  0.1358,
           0.0849,  0.1740,  0.5431,  1.0395,  1.1414,  0.9886,  0.8232,
           0.2886,  0.0213,  0.1104,  0.7086,  0.8868,  0.9632,  0.9886],
         [ 0.1104,  0.0213,  0.0085,  0.4159,  0.7213,  0.6959,  0.9632,
           0.7977,  0.9632,  0.7213,  0.7341,  

In [3]:
model = OB_05Model()
model.load_state_dict(torch.load('models/model.pth'))
evaluation_result = evaluation.evaluate_model(model, testing_set_loader)

In [69]:
confusion_matrix = evaluation_result.confusion_matrix

macro_precision, macro_recall, macro_f1_score, macro_accuracy = cm_macro.calculate_overall_metrics(confusion_matrix)
micro_precision, micro_recall, micro_f1_score, micro_accuracy = cm_macro.calculate_overall_metrics(confusion_matrix)
accuracy = (macro_accuracy + micro_accuracy) / 2  # should be the same for both

data = [[macro_precision, macro_recall, macro_f1_score, micro_precision, micro_recall, micro_f1_score, accuracy]]
tuples = [("macro", "precision"), ("macro", "recall"), ("macro", "f1_score"), ("micro", "precision"), ("micro", "recall"), ("micro", "f1_score"), ("", "accuracy")]

df = pd.DataFrame(data,
                  index=pd.Index(["model"]),
                  columns=pd.MultiIndex.from_tuples(tuples, names=["", "metrics"]))
s = df.style

s

Unnamed: 0_level_0,macro,macro,macro,micro,micro,micro,Unnamed: 7_level_0
metrics,precision,recall,f1_score,precision,recall,f1_score,accuracy
model,0.500757,0.465695,0.482389,0.500757,0.465695,0.482389,0.008945


In [55]:
html = df.style.to_html()
with open("styled_table.html", "w") as f:
    f.write(html)