# Analysis
This notebook contains the analysis of the JSON result files generated during evaluation, used to decide on the best course of action.

## Run 1: Cross-Validation — Classification on Data without any modifications
MobileNetV3, EfficientNet-B0, ShuffleNet and custom CNNs were run directly on the data, without any additional mechanisms, to compare raw performance under 10-fold cross-validation.

In [5]:
# Import section
from collections import defaultdict
import json
import os


import pandas as pd

In [None]:
# Global Constants
# Root folder of the evaluation results; each run label is a sub-folder of it.
PATH_TO_RESULTS = "../results"

In [7]:
# Local Constants
# Sub-folder of PATH_TO_RESULTS whose result files are analysed in this section.
LABEL = "cross_val_no_weighting"

In [30]:
def parse_fold_information(label: str, model_name: str) -> pd.DataFrame:
    """Collect the result files of one model into a single DataFrame.

    Every regular file in ``PATH_TO_RESULTS/<label>`` whose name contains
    ``model_name`` is parsed as a JSON object. Each file contributes one row
    and each top-level key of the JSON object becomes a column.

    Args:
        label: Name of the sub-folder of ``PATH_TO_RESULTS`` to read.
        model_name: Substring a file name must contain to be selected.

    Returns:
        One row per matching file, ordered by file name. The frame is empty
        when no file matches.

    Raises:
        FileNotFoundError: If the label folder does not exist.
        ValueError: If a file is not valid JSON, or if the files do not all
            provide the same keys (the columns would have unequal lengths).
    """
    # Get the full folder for the label
    folder = os.path.join(PATH_TO_RESULTS, label)

    # os.listdir returns entries in arbitrary order, so sort them to make the
    # row order reproducible. Directories that happen to match the model name
    # are skipped because they cannot be opened as result files.
    files = sorted(
        name
        for name in os.listdir(folder)
        if model_name in name and os.path.isfile(os.path.join(folder, name))
    )

    metrics = defaultdict(list)

    for file in files:
        full_path = os.path.join(folder, file)
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(full_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        for key, value in data.items():
            metrics[key].append(value)

    return pd.DataFrame(metrics)

def aggregate(model: str, df: pd.DataFrame,
              metrics: list[str] = ["f1_score", "accuracy", "precision", "recall", "val_loss"]):
    """Print the mean and standard deviation of selected metric columns.

    Args:
        model: Heading printed above the summary table.
        df: Frame holding one column per metric.
        metrics: Names of the columns of ``df`` to summarise.

    Returns:
        None. The summary is written to standard output only.

    Raises:
        KeyError: If one of ``metrics`` is not a column of ``df``.
    """
    print(model)
    # Honour the caller's selection instead of a second hard-coded list.
    # list(...) lets any sequence select columns and keeps the shared default
    # object from being handed to pandas directly.
    summary = df[list(metrics)].agg(["mean", "std"]).transpose()
    print(summary)
    print("")

In [19]:
# Load the result files of each architecture from the LABEL sub-folder,
# one DataFrame per model (one row per matching file).
cnn = parse_fold_information(LABEL, "cnn")
efficientnet = parse_fold_information(LABEL, "efficientnet")
mobilenet = parse_fold_information(LABEL, "mobilenet")
shufflenet = parse_fold_information(LABEL, "shufflenet")

In [31]:
# Print the mean and standard deviation of the default metrics for each model.
aggregate("CNN", cnn)
aggregate("EfficientNet", efficientnet)
aggregate("MobileNet", mobilenet)
aggregate("ShuffleNet", shufflenet)

CNN
               mean       std
f1_score   0.314521  0.025359
accuracy   0.286667  0.214918
precision  0.269080  0.257276
recall     0.866667  0.261052
val_loss   0.717242  0.054002

EfficientNet
               mean       std
f1_score   0.563315  0.157316
accuracy   0.827011  0.082669
precision  0.642554  0.232485
recall     0.606667  0.231367
val_loss   0.573706  0.065731

MobileNet
               mean       std
f1_score   0.430105  0.103001
accuracy   0.631162  0.225209
precision  0.417911  0.245957
recall     0.700000  0.324037
val_loss   0.663719  0.070578

ShuffleNet
               mean       std
f1_score   0.494486  0.162359
accuracy   0.756322  0.217654
precision  0.628571  0.294272
recall     0.560000  0.254733
val_loss   0.608302  0.067740

