# Analysis
This notebook contains the analysis of the JSON result files generated during evaluation, which is used to decide on the best course of action.

In [1]:
# Import section
from collections import defaultdict
import json
import os

import pandas as pd

In [2]:
# Global Constants
# Root directory (relative path) containing the result files; parse_fold_information
# joins it with a run label to locate the folder of that run.
PATH_TO_RESULTS = "../results"

In [3]:
def parse_fold_information(label: str, model_name: str) -> pd.DataFrame:
    """Load the result files of one model for one run into a DataFrame.

    Every regular file inside ``PATH_TO_RESULTS/<label>`` whose name contains
    ``model_name`` is parsed as JSON. Each file becomes one row and each
    top-level JSON key becomes one column.

    Args:
        label: Name of the sub-folder of ``PATH_TO_RESULTS`` to read.
        model_name: Substring a file name must contain to be included.

    Returns:
        A DataFrame with one row per matching file, ordered by file name
        (lexicographically). A key missing from some file is filled with NaN
        for that row. The DataFrame is empty when no file matches.

    Raises:
        FileNotFoundError: If the folder does not exist (raised by ``os.listdir``).
        ValueError: If a matching file is not valid JSON
            (``json.JSONDecodeError``) or its top-level value is not an object.
    """
    # Get the full folder for the label
    folder = os.path.join(PATH_TO_RESULTS, label)

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order reproducible between runs and machines.
    candidates = sorted(name for name in os.listdir(folder) if model_name in name)

    # One dict per file: building the frame from records tolerates files whose
    # key sets differ, instead of failing on columns of unequal length.
    records = []

    for name in candidates:
        full_path = os.path.join(folder, name)
        # A sub-directory can match the name filter too; it cannot be opened as a file.
        if not os.path.isfile(full_path):
            continue
        with open(full_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if not isinstance(data, dict):
            raise ValueError(
                f"Expected a JSON object in {full_path}, got {type(data).__name__}"
            )
        records.append(data)

    return pd.DataFrame(records)

def aggregate(model: str, df: pd.DataFrame,
              metrics: "list[str] | None" = None):
    """Print the mean and standard deviation of selected metric columns.

    The output is the model name, then a table with one row per metric and the
    columns ``mean`` and ``std``, then an empty line.

    Args:
        model: Display name printed above the table.
        df: DataFrame with one column per metric.
        metrics: Column names to summarise. Any iterable of names is accepted.
            ``None`` selects f1_score, accuracy, precision, recall and val_loss.

    Returns:
        None. The summary is only printed.

    Raises:
        KeyError: If any requested metric is not a column of ``df``.
    """
    # The default is resolved here rather than in the signature, so no mutable
    # list object is shared between calls.
    if metrics is None:
        metrics = ["f1_score", "accuracy", "precision", "recall", "val_loss"]

    # A tuple passed to df[...] would be treated as one single key, so always
    # index with a real list.
    selected = list(metrics)

    missing = [name for name in selected if name not in df.columns]
    if missing:
        raise KeyError(f"{model}: metric column(s) not found in results: {missing}")

    print(model)
    summary = df[selected].agg(['mean', 'std']).transpose()
    print(summary)
    print("")

## Run 1: Cross-Validation — Classification on Data without any modifications
MobileNetV3, EfficientNet-B0, ShuffleNet and custom CNNs were run directly on the data, without any additional mechanisms, to compare raw performance in a 10-fold cross-validation.

In [4]:
# Local Constants
# Name of the sub-folder of PATH_TO_RESULTS that the following cells read.
LABEL = "cross_val_baseline"

In [5]:
# Load the result files of every architecture for the current LABEL.
cnn, efficientnet, mobilenet, shufflenet = (
    parse_fold_information(LABEL, model_key)
    for model_key in ("cnn", "efficientnet", "mobilenet", "shufflenet")
)

In [6]:
# Print the mean/std table of each architecture, in a fixed order.
for title, fold_frame in (
    ("CNN", cnn),
    ("EfficientNet", efficientnet),
    ("MobileNet", mobilenet),
    ("ShuffleNet", shufflenet),
):
    aggregate(title, fold_frame)

CNN
               mean       std
f1_score   0.329365  0.048161
accuracy   0.409068  0.324335
precision  0.403366  0.353520
recall     0.751852  0.377533
val_loss   0.702145  0.051089

EfficientNet
               mean       std
f1_score   0.475545  0.160060
accuracy   0.669860  0.281165
precision  0.536510  0.326948
recall     0.651852  0.272392
val_loss   0.649492  0.059587

MobileNet
               mean       std
f1_score   0.420039  0.111858
accuracy   0.619157  0.285041
precision  0.524625  0.368649
recall     0.607407  0.279274
val_loss   0.636350  0.079073

ShuffleNet
               mean       std
f1_score   0.526515  0.117469
accuracy   0.792976  0.189495
precision  0.684456  0.285278
recall     0.551852  0.267245
val_loss   0.547775  0.075891



## Run 2: Cross-Validation — Classification with RoI cropping
Check whether RoI cropping with YOLO has a positive impact on the classification pipeline.

In [7]:
# Name of the sub-folder of PATH_TO_RESULTS that the following cell reads.
LABEL = "cross_val_roi"

In [8]:
# Load the result files of every architecture for the current LABEL.
cnn, efficientnet, mobilenet, shufflenet = (
    parse_fold_information(LABEL, model_key)
    for model_key in ("cnn", "efficientnet", "mobilenet", "shufflenet")
)

# Print the mean/std table of each architecture, in a fixed order.
for title, fold_frame in (
    ("CNN", cnn),
    ("EfficientNet", efficientnet),
    ("MobileNet", mobilenet),
    ("ShuffleNet", shufflenet),
):
    aggregate(title, fold_frame)

CNN
               mean       std
f1_score   0.298283  0.057064
accuracy   0.381281  0.285873
precision  0.306359  0.306551
recall     0.742857  0.387162
val_loss   0.707397  0.022903

EfficientNet
               mean       std
f1_score   0.524742  0.151793
accuracy   0.836322  0.103042
precision  0.770476  0.245873
recall     0.490000  0.218892
val_loss   0.566428  0.052737

MobileNet
               mean       std
f1_score   0.417692  0.109407
accuracy   0.507407  0.238925
precision  0.291300  0.109114
recall     0.851852  0.165085
val_loss   0.691758  0.026871

ShuffleNet
               mean       std
f1_score   0.673160  0.145517
accuracy   0.888342  0.053282
precision  0.786395  0.202692
recall     0.614286  0.165392
val_loss   0.523567  0.053942



## Run 3: Cross-Validation — Classification with Weighted Loss
Measure performance with weighted loss for class imbalance.

In [9]:
# Name of the sub-folder of PATH_TO_RESULTS that the following cell reads.
LABEL = "cross_val_weighted_loss"

In [10]:
# Load the result files of every architecture for the current LABEL.
cnn, efficientnet, mobilenet, shufflenet = (
    parse_fold_information(LABEL, model_key)
    for model_key in ("cnn", "efficientnet", "mobilenet", "shufflenet")
)

# Print the mean/std table of each architecture, in a fixed order.
for title, fold_frame in (
    ("CNN", cnn),
    ("EfficientNet", efficientnet),
    ("MobileNet", mobilenet),
    ("ShuffleNet", shufflenet),
):
    aggregate(title, fold_frame)

CNN
               mean       std
f1_score   0.422186  0.078554
accuracy   0.642184  0.280393
precision  0.503095  0.302067
recall     0.596667  0.300801
val_loss   1.098361  0.055903

EfficientNet
               mean       std
f1_score   0.447486  0.124734
accuracy   0.593333  0.172148
precision  0.358606  0.235301
recall     0.800000  0.205480
val_loss   1.073502  0.080908

MobileNet
               mean       std
f1_score   0.406179  0.090501
accuracy   0.525977  0.255615
precision  0.304930  0.120095
recall     0.776667  0.226650
val_loss   1.089747  0.086940

ShuffleNet
               mean       std
f1_score   0.483451  0.156489
accuracy   0.715964  0.235402
precision  0.460824  0.199726
recall     0.600000  0.204803
val_loss   1.053080  0.099752



## Run 3: Cross-Validation — Classification with Weighted Sampling
Measure performance with weighted sampling for class imbalance.

In [11]:
# Name of the sub-folder of PATH_TO_RESULTS that the following cell reads.
LABEL = "cross_val_weighted_sampling"

In [12]:
# Load the result files of every architecture for the current LABEL.
cnn, efficientnet, mobilenet, shufflenet = (
    parse_fold_information(LABEL, model_key)
    for model_key in ("cnn", "efficientnet", "mobilenet", "shufflenet")
)

# Print the mean/std table of each architecture, in a fixed order.
for title, fold_frame in (
    ("CNN", cnn),
    ("EfficientNet", efficientnet),
    ("MobileNet", mobilenet),
    ("ShuffleNet", shufflenet),
):
    aggregate(title, fold_frame)

CNN
               mean       std
f1_score   0.396500  0.085706
accuracy   0.655057  0.277727
precision  0.603095  0.372025
recall     0.540000  0.336210
val_loss   0.666499  0.068875

EfficientNet
               mean       std
f1_score   0.473687  0.115679
accuracy   0.749540  0.123704
precision  0.468855  0.220675
recall     0.563333  0.160593
val_loss   0.642296  0.045469

MobileNet
               mean       std
f1_score   0.438660  0.157196
accuracy   0.755977  0.137303
precision  0.512689  0.284399
recall     0.490000  0.217193
val_loss   0.597751  0.077192

ShuffleNet
               mean       std
f1_score   0.501779  0.149693
accuracy   0.688161  0.280124
precision  0.489272  0.201883
recall     0.676667  0.254369
val_loss   0.606920  0.087218



## Run 4: Cross Validation — RoI with Weighted Loss
Measure performance with both RoI and Weighted loss.

In [13]:
# Name of the sub-folder of PATH_TO_RESULTS that the following cell reads.
LABEL = "cross_val_roi_weighted_loss"

In [14]:
# Load the result files of every architecture for the current LABEL.
cnn, efficientnet, mobilenet, shufflenet = (
    parse_fold_information(LABEL, model_key)
    for model_key in ("cnn", "efficientnet", "mobilenet", "shufflenet")
)

# Print the mean/std table of each architecture, in a fixed order.
for title, fold_frame in (
    ("CNN", cnn),
    ("EfficientNet", efficientnet),
    ("MobileNet", mobilenet),
    ("ShuffleNet", shufflenet),
):
    aggregate(title, fold_frame)

CNN
               mean       std
f1_score   0.443133  0.106624
accuracy   0.664713  0.229756
precision  0.403320  0.167438
recall     0.633333  0.266667
val_loss   1.119618  0.049638

EfficientNet
               mean       std
f1_score   0.537562  0.150277
accuracy   0.693448  0.164913
precision  0.407853  0.158687
recall     0.863333  0.173169
val_loss   1.030399  0.109383

MobileNet
               mean       std
f1_score   0.336924  0.039516
accuracy   0.366794  0.210330
precision  0.295792  0.265294
recall     0.855556  0.296273
val_loss   1.146482  0.061040

ShuffleNet
               mean       std
f1_score   0.607054  0.135646
accuracy   0.789540  0.167665
precision  0.585393  0.227943
recall     0.726667  0.155397
val_loss   0.980385  0.084076



## Run 4: Cross Validation — RoI with Weighted Sampling
Measure performance with both RoI and Weighted Sampling.

In [15]:
# Name of the sub-folder of PATH_TO_RESULTS that the following cell reads.
LABEL = "cross_val_roi_weighted_sampling"

In [16]:
# Load the result files of every architecture for the current LABEL.
cnn, efficientnet, mobilenet, shufflenet = (
    parse_fold_information(LABEL, model_key)
    for model_key in ("cnn", "efficientnet", "mobilenet", "shufflenet")
)

# Print the mean/std table of each architecture, in a fixed order.
for title, fold_frame in (
    ("CNN", cnn),
    ("EfficientNet", efficientnet),
    ("MobileNet", mobilenet),
    ("ShuffleNet", shufflenet),
):
    aggregate(title, fold_frame)

CNN
               mean       std
f1_score   0.485370  0.141696
accuracy   0.629080  0.284843
precision  0.503924  0.322599
recall     0.716667  0.245578
val_loss   0.685210  0.063355

EfficientNet
               mean       std
f1_score   0.605137  0.142116
accuracy   0.833678  0.074314
precision  0.585476  0.151163
recall     0.686667  0.206200
val_loss   0.586910  0.052880

MobileNet
               mean       std
f1_score   0.366622  0.063820
accuracy   0.494253  0.231639
precision  0.331217  0.255871
recall     0.770370  0.316862
val_loss   0.697311  0.039344

ShuffleNet
               mean       std
f1_score   0.647641  0.072260
accuracy   0.868199  0.038317
precision  0.694048  0.161762
recall     0.644444  0.148137
val_loss   0.575955  0.056104



## Run 4: Cross Validation — RoI with Both Weighted
Measure performance with RoI cropping combined with both weighted loss and weighted sampling.

In [17]:
# Name of the sub-folder of PATH_TO_RESULTS that the following cell reads.
LABEL = "cross_val_roi_weighted_both"

In [18]:
# Load the result files of every architecture for the current LABEL.
cnn, efficientnet, mobilenet, shufflenet = (
    parse_fold_information(LABEL, model_key)
    for model_key in ("cnn", "efficientnet", "mobilenet", "shufflenet")
)

# Print the mean/std table of each architecture, in a fixed order.
for title, fold_frame in (
    ("CNN", cnn),
    ("EfficientNet", efficientnet),
    ("MobileNet", mobilenet),
    ("ShuffleNet", shufflenet),
):
    aggregate(title, fold_frame)

CNN
               mean       std
f1_score   0.343524  0.040856
accuracy   0.355862  0.262496
precision  0.295788  0.252044
recall     0.860000  0.267499
val_loss   1.142902  0.067224

EfficientNet
               mean       std
f1_score   0.469823  0.110394
accuracy   0.586667  0.162676
precision  0.329264  0.114280
recall     0.913333  0.120902
val_loss   1.043890  0.048366

MobileNet
               mean       std
f1_score   0.315966  0.052420
accuracy   0.412299  0.299224
precision  0.329195  0.270626
recall     0.713333  0.374561
val_loss   1.133842  0.043554

ShuffleNet
               mean       std
f1_score   0.531203  0.093005
accuracy   0.701724  0.139033
precision  0.460556  0.236830
recall     0.830000  0.198420
val_loss   1.014458  0.064004

