# Environment

In [1]:
import os
import io
import json
import requests
import zipfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from collections import defaultdict
from collections import namedtuple
from glob import glob

In [None]:
def project_root(path):
    """Return *path* with a trailing ``notebooks`` component removed.

    Only the final path component is inspected, so directories that merely
    start with or contain ``notebooks`` (e.g. ``notebooks_old``) are returned
    unchanged. ``os.path.split`` uses the separator of the running platform,
    so this works on both Windows and POSIX.

    Args:
        path: directory path, typically ``os.getcwd()``.

    Returns:
        The parent directory when the last component is exactly
        ``notebooks``; otherwise *path* itself.
    """
    parent, last = os.path.split(path)
    return parent if last == 'notebooks' else path


# Adjust CWD: when started from the notebooks folder, move one level up
directory = project_root(os.getcwd())
os.chdir(directory)
wd = os.getcwd()

# Choose Model Results

In [None]:
# Directory (relative to the working directory) whose model logs are analysed.
# Alternative values left here by the author: 'random_se_b', 'proportional_se_b'.
logs_dir = 'baseline'

# Load Model Training Logs

In [None]:
# Per-variant storage, keyed by `var` (the last '_'-separated token of the
# run directory name):
#   va_dict[var]         -> list of per-run validation-accuracy sequences
#   ta_dict[var]         -> list of per-run test accuracies
#   var_model_names[var] -> distinct model names that contributed runs
va_dict = defaultdict(list)
ta_dict = defaultdict(list)
var_model_names = defaultdict(list)

# iterate through model directories (sorted for a reproducible order)
for model_dir in sorted(glob(f'{logs_dir}/*')):

    # stray files next to the run directories hold no logs; skip them
    if not os.path.isdir(model_dir):
        continue

    # get model id based on model directory
    model_id = os.path.basename(model_dir)

    # extract model_name and var from model_id:
    # '<model_name>_<extra>_<var>' -> model_name, var (the extra part is dropped)
    model_name, var = model_id.rsplit('_', 1)
    model_name = model_name.rsplit('_', 1)[0]

    # remember which model this run belongs to, so the summary below does
    # not have to rely on whatever `model_name` holds after the loop
    if model_name not in var_model_names[var]:
        var_model_names[var].append(model_name)

    # read validation accuracy from training logs
    train_log = pd.read_csv(f'{model_dir}/train_log.csv')
    va_dict[var].append(train_log.valid_accuracy.tolist())

    # read test accuracy from test logs
    with open(f'{model_dir}/test_logs.json') as f:
        ta_dict[var].append(json.load(f)['accuracy'])

# compute accuracy mean and SE for each variant
Logs = namedtuple('Logs', ['va_mean', 'va_se', 'ta_mean', 'ta_se'])
logs = {}
for var, v in va_dict.items():
    # print the number of runs per group, labelled with the model name(s)
    # that actually contributed; several names joined by '+' mean that runs
    # of different models share this `var` and are being averaged together
    label = '+'.join(var_model_names[var])
    print(f'{label}_{var}: {len(v)}')

    # mean and standard error of validation accuracy across runs (axis 0)
    # NOTE(review): np.std defaults to ddof=0 (population std); confirm
    # whether the sample std (ddof=1) is wanted for the standard error.
    va = np.array(v)
    va_mean = np.mean(va, axis=0)
    va_se = np.std(va, axis=0) / np.sqrt(va.shape[0])

    # mean and standard error of test accuracy across runs
    ta = np.array(ta_dict[var])
    ta_mean = np.mean(ta)
    ta_se = np.std(ta) / np.sqrt(len(ta))

    # save validation and test logs
    logs[var] = Logs(va_mean, va_se, ta_mean, ta_se)


FasterRCNN_FPN_1100_qdrl: 5
FasterRCNN_FPN_1100_square: 5
FasterRCNN_FPN_1440_qdrl: 5
FasterRCNN_FPN_1440_square: 5
FasterRCNN_FPN_800_qdrl: 5
FasterRCNN_FPN_800_square: 5
RCNN_128_qdrl: 5
RCNN_128_square: 5
RCNN_256_qdrl: 5
RCNN_256_square: 5
RCNN_64_qdrl: 5
RCNN_64_square: 5


# Compare models

In [None]:
# Summarise every entry of `logs` as "mean ± SE" strings (in percent).
# Validation figures use the last element of the validation arrays; the test
# figures are the scalar mean/SE stored in the entry.
# NOTE(review): `model_name` is not assigned in this cell; it is whatever the
# log-loading loop left behind, so every label shares that one name.
model_names = [f'{model_name}_{key}' for key in logs]
validation_accuracy = [
    f'{100 * entry.va_mean[-1]:.3f} ± {100 * entry.va_se[-1]:.3f}'
    for entry in logs.values()
]
testing_accuracy = [
    f'{100 * entry.ta_mean:.6f} ± {100 * entry.ta_se:.6f}'
    for entry in logs.values()
]

# Assemble the comparison table, one row per entry
df = pd.DataFrame({
    'Model Name': model_names,
    'Validation Accuracy %': validation_accuracy,
    'Testing Accuracy %': testing_accuracy,
})

# Show the table as the cell output
df