# Analysis of Model Selection measures

In [None]:
import glob
import os
import pandas as pd
import json
import numpy as np
from skimage.segmentation import disk_level_set

# Print the working directory, since the glob pattern below is relative to it.
print(os.getcwd())
# Collect every file named UMS*dict_output.json located two directory levels
# below ../outputs.
paths = glob.glob('../outputs/*/*/UMS*dict_output.json')

In [None]:
import os
import json
from collections import defaultdict


def get_measure_name(output_filename):
    """Return the measure name encoded in a result file's name.

    The name is the part of the file's base name that precedes the first
    ``'_dict_output'``; the whole base name is returned when that marker
    is absent.
    """
    file_name = os.path.basename(output_filename)
    measure, _, _ = file_name.partition('_dict_output')
    return measure



# Group the result files by their containing directory so that each run
# directory is processed once, with all of its measure files together.
path_dict = defaultdict(list)

for p in paths:
    base_path, result_file = os.path.split(p)
    path_dict[base_path].append(result_file)

# Measure name -> '/'-separated key paths read from each model's results.
result_fields = {'UMS': ['gt/bbox/AP50', 'umsdas/iou', 'umsdas/giou', 'umsdas/ioukl', 'umsdas/ioukl_iou', 'umsdas/ioukl_kl',
                         'umsdropout/iou', 'umsdropout/giou', 'umsdropout/ioukl', 'umsdropout/ioukl_iou', 'umsdropout/ioukl_kl'],
                # 'BoS': ['BoS'],
                #'DAS': ['DAS', 'PDR_normalized', 'FIS_normalized', 'FIS', 'PDR'],
                 }
rows = []

for base_path, result_files in path_dict.items():
    # The two innermost directory names identify the run.
    split_path = base_path.split('/')
    domain = split_path[-2]
    method = split_path[-1]

    # Load every measure file of this run, keyed by measure name.
    datas = {}
    for result_file in result_files:
        with open(os.path.join(base_path, result_file), 'r') as file:
            data = json.load(file)
        measure = get_measure_name(result_file)
        if measure not in ['DAS']:
            # Reduce the keys to their base name; DAS files keep their keys as stored.
            data = {os.path.basename(k): v for k, v in data.items()}
        datas[measure] = data

    # The models listed in the UMS file define the rows emitted for this run.
    models = datas['UMS'].keys()
    for m in models:
        # The iteration count is read from a fixed slice of the model name.
        # NOTE(review): assumes names end in a 7-digit iteration count followed
        # by a 4-character extension (e.g. 'model_0002999.pth') -- confirm.
        iterations = int(m[-11:-4])
        row = [domain, method, m, iterations]
        for measure, fields in result_fields.items():
            model_measure_data = datas[measure].get(m)

            # Pick the results of the validation/test dataset for this model.
            # `results` stays None when the model or the dataset is missing, in
            # which case every field of this measure is recorded as NaN.
            results = None
            if model_measure_data is not None:
                datasets = [k for k in model_measure_data if '_val' in k or 'test' in k]
                if len(datasets) != 1:
                    print(f"Can't get val dataset: {row} {measure} {m} {datasets}")
                if datasets:
                    # With several candidates the first one is used, as warned above.
                    results = model_measure_data[datasets[0]]

            for f in fields:
                if results is None:
                    # np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
                    row.append(np.nan)
                    continue
                # Walk down the nested dicts following the '/'-separated key path.
                result = results
                for k in f.split("/"):
                    result = result[k]
                if isinstance(result, list):
                    result = result[0]
                elif isinstance(result, dict):
                    result = result['1'][0]  # FIS storage
                row.append(result)
        rows.append(row)

# Column names, in the same order in which the row values were appended.
headings = ['domain', 'method', 'model', 'iters']
for measure, fields in result_fields.items():
    for f in fields:
        h = "AP50" if "AP50" in f else f"{measure}_{f}"
        headings.append(h)

df = pd.DataFrame(data=rows, columns=headings)

#df['UMS_sum'] = df['UMS_entropy'] + df['UMS_info_max_reg']
#df['UMS_diff'] = - df['UMS_entropy'] + df['UMS_info_max_reg']
#df['UMS_comb'] =df['UMS_loss_box_giou'] - df['UMS_loss_score_logits']


In [None]:
def y_axis_title(column):
    """Build the axis label for a measure column.

    The label is the text after the last ``'/'`` followed, in parentheses,
    by ``dropout`` when the text before the first ``'/'`` is exactly
    ``'UMS_umsdropout'`` and by ``DAS`` otherwise.
    """
    parts = column.split("/")
    source = 'dropout' if parts[0] == 'UMS_umsdropout' else 'DAS'
    return f"{parts[-1]} ({source})"
    

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

y_axis_lookup = {}

# Every measure column (everything after the four identifying columns) except
# AP50, which is drawn on the secondary axis of each subplot instead.
vals = sorted([h for h in headings[4:] if 'AP50' not in h])

# Keep the 'strongaug' methods first, followed by the remaining 'aldi' methods.
# A name matching both filters is listed once so it does not get two plot rows.
methods = sorted(set(df['method']))
methods = ([m for m in methods if 'strongaug' in m]
           + [m for m in methods if 'aldi' in m.lower() and 'strongaug' not in m])

# squeeze=False always returns a 2-D array of axes, so axes[j, i] is valid even
# when there is a single method and/or a single measure column.
fig, axes = plt.subplots(len(methods), len(vals), figsize=(len(vals)*3, len(methods)*3), squeeze=False)

# One row of subplots per method, one column per measure.
for j, method in enumerate(methods):
    # Sort by iteration so each curve is drawn left to right instead of in the
    # order the models happened to be stored.
    # NOTE(review): rows are selected by method name only, so runs of the same
    # method in different domains would be drawn as one curve -- confirm that
    # only one domain is loaded.
    # To plot only later checkpoints, filter here, e.g. sub_df[sub_df['iters'] > 2999].
    sub_df = df[df['method'] == method].sort_values('iters')

    for i, val in enumerate(vals):
        # Measure on the left (green) axis.
        ax = axes[j, i]
        ax.plot(sub_df['iters'], sub_df[val], color='green')
        ax.set_ylabel(y_axis_title(val), color='green')
        ax.set_xlabel('iters')
        ax.grid(True)

        # AP50 on a second y-axis sharing the same x-axis (blue).
        ax2 = ax.twinx()
        ax2.plot(sub_df['iters'], sub_df['AP50'], 'b-')
        ax2.set_ylabel('AP50', color='b')

        # Label each row with its method name, on the first subplot only.
        if i == 0:
            ax.set_title(f"{method}")

plt.tight_layout()
plt.show()



## Summary heatmap

In [None]:
def _percent_gap(value, reference):
    """Return the difference between `value` and `reference` as a percentage of `reference`."""
    return (value - reference) / reference * 100


# Column whose value is reported for every selected checkpoint.
comparison = 'AP50'

# One row per method: the best AP50, followed by the percentage gap to that best
# AP50 for the best checkpoint itself (always 0), the worst-AP50 checkpoint, the
# last checkpoint, and the checkpoint each measure would select (its maximum).
rows = []
for m in methods:
    row = [m]
    sub_df = df[df['method'] == m]

    # Named best_ap50 rather than `max` so the builtin max() is not shadowed
    # for the rest of the notebook session.
    best_idx = sub_df['AP50'].idxmax()
    best_ap50 = sub_df.loc[best_idx, comparison]
    row.append(best_ap50)
    row.append(0.0)

    worst_idx = sub_df['AP50'].idxmin()
    row.append(_percent_gap(sub_df.loc[worst_idx, comparison], best_ap50))

    last_idx = sub_df['iters'].idxmax()
    row.append(_percent_gap(sub_df.loc[last_idx, comparison], best_ap50))

    for v in vals:
        # A measure with no values for this method cannot select a checkpoint;
        # record NaN instead of letting idxmax fail on an all-NaN column.
        scores = sub_df[v].dropna()
        if scores.empty:
            row.append(float('nan'))
            continue
        row.append(_percent_gap(sub_df.loc[scores.idxmax(), comparison], best_ap50))
    rows.append(row)

# NOTE(review): this rebinds `headings`, which the plotting cell reads to build
# `vals`; re-running that cell after this one requires rebuilding `headings` first.
headings = ['method', 'max_AP50', 'max_AP50_diff', 'min_AP50', 'last_AP50'] + [y_axis_title(v) for v in vals]

summary_df = pd.DataFrame(data=rows, columns=headings, index=methods)



In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


# Draw the summary as an annotated heatmap with a colorbar. The first two
# columns of summary_df are skipped; one figure unit is used per measure
# (width) and per method (height).
heatmap_values = summary_df.iloc[:, 2:]
plt.figure(figsize=(len(vals), len(methods)))
sns.heatmap(data=heatmap_values, annot=True, cbar=True)
plt.title('Percentage difference in AP50 between selected model and maximum')
plt.show()