In [1]:
import os
import torch
import numpy as np
import pandas as pd

# Threshold used by get_directories_with_results: result directories that sit
# under a numeric path component greater than or equal to this value are skipped.
CLUSTERS_TO_IGNORE = 100

# Root directory that is searched for the result files.
results_dir = 'results'
# File names that are loaded from every result directory.
check_point_file_name = 'check_point.pth'
arguments_file_name = 'arguments.pth'
evaluation_file_name = 'evaluation.pth'

In [2]:
def get_directories_with_results(dir_to_search, clusters_to_ignore=None):
    """Collect the leaf directories below ``dir_to_search`` that contain files.

    A directory is reported when it holds at least one file and has no
    sub-directories. A whole sub-tree is skipped when the first path
    component below ``dir_to_search`` is a decimal number greater than or
    equal to the ignore threshold.

    The component is taken relative to ``dir_to_search``, so the filter works
    for relative, absolute and nested search roots and does not depend on the
    platform's path separator.

    Args:
        dir_to_search: Root directory to walk.
        clusters_to_ignore: Threshold for the numeric first-level directory
            name. Defaults to the module-level ``CLUSTERS_TO_IGNORE``.

    Returns:
        List of directory paths, in the order ``os.walk`` yields them. Empty
        when ``dir_to_search`` does not exist or holds no result directory.
    """
    if clusters_to_ignore is None:
        clusters_to_ignore = CLUSTERS_TO_IGNORE

    result_dirs = []

    for root, dirs, files in os.walk(dir_to_search):
        # relpath yields '.' for the search root itself, which is never decimal.
        first_component = os.path.relpath(root, dir_to_search).split(os.sep)[0]
        # isdecimal() only accepts characters that int() can parse.
        if first_component.isdecimal() and int(first_component) >= clusters_to_ignore:
            # Every descendant shares this first component, so stop descending.
            dirs[:] = []
            continue
        if files and not dirs:
            result_dirs.append(root)

    return result_dirs

In [3]:
def _load_on_cpu(path):
    """Deserialize one ``torch.save`` file with all tensors mapped to the CPU."""
    # SECURITY: torch.load unpickles arbitrary Python objects, so only point
    # this at result files you produced yourself. weights_only=False is spelled
    # out because the files hold plain Python objects (callers use vars() on
    # the arguments and .total_seconds() on the evaluation time), which the
    # weights-only unpickler rejects.
    return torch.load(path, map_location=torch.device('cpu'), weights_only=False)


def get_all_args_and_data(dir_to_search):
    """Load the saved arguments, checkpoint and evaluation of every run.

    Every directory reported by ``get_directories_with_results`` is expected
    to contain the three files named by ``arguments_file_name``,
    ``check_point_file_name`` and ``evaluation_file_name``.

    Args:
        dir_to_search: Root directory that is searched for result directories.

    Returns:
        Tuple ``(all_args, all_data, all_evaluation)`` of three lists with one
        entry per result directory, all in the same directory order.
    """
    all_args = []
    all_data = []
    all_evaluation = []

    # `result_dir` instead of `dir`, which would shadow the builtin.
    for result_dir in get_directories_with_results(dir_to_search):
        # All three files are mapped to the CPU so the notebook also runs on
        # machines without the device the runs were saved from.
        all_args.append(_load_on_cpu(os.path.join(result_dir, arguments_file_name)))
        all_data.append(_load_on_cpu(os.path.join(result_dir, check_point_file_name)))
        all_evaluation.append(_load_on_cpu(os.path.join(result_dir, evaluation_file_name)))

    return all_args, all_data, all_evaluation

def get_results_all_data(dir_to_search):
    """Build one DataFrame row per run found below ``dir_to_search``.

    The columns are the attributes of each saved arguments object, followed by
    ``final_loss`` (last entry of the checkpoint's ``'loss_seq'``), ``data``
    (the loaded checkpoint) and ``eval`` (the loaded evaluation).
    """
    run_args, checkpoints, evaluations = get_all_args_and_data(dir_to_search)

    # The final loss is the last value recorded in each checkpoint's loss sequence.
    last_losses = [checkpoint['loss_seq'][-1] for checkpoint in checkpoints]

    frame = pd.DataFrame([vars(run) for run in run_args])
    frame['final_loss'] = last_losses
    frame['data'] = checkpoints
    frame['eval'] = evaluations
    return frame

def get_results(dir_to_search, clusters=True):
    """Summarise every run below ``dir_to_search`` in a compact DataFrame.

    The frame has the columns ``final_loss``, ``start``, ``end``, ``top1``,
    ``top5`` and ``eval_time`` (in seconds). When ``clusters`` is true a
    ``num_clusters`` column is appended as the last column.
    """
    runs = get_results_all_data(dir_to_search)
    checkpoints = runs['data']
    evaluations = runs['eval']

    columns = {
        'final_loss': runs['final_loss'],
        'start': runs['date_and_start_time'],
        'end': checkpoints.apply(lambda checkpoint: checkpoint['save_time']),
        'top1': evaluations.apply(lambda evaluation: evaluation['top1']),
        'top5': evaluations.apply(lambda evaluation: evaluation['top5']),
        'eval_time': evaluations.apply(lambda evaluation: evaluation['eval_time'].total_seconds()),
    }
    # Added last so the column order matches a frame extended after construction.
    if clusters:
        columns['num_clusters'] = runs['num_clusters']

    return pd.DataFrame(columns)

In [4]:
# Summary table (including num_clusters) for every run found below results_dir.
improved_df = get_results(results_dir)
improved_df

Unnamed: 0,final_loss,start,end,top1,top5,eval_time,num_clusters
0,0.116424,2024-04-19 01:19:38.827201,2024-04-19 03:59:01.071579,88.5,99.5,4.789872,25
1,0.175164,2024-04-19 01:19:38.827751,2024-04-19 03:57:00.291512,88.49,99.64,4.744316,25
2,0.329222,2024-04-19 01:19:38.827027,2024-04-19 03:57:29.099972,87.71,99.46,4.754891,25
3,0.215096,2024-04-19 01:19:38.829412,2024-04-19 04:00:31.731990,88.81,99.66,4.89532,25
4,0.295373,2024-04-19 01:19:38.830013,2024-04-19 03:57:04.462890,88.46,99.48,4.547283,25
5,0.188847,2024-04-19 01:19:47.554458,2024-04-19 03:19:29.950911,88.34,99.52,5.303724,5
6,0.303293,2024-04-19 01:19:47.553106,2024-04-19 03:19:31.172892,88.71,99.53,5.060603,5
7,0.118089,2024-04-19 01:19:47.552949,2024-04-19 03:18:03.808145,89.03,99.55,4.508155,5
8,0.317891,2024-04-19 01:19:47.555241,2024-04-19 03:20:18.185268,88.76,99.61,4.585092,5
9,0.139408,2024-04-19 01:19:47.553195,2024-04-19 03:20:52.615317,88.53,99.45,4.741202,5


In [61]:
# Per-trial LaTeX table of the runs in improved_df, ordered by cluster count.
improved_copy = improved_df[['top1', 'top5', 'eval_time', 'num_clusters']].rename(
    columns={
        'top1': 'Top1 Accuracy',
        'top5': 'Top5 Accuracy',
        'eval_time': 'Inference Time (Seconds)',
        'num_clusters': 'Number Clusters',
    }
)
improved_copy = improved_copy.sort_values('Number Clusters')
# Number the trials from the actual row count; a fixed range of 30 raises a
# ValueError as soon as a different number of runs is loaded.
improved_copy['Trial'] = [str(i) for i in range(1, len(improved_copy) + 1)]
improved_copy = improved_copy.set_index('Trial')
print(improved_copy.to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &  Top1 Accuracy &  Top5 Accuracy &  Inference Time (Seconds) &  Number Clusters \\
Trial &                &                &                           &                  \\
\midrule
1     &          88.53 &          99.45 &                  4.741202 &                5 \\
2     &          88.34 &          99.52 &                  5.303724 &                5 \\
3     &          88.71 &          99.53 &                  5.060603 &                5 \\
4     &          89.03 &          99.55 &                  4.508155 &                5 \\
5     &          88.76 &          99.61 &                  4.585092 &                5 \\
6     &          88.32 &          99.51 &                  4.981002 &               10 \\
7     &          88.05 &          99.57 &                  4.942312 &               10 \\
8     &          88.39 &          99.49 &                  4.896819 &               10 \\
9     &          89.12 &          99.65 &                  4.5

In [5]:
# Descriptive statistics per cluster count, one table per metric.
for metric in ('eval_time', 'top1', 'top5'):
    display(improved_df.groupby(['num_clusters'])[metric].describe())

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
num_clusters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
5,5.0,4.839755,0.334875,4.508155,4.585092,4.741202,5.060603,5.303724
10,5.0,4.848384,0.161025,4.574597,4.847188,4.896819,4.942312,4.981002
15,5.0,4.87464,0.30695,4.712664,4.731148,4.752984,4.753515,5.422887
20,5.0,4.968191,0.424828,4.616431,4.622275,4.96899,4.973336,5.659925
25,5.0,4.746336,0.126284,4.547283,4.744316,4.754891,4.789872,4.89532
30,5.0,5.146306,0.877683,4.540003,4.759562,4.845712,4.888407,6.697844


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
num_clusters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
5,5.0,88.674,0.258708,88.34,88.53,88.71,88.76,89.03
10,5.0,88.48,0.3968,88.05,88.32,88.39,88.52,89.12
15,5.0,88.76,0.19799,88.55,88.58,88.75,88.95,88.97
20,5.0,88.63,0.597118,87.98,88.07,88.67,89.17,89.26
25,5.0,88.394,0.407958,87.71,88.46,88.49,88.5,88.81
30,5.0,88.684,0.418127,87.96,88.7,88.87,88.9,88.99


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
num_clusters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
5,5.0,99.532,0.057619,99.45,99.52,99.53,99.55,99.61
10,5.0,99.55,0.063246,99.49,99.51,99.53,99.57,99.65
15,5.0,99.538,0.09471,99.39,99.51,99.57,99.58,99.64
20,5.0,99.516,0.069498,99.44,99.45,99.53,99.56,99.6
25,5.0,99.548,0.094446,99.46,99.48,99.5,99.64,99.66
30,5.0,99.516,0.084439,99.4,99.46,99.55,99.56,99.61


In [43]:
# Runs stored under code_original/results (presumably the unmodified baseline
# code - confirm); loaded without the num_clusters column.
original_results = 'code_original/results'
original_df = get_results(original_results, clusters=False)

original_df[['top1', 'top5', 'eval_time']]

Unnamed: 0,top1,top5,eval_time
0,89.24,99.41,19.140482
1,89.44,99.48,19.794001
2,90.12,99.58,19.055294
3,88.27,99.39,18.983667
4,89.44,99.53,19.675458


In [49]:
# LaTeX table of the accuracy and inference-time columns of original_df.
original_copy = original_df[['top1', 'top5', 'eval_time']].rename(
    columns={
        'top1': 'Top1 Accuracy',
        'top5': 'Top5 Accuracy',
        'eval_time': 'Inference Time (Seconds)',
    }
)
print(original_copy.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  Top1 Accuracy &  Top5 Accuracy &  Inference Time (Seconds) \\
\midrule
0 &          89.24 &          99.41 &                 19.140482 \\
1 &          89.44 &          99.48 &                 19.794001 \\
2 &          90.12 &          99.58 &                 19.055294 \\
3 &          88.27 &          99.39 &                 18.983667 \\
4 &          89.44 &          99.53 &                 19.675458 \\
\bottomrule
\end{tabular}



In [7]:
# Descriptive statistics of the original runs, one summary per metric.
for metric in ('eval_time', 'top1', 'top5'):
    display(original_df[metric].describe())

count     5.000000
mean     19.329780
std       0.376153
min      18.983667
25%      19.055294
50%      19.140482
75%      19.675458
max      19.794001
Name: eval_time, dtype: float64

count     5.000000
mean     89.302000
std       0.666348
min      88.270000
25%      89.240000
50%      89.440000
75%      89.440000
max      90.120000
Name: top1, dtype: float64

count     5.000000
mean     99.478000
std       0.079812
min      99.390000
25%      99.410000
50%      99.480000
75%      99.530000
max      99.580000
Name: top5, dtype: float64

# Eval Times: Kruskal-Wallis and Post Hoc Dunn Test

In [41]:
from scipy import stats

# Evaluation times grouped by cluster count; the grouped series is built once
# and reused for both the display and the list of samples.
cluster_groups = improved_df.groupby(['num_clusters'])
eval_time_per_cluster = cluster_groups['eval_time'].apply(list)
display(eval_time_per_cluster)

# One sample list per cluster count, followed by the original runs as the last group.
eval_times = eval_time_per_cluster.tolist() + [original_df['eval_time'].tolist()]
eval_times

num_clusters
5     [5.303724, 5.060603, 4.508155, 4.585092, 4.741...
10    [4.896819, 4.942312, 4.981002, 4.574597, 4.847...
15    [4.731148, 5.422887, 4.752984, 4.712664, 4.753...
20    [4.96899, 4.622275, 4.616431, 4.973336, 5.659925]
25    [4.789872, 4.744316, 4.754891, 4.89532, 4.547283]
30    [4.759562, 4.845712, 4.888407, 4.540003, 6.697...
Name: eval_time, dtype: object

[[5.303724, 5.060603, 4.508155, 4.585092, 4.741202],
 [4.896819, 4.942312, 4.981002, 4.574597, 4.847188],
 [4.731148, 5.422887, 4.752984, 4.712664, 4.753515],
 [4.96899, 4.622275, 4.616431, 4.973336, 5.659925],
 [4.789872, 4.744316, 4.754891, 4.89532, 4.547283],
 [4.759562, 4.845712, 4.888407, 4.540003, 6.697844],
 [19.140482, 19.794001, 19.055294, 18.983667, 19.675458]]

In [27]:
# Kruskal-Wallis H-test over all eval-time groups (each cluster count plus the original runs).
stats.kruskal(*eval_times)

KruskalResult(statistic=13.699047619047633, pvalue=0.03318472032432296)

In [62]:
import scikit_posthocs as sp

# Pairwise Dunn post hoc test on the eval-time groups. Groups are numbered in
# list order: the cluster-count groups first, the original runs last.
# NOTE(review): no p_adjust argument is passed, so these p-values are presumably
# not corrected for multiple comparisons - confirm against the scikit-posthocs
# documentation and consider e.g. p_adjust='holm' before reporting significance.
p_values = sp.posthoc_dunn(eval_times)
# Boolean matrix of the pairs below the 0.05 level, followed by the p-values themselves.
print(p_values < 0.05)
print(p_values)

       1      2      3      4      5      6      7
1  False  False  False  False  False  False   True
2  False  False  False  False  False  False   True
3  False  False  False  False  False  False   True
4  False  False  False  False  False  False   True
5  False  False  False  False  False  False   True
6  False  False  False  False  False  False   True
7   True   True   True   True   True   True  False
          1         2         3         4         5         6         7
1  1.000000  0.516937  0.975381  0.537094  0.877371  0.665706  0.003050
2  0.516937  1.000000  0.537094  0.975381  0.422335  0.828969  0.020638
3  0.975381  0.537094  1.000000  0.557639  0.853100  0.688281  0.003370
4  0.537094  0.975381  0.557639  1.000000  0.440401  0.853100  0.019006
5  0.877371  0.422335  0.853100  0.440401  1.000000  0.557639  0.001827
6  0.665706  0.828969  0.688281  0.853100  0.557639  1.000000  0.011388
7  0.003050  0.020638  0.003370  0.019006  0.001827  0.011388  1.000000


# Top1 Kruskal-Wallis

In [63]:
# Top-1 accuracy samples: one list per cluster count, then the original runs last.
top1 = cluster_groups['top1'].apply(list).tolist() + [original_df['top1'].tolist()]
top1

[[88.34, 88.71, 89.03, 88.76, 88.53],
 [88.39, 88.05, 88.32, 89.12, 88.52],
 [88.97, 88.55, 88.95, 88.75, 88.58],
 [89.26, 88.07, 88.67, 89.17, 87.98],
 [88.5, 88.49, 87.71, 88.81, 88.46],
 [88.99, 88.7, 88.9, 87.96, 88.87],
 [89.24, 89.44, 90.12, 88.27, 89.44]]

In [64]:
# Kruskal-Wallis H-test over all top-1 accuracy groups (each cluster count plus the original runs).
stats.kruskal(*top1)

KruskalResult(statistic=8.59167950693375, pvalue=0.19787713859203498)

# Top5 Kruskal-Wallis

In [65]:
# Top-5 accuracy samples: one list per cluster count, then the original runs last.
top5 = cluster_groups['top5'].apply(list).tolist() + [original_df['top5'].tolist()]
top5

[[99.52, 99.53, 99.55, 99.61, 99.45],
 [99.49, 99.57, 99.51, 99.65, 99.53],
 [99.57, 99.39, 99.58, 99.51, 99.64],
 [99.44, 99.45, 99.56, 99.53, 99.6],
 [99.5, 99.64, 99.46, 99.66, 99.48],
 [99.56, 99.46, 99.61, 99.4, 99.55],
 [99.41, 99.48, 99.58, 99.39, 99.53]]

In [40]:
# Kruskal-Wallis H-test over all top-5 accuracy groups (each cluster count plus the original runs).
stats.kruskal(*top5)

KruskalResult(statistic=2.466301446832425, pvalue=0.8722198428395025)