In [1]:
import os
import re
import pickle
import numpy as np
import pandas as pd
from itertools import combinations, product
from operator import itemgetter
from humanize import ordinal
from sklearn.cluster import KMeans
from scipy.spatial.distance import euclidean

In [2]:
pd.set_option('max_rows', 100)
pd.set_option('max_columns', 100)

In [3]:
RESULTS_PATH = 'data/results'
FEATURES_PATH = 'data/features'

In [4]:
results_dfs = {}
for filename in os.listdir(RESULTS_PATH):
    archtecture = re.search(r'(?<=results_).*(?=.csv)', filename).group()
    results_dfs[archtecture] = pd.read_csv(os.path.join(RESULTS_PATH, filename))

In [5]:
metabase_df = pd.concat(results_dfs.values(), ignore_index=True)

In [6]:
metabase_df.head()

Unnamed: 0,Architecture,Classes,Accuracy_vector,Precision_vector,Recall_vector,F1_vector,Accuracy_avg,Precision_Avg,Recall_Avg,F1_avg
0,ResNet10,airplane-automobile,"[0.942, 0.939, 0.93, 0.9275, 0.916, 0.9315, 0....","[0.9203510204081633, 0.9051657032755298, 0.893...","[0.932, 0.958, 0.948, 0.939, 0.954, 0.938, 0.8...","[0.9414141414141414, 0.9401373895976447, 0.931...",0.92505,0.892683,0.9274,0.925149
1,ResNet10,airplane-bird,"[0.89, 0.889, 0.8935, 0.882, 0.875, 0.8895, 0....","[0.8614113785557986, 0.8492249488752557, 0.852...","[0.847, 0.878, 0.89, 0.886, 0.888, 0.89, 0.887...","[0.8850574712643677, 0.8877654196157735, 0.893...",0.8848,0.843588,0.8753,0.883662
2,ResNet10,airplane-cat,"[0.9245, 0.9325, 0.935, 0.926, 0.932, 0.925, 0...","[0.8897871921182265, 0.8980344995140912, 0.905...","[0.932, 0.947, 0.937, 0.938, 0.961, 0.909, 0.8...","[0.9250620347394541, 0.933464760965993, 0.9351...",0.9258,0.893557,0.9279,0.925876
3,ResNet10,airplane-deer,"[0.9475, 0.9125, 0.943, 0.93, 0.917, 0.936, 0....","[0.9234072781655036, 0.8553790534618757, 0.921...","[0.949, 0.983, 0.933, 0.98, 0.935, 0.928, 0.95...","[0.947578632051922, 0.9182624941616068, 0.9424...",0.9332,0.900529,0.946,0.934192
4,ResNet10,airplane-dog,"[0.948, 0.9515, 0.936, 0.947, 0.9345, 0.9475, ...","[0.924704, 0.9361238390092881, 0.9224043010752...","[0.948, 0.936, 0.901, 0.95, 0.914, 0.93, 0.955...","[0.948, 0.9507364144235654, 0.9336787564766839...",0.9447,0.923633,0.9363,0.944177


In [7]:
features = {}
for filename in os.listdir(FEATURES_PATH):
    class_ = re.search(r'\w+(?=.data)', filename).group()
    with open(os.path.join(FEATURES_PATH, filename), 'rb') as f:
        features[class_] = pickle.load(f)

In [8]:
centroids = {}
for classes in metabase_df['Classes'].unique():
    class_1, class_2 = classes.split('-')
    X = np.concatenate((features[class_1], features[class_2]), axis=0)
    kmeans = KMeans(n_clusters=2, random_state=0)
    kmeans.fit(X)
    centroids[classes] = kmeans.cluster_centers_

In [9]:
distances = {}
for tasks in combinations(metabase_df['Classes'].unique(), 2):
    task_1, task_2 = tasks
    distances[tasks] = min(
        np.mean([
            euclidean(centroids[task_1][0], centroids[task_2][0]),
            euclidean(centroids[task_1][1], centroids[task_2][1])
        ]),
        np.mean([
            euclidean(centroids[task_1][0], centroids[task_2][1]),
            euclidean(centroids[task_1][1], centroids[task_2][0])
        ])
    )

In [10]:
similar_tasks_by_tasks = {}
for task in metabase_df['Classes'].unique():
    hypotheses = []
    for tasks in distances.keys():
        if task in tasks:
            hypothese = tasks[1] if task == tasks[0] else tasks[0]
            hypotheses.append((hypothese, distances[tasks]))
    similar_tasks_by_tasks[task] = sorted(hypotheses, key=itemgetter(1))

In [11]:
ranking_by_tasks = {}
for task in metabase_df['Classes'].unique():
    ranking_by_tasks[task] = metabase_df[
        metabase_df['Classes'] == task
    ].sort_values('Accuracy_avg', ascending=False)[[
        'Architecture',
        'Accuracy_avg',
        'Precision_Avg',
        'Recall_Avg',
        'F1_avg'
    ]].to_dict(orient='records')

In [12]:
statistics_by_tasks = {}
for task in metabase_df['Classes'].unique():
    similar_task, centroids_distance = similar_tasks_by_tasks[task][0]
    ranking_architectures = list(
        map(itemgetter('Architecture'), ranking_by_tasks[task])
    )
    recommended_architecture_rank = ranking_architectures.index(
        ranking_by_tasks[similar_task][0]['Architecture']
    )
    statistics_by_tasks[task] = {
        'similar_task': similar_task,
        'centroids_distance': centroids_distance,
        'ranking_architectures': ranking_architectures,
        'recommended_architecture_rank': recommended_architecture_rank,
        'amount_of_architectures': len(ranking_architectures),
        'best_architecture': ranking_by_tasks[task][0],
        'worst_architecture': ranking_by_tasks[task][-1],
        'recommended_architecture': ranking_by_tasks[task][
            recommended_architecture_rank
        ]
    }

In [13]:
recommendations_df = pd.DataFrame(
    metabase_df['Classes'].unique(),
    columns=['Task']
)

In [14]:
recommendations_df['Similar_task'] = [
    statistics_by_tasks[task]['similar_task']
    for task in recommendations_df['Task']
]

In [15]:
recommendations_df['Centroids_distance'] = [
    statistics_by_tasks[task]['centroids_distance']
    for task in recommendations_df['Task']
]

In [16]:
recommendations_df['Amount_of_architectures'] = [
    statistics_by_tasks[task]['amount_of_architectures']
    for task in recommendations_df['Task']
]

In [17]:
recommendations_df['Ranking_architectures'] = [
    ', '.join(statistics_by_tasks[task]['ranking_architectures'])
    for task in recommendations_df['Task']
]

In [18]:
recommendations_df['Recommended_architecture_rank'] = [
    ordinal(statistics_by_tasks[task]['recommended_architecture_rank'] + 1)
    for task in recommendations_df['Task']
]

In [19]:
params = [
    'Architecture',
    'Accuracy_avg',
    'Precision_Avg',
    'Recall_Avg',
    'F1_avg'
]
types = ['recommended', 'best', 'worst']
for p, t in product(params, types):
    recommendations_df[f'{p}[{t}]'] = [
        statistics_by_tasks[task][t + '_architecture'][p]
        for task in recommendations_df['Task']
    ]

In [20]:
recommendations_df

Unnamed: 0,Task,Similar_task,Centroids_distance,Amount_of_architectures,Ranking_architectures,Recommended_architecture_rank,Architecture[recommended],Architecture[best],Architecture[worst],Accuracy_avg[recommended],Accuracy_avg[best],Accuracy_avg[worst],Precision_Avg[recommended],Precision_Avg[best],Precision_Avg[worst],Recall_Avg[recommended],Recall_Avg[best],Recall_Avg[worst],F1_avg[recommended],F1_avg[best],F1_avg[worst]
0,airplane-automobile,automobile-ship,0.052087,6,"MiniVGG1, MiniVGG3, AlexNet1, MiniGoogLeNet1, ...",4th,MiniGoogLeNet1,MiniVGG1,SmallSqueezeNet,0.94405,0.9719,0.87925,0.93243,0.956602,0.85842,0.9128,0.9766,0.808,0.938552,0.972037,0.824166
1,airplane-bird,airplane-deer,0.037969,6,"MiniVGG1, MiniGoogLeNet1, AlexNet1, MiniVGG3, ...",2nd,MiniGoogLeNet1,MiniVGG1,ResNet10,0.9186,0.91925,0.8848,0.885863,0.878482,0.843588,0.9158,0.9418,0.8753,0.918248,0.921148,0.883662
2,airplane-cat,cat-ship,0.051728,6,"MiniVGG3, AlexNet1, MiniVGG1, MiniGoogLeNet1, ...",1st,MiniVGG3,MiniVGG3,SmallSqueezeNet,0.95015,0.95015,0.9046,0.920075,0.920075,0.872486,0.9716,0.9716,0.8927,0.951379,0.951379,0.900057
3,airplane-deer,airplane-bird,0.037969,6,"MiniGoogLeNet1, MiniVGG3, AlexNet1, ResNet10, ...",6th,MiniVGG1,MiniGoogLeNet1,MiniVGG1,0.89945,0.96365,0.89945,0.836805,0.945979,0.836805,0.9932,0.9657,0.9932,0.908942,0.963716,0.908942
4,airplane-dog,dog-ship,0.051966,6,"MiniGoogLeNet1, MiniVGG3, AlexNet1, MiniVGG1, ...",4th,MiniVGG1,MiniGoogLeNet1,SmallSqueezeNet,0.9483,0.96525,0.9428,0.9141,0.953657,0.911598,0.9797,0.9553,0.962,0.950101,0.96488,0.944186
5,airplane-frog,frog-ship,0.051883,6,"MiniGoogLeNet1, MiniVGG3, AlexNet1, SmallSquee...",1st,MiniGoogLeNet1,MiniGoogLeNet1,MiniVGG1,0.9656,0.9656,0.94675,0.95172,0.95172,0.906626,0.961,0.961,0.9937,0.964927,0.964927,0.949362
6,airplane-horse,horse-ship,0.049007,6,"MiniVGG3, MiniGoogLeNet1, AlexNet1, MiniVGG1, ...",1st,MiniVGG3,MiniVGG3,ResNet10,0.965,0.965,0.9348,0.942993,0.942993,0.909471,0.9795,0.9795,0.9278,0.965587,0.965587,0.934305
7,airplane-ship,airplane-bird,0.095961,6,"MiniVGG3, MiniGoogLeNet1, AlexNet1, MiniVGG1, ...",4th,MiniVGG1,MiniVGG3,SmallSqueezeNet,0.8831,0.9357,0.70285,0.818816,0.904545,0.667966,0.9645,0.9446,0.6881,0.892213,0.936251,0.669603
8,airplane-truck,ship-truck,0.050565,6,"MiniVGG3, MiniGoogLeNet1, AlexNet1, MiniVGG1, ...",1st,MiniVGG3,MiniVGG3,SmallSqueezeNet,0.94775,0.94775,0.8408,0.915364,0.915364,0.816722,0.9735,0.9735,0.94,0.949271,0.949271,0.87413
9,automobile-bird,automobile-deer,0.042926,6,"MiniGoogLeNet1, MiniVGG3, AlexNet1, MiniVGG1, ...",3rd,AlexNet1,MiniGoogLeNet1,SmallSqueezeNet,0.97445,0.97695,0.90715,0.958421,0.961333,0.890751,0.9837,0.9875,0.8509,0.97471,0.977209,0.856313


In [21]:
recommendations_df.to_csv('recommendations.csv', index=False)