In [3]:
import pickle
import os

# Load the pickled results of run 0 for the "glass" dataset and display them.
results_path = os.path.join("results", "glass", "results_0.pickle")
with open(results_path, "rb") as results_file:
    results = pickle.load(results_file)

results

{'baseline_knn_time': array([0.1540075, 0.0070467, 0.0059965]),
 'baseline_knn_acc': array([0.62790698, 0.69767442, 0.58139535]),
 'baseline_time': array([0.0013885, 0.0009983, 0.0085672, 0.0037015]),
 'baseline_acc': array([0.58139535, 0.53488372, 0.6744186 , 0.72093023]),
 'smart_time': array([[[2.3594546, 0.0590264],
         [0.0597788, 0.0608105],
         [0.0633466, 0.0617818]],
 
        [[0.0539741, 0.0560436],
         [0.0561274, 0.0598776],
         [0.0579494, 0.0598743]],
 
        [[0.0562593, 0.0647282],
         [0.0617295, 0.0715498],
         [0.0676939, 0.0727452]],
 
        [[0.0645766, 0.0715691],
         [0.0708678, 0.0721596],
         [0.0748834, 0.0767115]]]),
 'smart_acc': array([[[0.60465116, 0.6744186 ],
         [0.69767442, 0.6744186 ],
         [0.69767442, 0.69767442]],
 
        [[0.60465116, 0.62790698],
         [0.6744186 , 0.69767442],
         [0.62790698, 0.65116279]],
 
        [[0.62790698, 0.69767442],
         [0.6744186 , 0.6744186 ],
    

In [7]:
import numpy as np
# Scratch check: count the votes per label among the neighbors and print the
# largest vote count next to threshold * k so the two can be compared by eye.
y_neighbors = np.array([0, 1, 2, 0, 1, 2, 0, 1, 1, 1])
k = len(y_neighbors)
threshold = 0.8

labels, label_counts = np.unique(y_neighbors, return_counts=True)
top_idx = np.argmax(label_counts)  # position of the most frequent label
dominant_class = labels[top_idx]
print(labels, label_counts)

print(label_counts[top_idx], threshold * k)

[0 1 2] [3 5 2]
5 8.0


In [1]:
from typing import Literal
from average_results import ret_avg_results
import numpy as np
from itertools import product

def generate_table_exp1(datasets=['mnist', 'covertype', 'yeast', 'skin', 'statlog'],knn_algo:Literal['brute', 'kd_tree', 'ball_tree']='brute'):
    """Write the experiment-1 accuracy table to ``exp1_tab.txt``.

    Averaged results are fetched with ``ret_avg_results``. For every requested
    dataset (alphabetical order) and every classifier (alphabetical order) a
    2x3 block of strings is produced from the "smart" accuracies at threshold
    1.0: the accuracy for k == 10, the accuracy for every k > 10 (in the order
    of ``ks``), and finally the classifier's baseline accuracy. All values are
    percentages formatted with one decimal. Blocks of one dataset are placed
    side by side, datasets are stacked vertically, and the table is written
    tab-separated, one row per line.

    Args:
        datasets: names of the datasets to include.
        knn_algo: forwarded unchanged to ``ret_avg_results``.

    Returns:
        None. Prints "No results found" and writes nothing when there is
        nothing to tabulate.

    Raises:
        ValueError: if a classifier does not yield exactly six cells (the
            2x3 layout needs one k == 10 entry group plus four k > 10), or
            if 1.0 is not among the dataset's thresholds.
    """
    ret = ret_avg_results(datasets=datasets, knn_algo=knn_algo)
    # An empty mapping is treated like None; previously it fell through and
    # crashed later on an unbound ``clfs``.
    if not ret:
        print("No results found")
        return

    def as_percent(value):
        return "{:.1f}".format(value * 100)

    table_blocks = []
    for dataset in sorted(datasets):
        if dataset not in ret:
            # Previously a KeyError after the output file was already truncated.
            print(f"No results found for dataset '{dataset}', skipping")
            continue

        results_dict = ret[dataset]
        # Classifier names are taken per dataset instead of relying on the
        # value leaked from the last loop iteration.
        clfs = results_dict["clfs"]
        ks = results_dict['ks']
        thresholds = results_dict['thresholds']
        t1_idx = thresholds.index(1.0)
        smart = np.asarray(results_dict["smart_acc"])  # shape = (n_clfs, n_k, n_t)

        block_by_clf = {}
        for iclf, clf in enumerate(clfs):
            baseline = as_percent(results_dict["baseline_acc"][iclf])
            curr_smart = smart[iclf, :, t1_idx]  # shape = (n_k)

            # NOTE(review): the original variable was called ``k_le_10`` but the
            # test is ``k == 10``; any k < 10 is therefore left out of the table.
            # Behaviour is kept as-is -- confirm whether ``k <= 10`` was intended.
            k_eq_10 = [idx for idx, k in enumerate(ks) if k == 10]
            means = [np.mean(curr_smart[k_eq_10])]
            means += [curr_smart[idx] for idx in range(len(ks)) if ks[idx] > 10]

            cells = [as_percent(mean) for mean in means] + [baseline]
            if len(cells) != 6:
                raise ValueError(
                    f"{dataset}_{clf}: expected 6 table cells for the 2x3 layout, got {len(cells)}"
                )
            block_by_clf[clf] = np.array(cells, dtype=object).reshape((2, 3))

        table_blocks.append(np.hstack([block_by_clf[clf] for clf in sorted(clfs)]))

    if not table_blocks:
        print("No results found")
        return

    final_arr = np.vstack(table_blocks).tolist()
    # The file is opened only once the whole table has been computed, so a
    # failure above no longer leaves an empty exp1_tab.txt behind.
    with open('exp1_tab.txt','w') as f:
        f.write("\n".join("\t".join(row) for row in final_arr))
                
def generate_table_exp2(datasets=['mnist', 'covertype', 'yeast', 'skin', 'statlog'],knn_algo:Literal['brute', 'kd_tree', 'ball_tree']='brute'):
    """Write the experiment-2 table (best accuracy and its time) to ``exp2_tab.txt``.

    Averaged results are fetched with ``ret_avg_results``. For every requested
    dataset (alphabetical order) and every classifier (alphabetical order) a
    2x3 block of strings is produced:

        row 1: best accuracy at threshold 0.6, best accuracy at threshold 1.0,
               baseline accuracy (percentages, one decimal)
        row 2: the times belonging to those two best entries, baseline time
               (one decimal, no scaling)

    "Best" is the highest accuracy over all k; ties are resolved in favour of
    the smallest k. Blocks of one dataset are placed side by side, datasets
    are stacked vertically, and the table is written tab-separated, one row
    per line.

    Args:
        datasets: names of the datasets to include.
        knn_algo: forwarded unchanged to ``ret_avg_results``.

    Returns:
        None. Prints "No results found" and writes nothing when there is
        nothing to tabulate.

    Raises:
        ValueError: if 0.6 or 1.0 is missing from a dataset's thresholds.
    """
    ret = ret_avg_results(datasets=datasets, knn_algo=knn_algo)
    # An empty mapping is treated like None; previously it fell through and
    # crashed later on an unbound ``clfs``.
    if not ret:
        print("No results found")
        return

    target_thresholds = (0.6, 1.0)

    def one_decimal(value):
        return "{:.1f}".format(value)

    table_blocks = []
    for dataset in sorted(datasets):
        if dataset not in ret:
            # Previously a KeyError after the output file was already truncated.
            print(f"No results found for dataset '{dataset}', skipping")
            continue

        results_dict = ret[dataset]
        clfs = results_dict["clfs"]
        ks = list(results_dict['ks'])
        thresholds = list(results_dict['thresholds'])
        smart_acc = np.asarray(results_dict["smart_acc"])    # shape = (n_clfs, n_k, n_t)
        smart_time = np.asarray(results_dict["smart_time"])  # shape = (n_clfs, n_k, n_t)

        # Positions of the target thresholds on the threshold axis. The old code
        # enumerated the *filtered* thresholds, so the index only matched the
        # array axis when 0.6 and 1.0 happened to be the first two entries.
        # Resolving positions here also removes the string-key mismatch between
        # "..._1.0" (stored) and "..._1" (looked up).
        t_indices = [thresholds.index(t) for t in target_thresholds]

        block_by_clf = {}
        for iclf, clf in enumerate(clfs):
            best_accs = []
            best_times = []
            for t_idx in t_indices:
                curr_acc = smart_acc[iclf, :, t_idx]    # shape = (n_k)
                curr_time = smart_time[iclf, :, t_idx]  # shape = (n_k)

                # Among all k reaching the top accuracy, keep the smallest k.
                top_acc = np.amax(curr_acc)
                best_k_idx = min(
                    (idx for idx in range(len(ks)) if curr_acc[idx] == top_acc),
                    key=lambda idx: ks[idx],
                )
                best_accs.append(one_decimal(curr_acc[best_k_idx] * 100))
                best_times.append(one_decimal(curr_time[best_k_idx]))

            baseline_acc = one_decimal(results_dict["baseline_acc"][iclf] * 100)
            baseline_time = one_decimal(results_dict["baseline_time"][iclf])
            cells = best_accs + [baseline_acc] + best_times + [baseline_time]
            block_by_clf[clf] = np.array(cells, dtype=object).reshape((2, 3))

        table_blocks.append(np.hstack([block_by_clf[clf] for clf in sorted(clfs)]))

    if not table_blocks:
        print("No results found")
        return

    final_arr = np.vstack(table_blocks).tolist()
    # The file is opened only once the whole table has been computed, so a
    # failure above no longer leaves an empty exp2_tab.txt behind.
    with open('exp2_tab.txt','w') as f:
        f.write("\n".join("\t".join(row) for row in final_arr))
        

# Build the experiment-1 table for these eight datasets, leaving knn_algo at its default.
generate_table_exp1(
    datasets=[
        'covertype', 'glass', 'mnist', 'skin',
        'statlog', 'usps', 'wine', 'yeast',
    ],
)