# Análise dos resultados (V2)

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as w
import colorcet as cc

Definições uteis para outras situações:

In [14]:
%load_ext autoreload
%autoreload 2
from utils import RESULTS_V2_PATH, DATASET_LIST, ABREV_DICT
import utils

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Coletando resultados

In [15]:
csv_files = !ls  $RESULTS_V2_PATH

In [34]:
def get_results_info(file_list: list) -> pd.DataFrame:
    
    results_info = pd.DataFrame(file_list, columns=["file"])
    results_info[["dataset", "split", "learner", "method"]] = results_info.file.str.split("_", n=3, expand=True)
    results_info['file'] = results_info.file.map(lambda x: os.path.join(RESULTS_V2_PATH, x))
    results_info['method'] = results_info.method.apply(lambda x: x.split('.')[0])
    
    return results_info

results_info = get_results_info(csv_files)
results_info.head()

Unnamed: 0,file,dataset,split,learner,method
0,../results/v2/abalone-3class_1x5_5NN_borderlin...,abalone-3class,1x5,5NN,borderline_points_sampling
1,../results/v2/abalone-3class_1x5_5NN_class_bal...,abalone-3class,1x5,5NN,class_balance_sampling
2,../results/v2/abalone-3class_1x5_5NN_class_lik...,abalone-3class,1x5,5NN,class_likelihood_sampling
3,../results/v2/abalone-3class_1x5_5NN_class_lik...,abalone-3class,1x5,5NN,class_likeliood_diff_sampling
4,../results/v2/abalone-3class_1x5_5NN_density_w...,abalone-3class,1x5,5NN,density_weighted_sampling


## Análise de curvas de aprendizado

In [35]:
n_methods = results_info.method.nunique()
COLOR_DICT = dict(zip(results_info.method.unique().tolist(), cc.glasbey_dark[:n_methods]))

In [36]:
def plot_learning_curve(dataset, learner):
    
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.grid(True)
    ax.set_ylim(-1,1)
    ax.set_title(f"Average learning curves for AL methods")
    
    
    result_files = results_info[(results_info.learner == learner) & (results_info.dataset == dataset)].file.tolist()

    results_df = pd.concat(pd.read_csv(f) for f in result_files)

    auc_dict = {}
    for method, data in results_df.groupby("method"):
        avg_scores =  data.groupby("query").kappa.mean()
        auc = (np.trapz(avg_scores, avg_scores.index))
        auc_dict[method] = auc

    ranked_method_list = sorted(auc_dict.items(), key=lambda x: x[1],reverse=True)

    i=0
    for method, _ in ranked_method_list:
        avg_curve = results_df[results_df.method == method].groupby("query").kappa.mean()
        ax.plot(avg_curve, utils.get_style(method), linewidth=0.5, color=COLOR_DICT[method], label=ABREV_DICT[method],
            markevery=(0.3, 0.2))
        i+=1
        
    ax.legend(loc='upper right', bbox_to_anchor=(1.2, 1.02), ncols=1, prop={'size': 12}, framealpha=1)

w.interact(plot_learning_curve,
          dataset=w.SelectionSlider(options=utils.DATASET_LIST),
          learner=w.ToggleButtons(options=utils.ABREV_MODEL.keys(), value="SVC"))

interactive(children=(SelectionSlider(description='dataset', options=('abalone-3class', 'artificial-characters…

<function __main__.plot_learning_curve(dataset, learner)>

## Curvas de Ranking

In [37]:
def plot_ranking_curves(learner):
    df = pd.concat(pd.read_csv(f) for f in  results_info[(results_info.learner == learner)].file)

    print(df.shape)
    avg_ranks_df = df.groupby(["method", "query", "dataset"]).apply(lambda x: x.kappa.mean())

    return avg_ranks_df

    avg_ranks_df['rank'] = avg_ranks_df.groupby("query").kappa.rank(ascending=False)
    return avg_ranks_df

result = plot_ranking_curves("SVC")
result
# result[result['query'] == 1].sort_values(by="rank")

(1042724, 8)


method                        query  dataset                            
borderline_points_sampling    0      abalone-3class                         0.159848
                                     artificial-characters                  0.127924
                                     autoUniv-au1-1000                     -0.000566
                                     autoUniv-au6-cd1-400                   0.012923
                                     autoUniv-au7-300-drift-au7-cpd1-800    0.035120
                                                                              ...   
tree_depth_unpruned_sampling  100    wilt                                   0.330746
                                     wine                                   0.957785
                                     wine-quality-red                       0.268648
                                     wine-quality-white-5class              0.127812
                                     yeast-4class                           0