# Evaluation of Learning Curve Extrapolation

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

%matplotlib inline

# Path to the CSV holding the extrapolation results; a leading '~' is
# expanded to the current user's home directory.
results_file = os.path.expanduser(
    '~/projects/lccv/lccv/data/extrapolation.csv'
)

# Read the whole results table into one DataFrame.
df = pd.read_csv(filepath_or_buffer=results_file)

# Distinct values of the 'classifier' column; one figure is drawn per entry.
classifiers = pd.unique(df['classifier'])

In [None]:
# Draw one three-panel figure per classifier and print the task ids it covers.
for classifier in classifiers:
    # Rows of the results table that belong to this classifier.
    result_clf = df.loc[df['classifier'] == classifier]

    # Extract both performance columns once, as plain arrays, so that every
    # comparison below is positional ndarray arithmetic rather than a mix of
    # pandas Series and ndarrays.
    actual = result_clf['performance_next_point'].values
    predicted = result_clf['performance_prediction'].values

    # Middle panel: empirical CDF of the absolute difference between the
    # performance at the next point and the predicted performance, built
    # from a 1000-bin histogram.
    perf = np.abs(actual - predicted)
    count, bins_count = np.histogram(perf, bins=1000)
    pdf = count / count.sum()
    cdf = np.cumsum(pdf)

    # Right panel: for each threshold in [0, 1) with step 0.01, the fraction
    # of rows where actual and predicted performance lie on the same side of
    # the threshold. The thresholds are broadcast as a column against the
    # row vectors, giving a (n_thresholds, n_rows) boolean matrix.
    curve_plot_x = np.arange(0, 1, 0.01)
    thresholds = curve_plot_x[:, np.newaxis]
    # Both comparisons are spelled out (instead of negating one) so rows
    # containing NaN count as "not on the same side", as they did before.
    both_below = (actual < thresholds) & (predicted < thresholds)
    both_at_or_above = (actual >= thresholds) & (predicted >= thresholds)
    same_side = both_below | both_at_or_above
    curve_plot_y = np.count_nonzero(same_side, axis=1) / actual.size

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16, 4))
    fig.suptitle(classifier)

    # Left panel: the two delta columns plotted against each other.
    ax1.scatter(result_clf['delta_current_actual'], result_clf['delta_current_prediction'])
    ax1.set_xlabel('delta_current_actual')
    ax1.set_ylabel('delta_current_prediction')

    # The CDF is plotted against the right edge of each histogram bin.
    ax2.plot(bins_count[1:], cdf)
    ax2.set_xlabel('absolute prediction error')
    ax2.set_ylabel('cumulative fraction of results')

    ax3.plot(curve_plot_x, curve_plot_y)
    ax3.set_xlabel('performance threshold')
    ax3.set_ylabel('fraction on same side of threshold')

    plt.show()
    print('results on %d tasks:' % len(result_clf['task_id']), result_clf['task_id'].values)