In [11]:
from os import listdir
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import numpy as np

def Plot(results,file_save,names=[], est_on=True, lbl='svm_linear', marks=True, mark_20=.8877, mark_90=.1477):
    font = {'family': 'normal',

                'size': 10}

    plt.rc('font', **font)
    paras = {'lines.linewidth': 2, 'legend.fontsize': 10, 'axes.labelsize': 10, 'legend.frameon': True,
             'figure.autolayout': False, 'figure.figsize': (16, 12)}

    plt.rcParams.update(paras)

    fig = plt.figure()
    count = 0
    count2 = -1
    for result in results:
        count2 += 1
        count += 1
        if(count==3):
            count +=2
        ax=plt.subplot(3,4,count)
        pos=result['true'][0]
        total = result['true'][1]
        colors=['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet']
        style = ['+', 'd', 'o', 'v', '^', 's', '.']

        i=0
        for key in result:
            if est_on==True:
                if key == 'true' or 'apfd' in key or 'supervised' != key:
                    continue
            else:
                if key == 'true'or 'apfd' in key or 'supervised_' not in key:
                    continue
            x = np.array(list(map(float,result[key]['x'])))/total
            y= np.array(list(map(float,result[key]['pos'])))/pos
            if est_on==False:
                label = result[key]['apfd']
                label = round(float(label), 2)
            else:
                label = ''
            if est_on == False:
                if 'svm_linear' in key:
                    ax.plot(x, y, color=colors[i], markersize=10, markevery=100, marker=style[i], linestyle = '-', label=key.split('_', 1)[1] + ' ' + str(label))
                else:
                    ax.plot(x, y, linewidth=3, color=colors[i], markersize=7, markevery=100, marker=style[i], linestyle='-',
                            label=key.split('_', 1)[1] + ' ' + str(label))
            else:
                legend_learner, = ax.plot(x, y, color=colors[i], linestyle='-', label=lbl)
                ax.annotate(names[count2], (.3,.01), color='k', fontsize=16, verticalalignment='bottom', horizontalalignment='left')
            if len(result[key]['est'])>1 and est_on:
                z= np.array(list(map(float,result[key]['est'])))/pos
                legend_est, = ax.plot(x, z, color=colors[i],linestyle = ':', label='estimation')
            i+=1
        if marks:
            ax.plot([.2, .2], [0, 1.0], color='g', linestyle='--', label='cost@20', linewidth=2)
            ax.plot([0, 1], [.9, .9], color='b', linestyle='--', label='recall@90', linewidth=2)

            ax.plot([.2], [mark_20] , markersize=10, marker='o', color='g')
            ax.annotate(str(mark_20), (.2,mark_20-.1), color='g', verticalalignment='bottom', horizontalalignment='left')

            ax.plot([mark_90], [.9] , markersize=10, marker='o', color='b')
            ax.annotate(str(mark_90), (mark_90,.9), color='b', verticalalignment='bottom', horizontalalignment='left')

        plt.subplots_adjust(left=.1, right=1., wspace = 0.22, hspace = 0.35)
        #ax.legend(bbox_to_anchor=(1.02, 1), loc=2, ncol=1, borderaxespad=0.)
        plt.ylabel("Recall", fontweight='bold', fontsize=14)
        plt.xlabel("Cost", fontweight='bold', fontsize=14)

    plt.figlegend([legend_learner, legend_est], ['retrieval', 'estimation'], loc=(.7,.75), fontsize=20)
    plt.savefig('../../test_figure/'+file_save+".png")
    plt.savefig('../../test_figure/'+file_save+".pdf")
    plt.close(fig)

In [12]:
def plot_HPC(input = '../../dump_90_15/'):
    files = listdir(input)
    for file in files:
        with open("../../dump_90_15/"+file,"rb") as handle:
            result = pickle.load(handle)
        Plot(result,'.'.join(file.split('.')[:-1]), est_on=True)

In [13]:
def APFD_form_results(x, step_size=10):
    n = x['true'][0]
    m = x['true'][1]
    apfd = 0
    c = [key for key in x if 'supervised' in key]
    old_step = 0
    old_found = 0
    all_apfds = {}
    for key in c:
        for i in x[key]['pos'][1:]:
            new_step = old_step + step_size

            new_found = i - old_found
            old_found = i
            apfd += (new_found * ((new_step + old_step) / 2))
            old_step = new_step
        apfd = 1 - float(apfd) / n / m + 1 / (2 * n)
        all_apfds.setdefault(key, apfd)
    return all_apfds


def combine_n_runs_for_median_only(path='../../dump_90_7/', dest_path='../../dump_90_7/', n=5):
    files = listdir(path)

    for file in files:
        if 'ipynb' in file:
            continue
        result = {}
        est = {}
        thres = {}
        with open(path + file, 'rb') as handle:
            res = pickle.load(handle)
            for i in range(n):
                result[i] = res['supervised' + str(i)]['pos']
                est[i] = res['supervised' + str(i)]['est']
                #thres[i] = res['supervised' + str(i)]['thres']

            df = pd.DataFrame(result)
            df_est = pd.DataFrame(est)

            res['supervised'] = res['supervised0'].copy()
            res['supervised']['pos'] = df.T.describe().T['50%'].tolist()

            res['supervised']['est'] = df_est.T.describe().T['50%'].tolist()

            a = APFD_form_results(res, step_size=10)
            res['apfds'] = a
        with open(dest_path + '.'.join(file.split('.')[:-1]) + '.pkl', "wb") as handle:
            pickle.dump(res, handle)
    

In [14]:
#combine_n_runs_for_median_only()

In [15]:
files = listdir('../../dump_90')
results = []
for file in files:
    with open("../../dump_90/"+file,"rb") as handle:
        results.append(pickle.load(handle)) 
names = ['   ant   ', ' jmeter  ', ' argouml ', ' columba ', '   emf   ', 'hibernate', '  jedit  ', 'jfreechart', '  jruby  ', '  sql12  ']
Plot(results,'test', names=names, est_on=True, marks=False)


  (prop.get_family(), self.defaultFamily[fontext]))
