In [None]:
%matplotlib inline

import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from autorank import autorank, plot_stats, create_report, latex_table

# Names of the projects whose per-project result files are looked up below.
PROJECTS = [
    'ant-ivy',
    'archiva',
    'calcite',
    'cayenne',
    'commons-bcel',
    'commons-beanutils',
    'commons-codec',
    'commons-collections',
    'commons-compress',
    'commons-configuration',
    'commons-dbcp',
    'commons-digester',
    'commons-io',
    'commons-jcs',
    'commons-jexl',
    'commons-lang',
    'commons-math',
    'commons-net',
    'commons-scxml',
    'commons-validator',
    'commons-vfs',
    'deltaspike',
    'eagle',
    'giraph',
    'gora',
    'jspwiki',
    'knox',
    'kylin',
    'lens',
    'mahout',
    'manifoldcf',
    'nutch',
    'opennlp',
    'parquet-mr',
    'santuario-java',
    'systemml',
    'tika',
    'wss4j',
]

# Input directory for the result CSV files and output directory for figures,
# both relative to the notebook's working directory.
DATA_PATH = '../data/'
FIGURES_PATH = '../figures/'

# Seed NumPy's global random number generator once, up front.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Autorank train/test split

In [None]:
# Compare the four metric sets with autorank on the train/test split results.
# One statistical comparison is run per (label, performance measure) pair and
# drawn into a 2x4 grid: rows are labels, columns are performance measures.
r = pd.read_csv(os.path.join(DATA_PATH, 'train_test_all.csv'))
fig, axs = plt.subplots(2, 4, figsize=(18, 4))

# (value in the CSV 'label' column, display name), in grid row order.
labels = [('adhoc_label', 'ad-hoc SZZ'), ('bug_label', 'ITS SZZ')]
# (suffix of the rf_*/lr_* score columns, display name), in grid column order.
measures = [('f1', 'F-measure'), ('roc_auc', 'AUC'),
            ('ub', 'Upper bound'), ('lb', 'Lower bound')]
# (value in the CSV 'metric_set' column, population name used by autorank).
metric_sets = [('jit', 'jit'), ('static', 'static'), ('pmd', 'pmd'),
               ('jit_static_pmd', 'combined')]

for row, (lbl, lbl_name) in enumerate(labels):
    for col, (pm, pm_name) in enumerate(measures):
        # One population per metric set: the rf_* scores followed by the
        # lr_* scores of the rows matching this label and metric set.
        populations = pd.DataFrame()
        for ms, name in metric_sets:
            subset = r[(r['label'] == lbl) & (r['metric_set'] == ms)]
            populations[name] = np.concatenate((subset['rf_{}'.format(pm)].values,
                                                subset['lr_{}'.format(pm)].values))

        # 'lb' is the only measure ranked in ascending order
        # (presumably because lower values are better -- confirm).
        order = 'ascending' if pm == 'lb' else 'descending'

        # alpha equals 0.05 / 16; presumably a Bonferroni correction for the
        # 16 comparisons made in this notebook -- TODO confirm.
        result = autorank(populations, alpha=0.003125, verbose=False, order=order)
        plot_stats(result, ax=axs[row, col])

        # NOTE(review): the table label is built from the display name, so it
        # contains spaces (e.g. '..._f1_ad-hoc SZZ'). Kept unchanged because
        # existing documents may reference these labels.
        latex_table(result, label='tbl:stat_results_tt_{}_{}'.format(pm, lbl_name))
        axs[row, col].set_title('{}, {}'.format(pm_name, lbl_name))

plt.tight_layout()
# Create the output directory if needed so saving also works on a fresh checkout.
os.makedirs(FIGURES_PATH, exist_ok=True)
plt.savefig(os.path.join(FIGURES_PATH, 'ar_tt.pdf'))

# Autorank interval

In [None]:
# Compare the four metric sets with autorank on the interval results.
# One statistical comparison is run per (label, performance measure) pair and
# drawn into a 2x4 grid: rows are labels, columns are performance measures.

# Load the per-project interval data; projects without a result file are skipped.
scores = []
for project_name in PROJECTS:
    fname = os.path.join(DATA_PATH, 'interval_mean_{}.csv'.format(project_name))
    if os.path.exists(fname):
        scores.append(pd.read_csv(fname))

# Fail with an actionable message instead of pandas' "No objects to concatenate".
if not scores:
    raise FileNotFoundError(
        'no interval_mean_<project>.csv files found in {}'.format(DATA_PATH))

r = pd.concat(scores)

fig, axs = plt.subplots(2, 4, figsize=(18, 4))

# (value in the CSV 'label' column, display name), in grid row order.
labels = [('label_adhoc', 'ad-hoc SZZ'), ('label_bug', 'ITS SZZ')]
# (suffix of the rf_*/lr_* score columns, display name), in grid column order.
measures = [('f1', 'F-measure'), ('roc_auc', 'AUC'),
            ('ub', 'Upper bound'), ('lb', 'Lower bound')]
# (value in the CSV 'metric_set' column, population name used by autorank).
metric_sets = [('jit', 'jit'), ('static', 'static'), ('pmd', 'pmd'),
               ('jit_static_pmd', 'combined')]

for row, (lbl, lbl_name) in enumerate(labels):
    for col, (pm, pm_name) in enumerate(measures):
        # One population per metric set: the rf_* scores followed by the
        # lr_* scores of the rows matching this label and metric set.
        populations = pd.DataFrame()
        for ms, name in metric_sets:
            subset = r[(r['label'] == lbl) & (r['metric_set'] == ms)]
            populations[name] = np.concatenate((subset['rf_{}'.format(pm)].values,
                                                subset['lr_{}'.format(pm)].values))

        # 'lb' is the only measure ranked in ascending order
        # (presumably because lower values are better -- confirm).
        order = 'ascending' if pm == 'lb' else 'descending'

        # alpha equals 0.05 / 16; presumably a Bonferroni correction for the
        # 16 comparisons made in this notebook -- TODO confirm.
        result = autorank(populations, alpha=0.003125, verbose=False, order=order)
        plot_stats(result, ax=axs[row, col])

        # NOTE(review): the table label is built from the display name, so it
        # contains spaces (e.g. '..._f1_ad-hoc SZZ'). Kept unchanged because
        # existing documents may reference these labels.
        latex_table(result, label='tbl:stat_results_interval_{}_{}'.format(pm, lbl_name))
        axs[row, col].set_title('{}, {}'.format(pm_name, lbl_name))

plt.tight_layout()
# Create the output directory if needed so saving also works on a fresh checkout.
os.makedirs(FIGURES_PATH, exist_ok=True)
plt.savefig(os.path.join(FIGURES_PATH, 'ar_interval.pdf'))