In [1]:
import pandas as pd
from dataloader import *
from classifier import *

In [2]:
# Verbosity switch read by the evaluation helpers defined further down: when
# True they print the project name, the number of mocks in each test split and
# the averaged metrics of each split.
log = True

In [3]:
# For each project: the path registered in `data_files` comes first (index 0),
# followed by the four numbered CSVs (1-4) under ../data_files/cvp/.
vers_files = {
    proj: [
        data_files[proj],
        *(
            "../data_files/cvp/%s_%i.csv" % (proj.lower(), ver)
            for ver in range(1, 5)
        ),
    ]
    for proj in projects
}

In [4]:
# Load the five files of every project into de-duplicated DataFrames.
# Each frame is loaded under the label "<project>_<version index>"; the
# list position of a frame is its version index (0-4).
project_df = {
    proj: [
        load_df("%s_%d" % (proj, ver), paths[ver], True).drop_duplicates()
        for ver in range(5)
    ]
    for proj, paths in vers_files.items()
}

In [5]:
# Number of rows flagged IS_MOCK for every (project, version) pair.
#
# The table is built in a single constructor call so that the columns get an
# integer dtype; filling an empty frame cell by cell with `.at` leaves every
# column as `object` dtype and leaks loop variables into the notebook
# namespace.  IS_MOCK is used as a boolean mask throughout this notebook, so
# summing it counts the True rows.
data_stat_df = pd.DataFrame(
    {
        'Version %d' % ver: [
            int(project_df[proj][ver]['IS_MOCK'].sum())
            for proj in projects
        ]
        for ver in range(5)
    },
    index=projects,
)

In [6]:
# Show the table of IS_MOCK row counts (projects as rows, versions as columns).
data_stat_df

Unnamed: 0,Version 0,Version 1,Version 2,Version 3,Version 4
Hadoop,14771,14773,12779,12767,12512
Flink,7005,4823,6326,4373,6266
Hive,1549,1504,1451,1142,1062
Camel,1864,1878,836,2172,2144
CXF,1527,1827,1645,1579,1461
Druid,1939,1154,1069,1162,991
HBase,1093,938,864,1250,1245
Dubbo,788,226,154,145,137
Oozie,279,274,268,13,22
Storm,377,447,290,258,110


In [7]:
def _model_core(p):
    """Run one train/evaluate round of the classifier.

    Parameters
    ----------
    p : sequence
        ``p[0]`` is the training DataFrame and ``p[1]`` the test DataFrame.
        Both are passed through ``balance_dataset`` before use.

    Returns
    -------
    Whatever ``run_classifier(X_train, X_test, y_train, y_test)`` returns.

    Notes
    -----
    A chi2 ``SelectKBest(k=12)`` feature-selection step was tried here at some
    point and is currently disabled.
    """
    # Identifier / label columns that must not be fed to the model as features.
    non_feature_cols = ['CUT', 'IS_MOCK', 'TC', 'TM', 'D', 'L']

    # Balance the training set first, then the test set (same order as before,
    # in case balance_dataset consumes shared random state).
    balanced_train = balance_dataset(p[0])
    balanced_test = balance_dataset(p[1])

    features_train = balanced_train.drop(columns=non_feature_cols)
    features_test = balanced_test.drop(columns=non_feature_cols)
    labels_train = balanced_train['IS_MOCK']
    labels_test = balanced_test['IS_MOCK']

    return run_classifier(features_train, features_test, labels_train, labels_test)

def _ver_run(train_idx, test_idx, dfs, n_runs=100):
    """Train on some versions, test on others, and average repeated runs.

    Parameters
    ----------
    train_idx : iterable of int
        Positions in ``dfs`` whose frames are concatenated into the training set.
    test_idx : iterable of int
        Positions in ``dfs`` whose frames are concatenated into the test set.
    dfs : sequence of DataFrame
        Per-version frames; each must carry the columns PROJ, TC, TM, D and
        IS_MOCK.
    n_runs : int, default 100
        How many times ``_model_core`` is executed on the same (train, test)
        pair before the metrics are averaged.

    Returns
    -------
    (int, dict)
        The number of IS_MOCK rows left in the test set, and a dict mapping
        every name in ``metrics`` to the mean score over all runs multiplied
        by 100.  When the test set contains no mocks, no model is trained and
        every metric is reported as 0.
    """
    import multiprocessing as mp
    import numpy as np

    train = pd.concat(
        [dfs[i] for i in train_idx]
    ).drop(['PROJ'], axis=1).drop_duplicates(['TC', 'TM', 'D'])
    # (TC, TM, D) triples already present in the training data.
    train_keys = set(zip(train['TC'], train['TM'], train['D']))

    test = pd.concat(
        [dfs[i] for i in test_idx]
    ).drop(['PROJ'], axis=1).drop_duplicates()
    # Keep only test rows whose (TC, TM, D) triple is unseen in training.
    # A column-wise zip replaces the row-wise iterrows()/apply(axis=1): it is
    # far cheaper and, unlike apply(axis=1), always yields a 1-D mask even
    # when the test frame is empty.  Filtering an already de-duplicated frame
    # cannot introduce duplicates, so no second drop_duplicates() is needed.
    unseen = np.array(
        [key not in train_keys
         for key in zip(test['TC'], test['TM'], test['D'])],
        dtype=bool,
    )
    test = test[unseen]

    test_mock_count = len(test[test['IS_MOCK']])
    if log:
        print("%d mocks in test" % test_mock_count)
    if test_mock_count == 0:
        return test_mock_count, {m: 0 for m in metrics}

    # The context manager tears the worker pool down even if a run raises;
    # map() blocks until every result is collected, so nothing is lost.
    # NOTE(review): if balance_dataset draws from NumPy's global RNG, forked
    # workers start from the same inherited state, so repetitions handled by
    # different workers may not be independent -- confirm.
    with mp.Pool() as pool:
        perf_data = pool.map(_model_core, [(train, test)] * n_runs)

    ret = {
        m: np.mean([run[m] for run in perf_data]) * 100
        for m in metrics
    }
    if log:
        print(ret)
    return test_mock_count, ret


def inter_version_single_proj(proj_name, ver_df):
    """Evaluate one project across its versions.

    For every version index ``i`` except the last, the frames at all later
    indices form the training set and the frame at index ``i`` is the test
    set.  The per-split metrics are combined into an average weighted by the
    number of mocks in each split's test set.

    Parameters
    ----------
    proj_name : str
        Project label; only used for logging.
    ver_df : sequence of DataFrame
        The project's per-version frames.  The number of splits follows from
        its length (five frames give the four splits used in this notebook).

    Returns
    -------
    dict
        Maps every name in ``metrics`` to its weighted-average score.  If no
        split has any mock in its test set the average is undefined and every
        metric is NaN.
    """
    import numpy as np
    if log:
        print(proj_name)

    n_versions = len(ver_df)
    data_pair_idx = [
        (list(range(i + 1, n_versions)), [i])
        for i in range(n_versions - 1)
    ]
    perf_data = [
        _ver_run(tri, tsi, ver_df)
        for tri, tsi in data_pair_idx
    ]

    total_mocks = np.sum([n for n, _ in perf_data])
    if total_mocks == 0:
        # Explicit undefined result instead of a 0/0 division that emits a
        # RuntimeWarning and yields NaN implicitly.
        return {m: float('nan') for m in metrics}

    return {
        m: np.sum([p[m] * n for n, p in perf_data]) / total_mocks
        for m in metrics
    }
    

In [8]:
def run_inter_version(proj_df):
    """Run the inter-version evaluation for every project.

    Parameters
    ----------
    proj_df : dict
        Maps a project name to that project's list of per-version DataFrames.

    Returns
    -------
    dict
        Maps each project name to the result of
        ``inter_version_single_proj`` for that project, in the iteration
        order of ``proj_df``.
    """
    results = {}
    for name, version_frames in proj_df.items():
        results[name] = inter_version_single_proj(name, version_frames)
    return results

In [9]:
# Run the inter-version evaluation for every project.  The result maps each
# project name to a dict of weighted-average metric scores.  This cell spawns
# a process pool for every split and repeats the model many times, so it is
# the slow step of the notebook.
performance = run_inter_version(project_df)

Hadoop
459 mocks in test
{'accuracy': 78.64923747276688, 'precision': 74.76459510357816, 'recall': 86.49237472766885, 'f1-score': 80.20202020202021}
1620 mocks in test
{'accuracy': 72.43827160493825, 'precision': 73.86736703873935, 'recall': 69.44444444444444, 'f1-score': 71.58765510658604}
367 mocks in test
{'accuracy': 75.61307901907358, 'precision': 80.92105263157897, 'recall': 67.02997275204358, 'f1-score': 73.32339791356185}
326 mocks in test
{'accuracy': 83.58895705521472, 'precision': 82.88288288288292, 'recall': 84.66257668711656, 'f1-score': 83.76327769347498}
Flink
1538 mocks in test
{'accuracy': 81.79453836150844, 'precision': 79.42238267148012, 'recall': 85.82574772431728, 'f1-score': 82.5}
956 mocks in test
{'accuracy': 75.99372384937237, 'precision': 78.99649941656945, 'recall': 70.81589958158995, 'f1-score': 74.68284611141753}
903 mocks in test
{'accuracy': 82.66888150609081, 'precision': 83.83027522935777, 'recall': 80.95238095238096, 'f1-score': 82.36619718309859}
166 

In [10]:
# One row per project, one column per metric, indexed by project name.
performance_df = pd.DataFrame(
    [
        {'project': name, **scores}
        for name, scores in performance.items()
    ]
).set_index(['project'])
performance_df

Unnamed: 0_level_0,accuracy,precision,recall,f1-score
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Hadoop,75.198413,76.010076,73.737374,74.675773
Flink,79.79231,80.2608,78.950323,79.373998
Hive,69.574468,73.850911,60.957447,65.495548
Camel,72.331461,72.068537,73.314607,72.489584
CXF,61.59292,70.502271,40.0,51.010686
Druid,68.75,74.360642,56.899351,64.334749
HBase,83.817829,81.989435,87.015504,84.246425
Dubbo,68.328141,83.333358,44.758048,56.931109
Oozie,64.110429,94.989775,30.674847,44.612395
Storm,73.294872,80.820588,60.015779,66.987383


In [11]:
# Summary statistics of every metric across the projects.
performance_df.describe()

Unnamed: 0,accuracy,precision,recall,f1-score
count,10.0,10.0,10.0,10.0
mean,71.679084,78.818639,60.632328,66.015765
std,6.764684,7.189467,18.11617,12.4612
min,61.59292,70.502271,30.674847,44.612395
25%,68.433606,73.978344,47.793373,58.782019
50%,70.952964,78.135438,60.486613,66.241466
75%,74.722527,81.697224,73.631682,74.129226
max,83.817829,94.989775,87.015504,84.246425


In [15]:
# Emit the metrics table, rounded to two decimals, as LaTeX source; print() is
# used so the raw text can be copied straight from the output.
print(performance_df.round(2).to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &  accuracy &  precision &  recall &  f1-score \\
project &           &            &         &           \\
\midrule
Hadoop  &     75.20 &      76.01 &   73.74 &     74.68 \\
Flink   &     79.79 &      80.26 &   78.95 &     79.37 \\
Hive    &     69.57 &      73.85 &   60.96 &     65.50 \\
Camel   &     72.33 &      72.07 &   73.31 &     72.49 \\
CXF     &     61.59 &      70.50 &   40.00 &     51.01 \\
Druid   &     68.75 &      74.36 &   56.90 &     64.33 \\
HBase   &     83.82 &      81.99 &   87.02 &     84.25 \\
Dubbo   &     68.33 &      83.33 &   44.76 &     56.93 \\
Oozie   &     64.11 &      94.99 &   30.67 &     44.61 \\
Storm   &     73.29 &      80.82 &   60.02 &     66.99 \\
\bottomrule
\end{tabular}

