In [1]:
import pandas as pd

In [2]:
# Read the benchmark results table; the path is relative to the kernel's working directory.
bench_data = pd.read_csv(filepath_or_buffer='results.csv')

# Show the first rows as a quick look at the loaded columns.
bench_data.head(n=5)

Unnamed: 0,Strategy,Ground Truth Binary,Ground Truth Source,Japicmp Binary,Japicmp Source,Revapi Binary,Revapi Source,Roseau Binary,Roseau Source
0,RemoveTypeR21,1,1,1,1,1,1,1,1
1,ChangeRecordComponent0ToLongFromRecordApiR21,1,1,1,1,1,1,1,1
2,ReduceR21VisibilityToPackagePrivate,1,1,1,1,1,1,1,1
3,ChangeRecordComponent0ToByteVarargsFromRecordA...,1,1,1,1,1,1,1,1
4,ChangeRecordComponent0ToLongVarargsFromRecordA...,1,1,1,1,1,1,1,1


In [3]:
# Column-name building blocks: every verdict column is named '<subject><suffix>',
# where the subject is either the ground truth or one of the analyzed tools.
GROUND_TRUTH_KEY = 'Ground Truth'
tools_analyzed = ['Japicmp', 'Revapi', 'Roseau']

GENERAL_BREAKING_SUFFIX = ' Global'
BINARY_BREAKING_SUFFIX = ' Binary'
SOURCE_BREAKING_SUFFIX = ' Source'

# Start from the benchmark name so the frame shares bench_data's index.
full_data = pd.DataFrame({'bench': bench_data['Strategy']})

# The ground truth and each tool get identical treatment, so handle them in one loop.
# Ground truth goes first to keep the column order: ground truth, then tools in list order.
for subject in [GROUND_TRUTH_KEY, *tools_analyzed]:
    binary_column = f'{subject}{BINARY_BREAKING_SUFFIX}'
    source_column = f'{subject}{SOURCE_BREAKING_SUFFIX}'

    # A value equal to 1 means "breaking"; anything else (including missing values) maps to False.
    full_data[binary_column] = bench_data[binary_column].eq(1)
    full_data[source_column] = bench_data[source_column].eq(1)

    # "Global" is the union: flagged as breaking at the binary level or at the source level.
    full_data[f'{subject}{GENERAL_BREAKING_SUFFIX}'] = full_data[binary_column] | full_data[source_column]

full_data.head()

Unnamed: 0,bench,Ground Truth Binary,Ground Truth Source,Ground Truth Global,Japicmp Binary,Japicmp Source,Japicmp Global,Revapi Binary,Revapi Source,Revapi Global,Roseau Binary,Roseau Source,Roseau Global
0,RemoveTypeR21,True,True,True,True,True,True,True,True,True,True,True,True
1,ChangeRecordComponent0ToLongFromRecordApiR21,True,True,True,True,True,True,True,True,True,True,True,True
2,ReduceR21VisibilityToPackagePrivate,True,True,True,True,True,True,True,True,True,True,True,True
3,ChangeRecordComponent0ToByteVarargsFromRecordA...,True,True,True,True,True,True,True,True,True,True,True,True
4,ChangeRecordComponent0ToLongVarargsFromRecordA...,True,True,True,True,True,True,True,True,True,True,True,True


In [4]:
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def compute_metrics_for_breaking_type(data, suffix, tools, ground_truth_key=None):
    """Compute precision, recall and F1 of each tool against the ground truth.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding one column per subject and breaking type, named
        ``'<subject><suffix>'``. Values are compared against ``True`` / ``False``.
    suffix : str
        Column-name suffix selecting which breaking type to evaluate.
    tools : iterable of str
        Column-name prefixes of the tools to evaluate.
    ground_truth_key : str, optional
        Column-name prefix of the reference verdicts. Defaults to the
        module-level ``GROUND_TRUTH_KEY``.

    Returns
    -------
    pandas.DataFrame
        One row per tool, in the order given, with columns ``tool``,
        ``precision``, ``recall`` and ``f1``. A metric whose denominator is
        zero is reported as NaN.

    Raises
    ------
    KeyError
        If the ground-truth column or a tool column is missing from ``data``.
    """
    if ground_truth_key is None:
        ground_truth_key = GROUND_TRUTH_KEY

    # The reference masks do not depend on the tool, so build them once.
    truth = data[f'{ground_truth_key}{suffix}']
    truth_positive = truth.eq(True)
    truth_negative = truth.eq(False)

    records = []
    for tool in tools:
        verdict = data[f'{tool}{suffix}']
        flagged = verdict.eq(True)
        not_flagged = verdict.eq(False)

        # Sum the boolean masks to count rows directly. Counting non-null cells
        # of the first column instead would undercount whenever that column
        # has missing values.
        tp = int((flagged & truth_positive).sum())
        fp = int((flagged & truth_negative).sum())
        fn = int((not_flagged & truth_positive).sum())

        precision = _safe_ratio(tp, tp + fp)
        recall = _safe_ratio(tp, tp + fn)
        f1 = _safe_ratio(2 * (precision * recall), precision + recall)

        records.append({
            'tool': tool,
            'precision': precision,
            'recall': recall,
            'f1': f1
        })

    # Build the frame once instead of concatenating inside the loop; the explicit
    # column list keeps the expected columns even when no tool is given.
    return pd.DataFrame(records, columns=['tool', 'precision', 'recall', 'f1'])


In [5]:
# Metrics for the combined verdict columns (those ending in GENERAL_BREAKING_SUFFIX).
general_breaking_metrics_data = compute_metrics_for_breaking_type(
    data=full_data,
    suffix=GENERAL_BREAKING_SUFFIX,
    tools=tools_analyzed,
)

general_breaking_metrics_data

Unnamed: 0,tool,precision,recall,f1
0,Japicmp,0.582935,0.993748,0.734822
1,Revapi,0.578085,0.99921,0.732429
2,Roseau,0.889015,0.996311,0.93961


In [6]:
# Metrics for the binary-level verdict columns (those ending in BINARY_BREAKING_SUFFIX).
binary_breaking_metrics_data = compute_metrics_for_breaking_type(
    data=full_data,
    suffix=BINARY_BREAKING_SUFFIX,
    tools=tools_analyzed,
)

binary_breaking_metrics_data

Unnamed: 0,tool,precision,recall,f1
0,Japicmp,0.575208,0.999945,0.730312
1,Revapi,0.572583,0.999168,0.727986
2,Roseau,0.876967,1.0,0.934451


In [7]:
# Metrics for the source-level verdict columns (those ending in SOURCE_BREAKING_SUFFIX).
source_breaking_metrics_data = compute_metrics_for_breaking_type(
    data=full_data,
    suffix=SOURCE_BREAKING_SUFFIX,
    tools=tools_analyzed,
)

source_breaking_metrics_data

Unnamed: 0,tool,precision,recall,f1
0,Japicmp,0.492175,0.992604,0.658058
1,Revapi,0.494582,0.991761,0.660019
2,Roseau,0.756748,0.992144,0.858604


In [8]:
# Class balance of the ground-truth verdicts, printed in the order global, binary, source.
# Column names are derived from the shared constants rather than repeated as literals,
# so they cannot drift from the names used when the columns were created.
for breaking_suffix in (GENERAL_BREAKING_SUFFIX, BINARY_BREAKING_SUFFIX, SOURCE_BREAKING_SUFFIX):
    print(full_data[f'{GROUND_TRUTH_KEY}{breaking_suffix}'].value_counts())

True     77255
False    66780
Name: Ground Truth Global, dtype: int64
True     73339
False    70696
Name: Ground Truth Binary, dtype: int64
False    78733
True     65302
Name: Ground Truth Source, dtype: int64
