In [3]:
import pandas as pd
import re

In [4]:
# Load the raw benchmark results. As the preview below shows, each row has a
# `Strategy` name plus integer `Binary` / `Source` columns for the ground truth
# and for each analysed tool (Japicmp, Revapi, Roseau).
bench_data = pd.read_csv('results.csv')

# Preview the first rows to sanity-check the columns that were read.
bench_data.head()

Unnamed: 0,Strategy,Ground Truth Binary,Ground Truth Source,Japicmp Binary,Japicmp Source,Revapi Binary,Revapi Source,Roseau Binary,Roseau Source
0,RemoveTypeC3,1,1,1,1,1,1,1,1
1,ReduceC3VisibilityToPackagePrivate,1,1,1,1,1,1,1,1
2,AddFinalModifierToC3,1,1,1,1,1,1,1,1
3,AddAbstractModifierToC3,1,1,1,1,1,1,1,1
4,ReduceFieldF25InApiC3VisibilityToPackagePrivate,1,1,1,1,1,1,1,1


In [5]:
# Column-name building blocks: every verdict column is named '<label><suffix>'.
GROUND_TRUTH_KEY = 'Ground Truth'
tools_analyzed = ['Japicmp', 'Revapi', 'Roseau']

GENERAL_BREAKING_SUFFIX = ' Global'
BINARY_BREAKING_SUFFIX = ' Binary'
SOURCE_BREAKING_SUFFIX = ' Source'


def _add_verdict_columns(target, raw, label):
    """Add the boolean Binary / Source / Global verdict columns of `label` to `target`.

    Args:
        target: DataFrame that receives the three new columns (modified in place).
        raw: DataFrame holding the '<label> Binary' and '<label> Source' columns.
        label: column prefix, i.e. the ground-truth key or a tool name.

    A raw value equal to 1 becomes True; anything else (including a missing
    value) becomes False. The Global column is the element-wise OR of the
    Binary and Source columns.
    """
    binary_col = f'{label}{BINARY_BREAKING_SUFFIX}'
    source_col = f'{label}{SOURCE_BREAKING_SUFFIX}'
    # Vectorized comparison; same result as mapping `x == 1` over every row.
    target[binary_col] = raw[binary_col].eq(1)
    target[source_col] = raw[source_col].eq(1)
    target[f'{label}{GENERAL_BREAKING_SUFFIX}'] = target[binary_col] | target[source_col]


# One row per benchmark, keyed by the strategy name; the ground-truth columns
# come first, followed by the columns of each tool in `tools_analyzed` order.
full_data = pd.DataFrame({'bench': bench_data['Strategy']})
_add_verdict_columns(full_data, bench_data, GROUND_TRUTH_KEY)

for tool in tools_analyzed:
    _add_verdict_columns(full_data, bench_data, tool)

full_data.head()

Unnamed: 0,bench,Ground Truth Binary,Ground Truth Source,Ground Truth Global,Japicmp Binary,Japicmp Source,Japicmp Global,Revapi Binary,Revapi Source,Revapi Global,Roseau Binary,Roseau Source,Roseau Global
0,RemoveTypeC3,True,True,True,True,True,True,True,True,True,True,True,True
1,ReduceC3VisibilityToPackagePrivate,True,True,True,True,True,True,True,True,True,True,True,True
2,AddFinalModifierToC3,True,True,True,True,True,True,True,True,True,True,True,True
3,AddAbstractModifierToC3,True,True,True,True,True,True,True,True,True,True,True,True
4,ReduceFieldF25InApiC3VisibilityToPackagePrivate,True,True,True,True,True,True,True,True,True,True,True,True


In [6]:
# Regexes that classify a benchmark name into the mutation strategy that
# produced it. ORDER MATTERS: the lookup in this notebook stops at the first
# regex that matches, so a pattern that is a special case of a broader one
# must be listed before that broader pattern.
STRATEGIES = [
    re.compile(r'^Add.+ModifierTo.+$'),
    re.compile(r'^AddException.+ToConstructor.+In.+$'),
    re.compile(r'^AddException.+ToMethod.+In.+$'),
    re.compile(r'^AddMethodToType.+$'),
    re.compile(r'^AddModifier.+ToField.+In.+$'),
    re.compile(r'^AddModifier.+ToMethod.+In.+$'),
    re.compile(r'^AddParameter.+ToConstructor.+In.+$'),
    re.compile(r'^AddParameter.+ToMethod.+In.+$'),
    re.compile(r'^AddRecordComponent.+ToRecord.+$'),
    re.compile(r'^ChangeField.+In.+TypeTo.+$'),
    re.compile(r'^ChangeMethod.+In.+TypeTo.+$'),
    re.compile(r'^ChangeParameter.+To.+FromConstructor.+In.+$'),
    re.compile(r'^ChangeParameter.+To.+FromMethod.+In.+$'),
    # The record name is a wildcard like in every other pattern (this entry
    # previously ended in a literal, never-substituted '%s' placeholder).
    re.compile(r'^ChangeRecordComponent.+To.+FromRecord.+$'),
    # Member-level visibility reductions come before the generic pattern,
    # which also matches them and would otherwise always win.
    re.compile(r'^ReduceConstructor.+In.+VisibilityTo.+$'),
    re.compile(r'^ReduceField.+In.+VisibilityTo.+$'),
    re.compile(r'^ReduceMethod.+In.+VisibilityTo.+$'),
    re.compile(r'^Reduce.+VisibilityTo.+$'),
    re.compile(r'^Remove.+ModifierIn.+$'),
    re.compile(r'^RemoveConstructor.+In.+$'),
    re.compile(r'^RemoveEnumValue.+In.+$'),
    re.compile(r'^RemoveException.+FromConstructor.+In.+$'),
    re.compile(r'^RemoveException.+FromMethod.+In.+$'),
    re.compile(r'^RemoveField.+In.+$'),
    re.compile(r'^RemoveMethod.+In.+$'),
    re.compile(r'^RemoveModifier.+ToField.+In.+$'),
    re.compile(r'^RemoveModifier.+ToMethod.+In.+$'),
    re.compile(r'^RemoveParameter.+FromConstructor.+In.+$'),
    re.compile(r'^RemoveParameter.+FromMethod.+In.+$'),
    re.compile(r'^RemoveRecordComponent.+FromRecord.+$'),
    re.compile(r'^RemoveType.+$'),
]

def get_strategies_from_benchs(benchs):
    """Return the sorted, de-duplicated strategy patterns covering `benchs`.

    Each benchmark name is tested against the `STRATEGIES` regexes in list
    order and attributed to the first one that matches (anchored at the start
    of the name). Names that match no regex contribute nothing.

    Args:
        benchs: iterable of benchmark names (strings).

    Returns:
        A sorted list of the `pattern` strings of the regexes that were hit.
    """
    first_hits = (
        next((regex.pattern for regex in STRATEGIES if regex.match(name)), None)
        for name in benchs
    )
    # Drop the names that matched nothing and collapse duplicates.
    return sorted({hit for hit in first_hits if hit is not None})


In [18]:
# Benchmarks the ground truth marks as NOT binary-breaking while Japicmp,
# Revapi and Roseau all report a binary break, ordered by benchmark name.
not_binary_breaking_for_ground_truth_but_breaking_for_all_tools = (
    full_data
    .query('`Ground Truth Binary` == False and `Japicmp Binary` == True and `Revapi Binary` == True and `Roseau Binary` == True')
    .sort_values(by='bench')
)

# Number of such benchmarks.
print(not_binary_breaking_for_ground_truth_but_breaking_for_all_tools.shape[0])

# Strategies those benchmarks belong to.
binary_benchs = not_binary_breaking_for_ground_truth_but_breaking_for_all_tools['bench'].tolist()
binary_strategies = get_strategies_from_benchs(binary_benchs)

print(binary_strategies)

not_binary_breaking_for_ground_truth_but_breaking_for_all_tools.loc[:, ['bench']]

951
['^AddModifier.+ToField.+In.+$', '^AddModifier.+ToMethod.+In.+$', '^AddParameter.+ToMethod.+In.+$', '^ChangeField.+In.+TypeTo.+$', '^ChangeMethod.+In.+TypeTo.+$', '^ChangeParameter.+To.+FromMethod.+In.+$', '^Reduce.+VisibilityTo.+$', '^RemoveMethod.+In.+$', '^RemoveModifier.+ToField.+In.+$', '^RemoveParameter.+FromMethod.+In.+$']


Unnamed: 0,bench
21885,AddModifierStaticToFieldF145InApiC5
22029,AddModifierStaticToFieldF161InApiC5
43919,AddModifierStaticToFieldF241InApiE24
82,AddModifierStaticToFieldF33InApiC3
226,AddModifierStaticToFieldF49InApiC3
...,...
25552,RemoveParameter0FromMethodM1407CharArrInApiC5
25569,RemoveParameter0FromMethodM1408IntArrInApiC5
25586,RemoveParameter0FromMethodM1409JavaLangBoolean...
25603,RemoveParameter0FromMethodM1410JavaLangThreadA...


In [15]:
# Benchmarks the ground truth marks as NOT source-breaking while Japicmp,
# Revapi and Roseau all report a source break, ordered by benchmark name.
not_source_breaking_for_ground_truth_but_breaking_for_all_tools = (
    full_data
    .query('`Ground Truth Source` == False and `Japicmp Source` == True and `Revapi Source` == True and `Roseau Source` == True')
    .sort_values(by='bench')
)

# Number of such benchmarks.
print(not_source_breaking_for_ground_truth_but_breaking_for_all_tools.shape[0])

# Strategies those benchmarks belong to.
source_benchs = not_source_breaking_for_ground_truth_but_breaking_for_all_tools['bench'].tolist()
source_strategies = get_strategies_from_benchs(source_benchs)

print(source_strategies)

not_source_breaking_for_ground_truth_but_breaking_for_all_tools.loc[:, ['bench']]

15373
['^AddException.+ToMethod.+In.+$', '^AddParameter.+ToConstructor.+In.+$', '^AddParameter.+ToMethod.+In.+$', '^AddRecordComponent.+ToRecord.+$', '^ChangeMethod.+In.+TypeTo.+$', '^ChangeParameter.+To.+FromMethod.+In.+$', '^Reduce.+VisibilityTo.+$', '^RemoveConstructor.+In.+$', '^RemoveException.+FromConstructor.+In.+$', '^RemoveException.+FromMethod.+In.+$']


Unnamed: 0,bench
23154,AddExceptionJavaLangExceptionToMethodM1313InApiC5
23204,AddExceptionJavaLangExceptionToMethodM1315Java...
23253,AddExceptionJavaLangExceptionToMethodM1317Char...
23288,AddExceptionJavaLangExceptionToMethodM1319Java...
23323,AddExceptionJavaLangExceptionToMethodM1321Char...
...,...
14316,RemoveExceptionJavaLangExceptionFromMethodM991...
14362,RemoveExceptionJavaLangExceptionFromMethodM992...
14412,RemoveExceptionJavaLangExceptionFromMethodM994...
14466,RemoveExceptionJavaLangExceptionFromMethodM996...


In [16]:
# Benchmarks the ground truth marks as NOT breaking at all (neither binary nor
# source) while Japicmp, Revapi and Roseau all report some break, ordered by
# benchmark name.
not_global_breaking_for_ground_truth_but_breaking_for_all_tools = (
    full_data
    .query('`Ground Truth Global` == False and `Japicmp Global` == True and `Revapi Global` == True and `Roseau Global` == True')
    .sort_values(by='bench')
)

# Number of such benchmarks.
print(not_global_breaking_for_ground_truth_but_breaking_for_all_tools.shape[0])

# Strategies those benchmarks belong to.
global_benchs = not_global_breaking_for_ground_truth_but_breaking_for_all_tools['bench'].tolist()
global_strategies = get_strategies_from_benchs(global_benchs)

print(global_strategies)

not_global_breaking_for_ground_truth_but_breaking_for_all_tools.loc[:, ['bench']]

1277
['^AddException.+ToMethod.+In.+$', '^AddModifier.+ToField.+In.+$', '^Reduce.+VisibilityTo.+$', '^RemoveException.+FromConstructor.+In.+$', '^RemoveException.+FromMethod.+In.+$']


Unnamed: 0,bench
23154,AddExceptionJavaLangExceptionToMethodM1313InApiC5
23204,AddExceptionJavaLangExceptionToMethodM1315Java...
23253,AddExceptionJavaLangExceptionToMethodM1317Char...
23288,AddExceptionJavaLangExceptionToMethodM1319Java...
23323,AddExceptionJavaLangExceptionToMethodM1321Char...
...,...
14316,RemoveExceptionJavaLangExceptionFromMethodM991...
14362,RemoveExceptionJavaLangExceptionFromMethodM992...
14412,RemoveExceptionJavaLangExceptionFromMethodM994...
14466,RemoveExceptionJavaLangExceptionFromMethodM996...


In [17]:
# Union of the strategy patterns found by the binary, source and global
# comparisons, de-duplicated and sorted alphabetically.
all_strategies = sorted({*binary_strategies, *source_strategies, *global_strategies})
print(len(all_strategies))
print(all_strategies)

16
['^AddException.+ToMethod.+In.+$', '^AddModifier.+ToField.+In.+$', '^AddModifier.+ToMethod.+In.+$', '^AddParameter.+ToConstructor.+In.+$', '^AddParameter.+ToMethod.+In.+$', '^AddRecordComponent.+ToRecord.+$', '^ChangeField.+In.+TypeTo.+$', '^ChangeMethod.+In.+TypeTo.+$', '^ChangeParameter.+To.+FromMethod.+In.+$', '^Reduce.+VisibilityTo.+$', '^RemoveConstructor.+In.+$', '^RemoveException.+FromConstructor.+In.+$', '^RemoveException.+FromMethod.+In.+$', '^RemoveMethod.+In.+$', '^RemoveModifier.+ToField.+In.+$', '^RemoveParameter.+FromMethod.+In.+$']
