In [1]:
import pandas as pd
import re

# Data

In [2]:
results_data = pd.read_csv('results.csv')
results_data.head()

Unnamed: 0,Strategy,Ground Truth Binary,Ground Truth Source,Roseau Binary,Roseau Source
0,ReduceR21VisibilityToPackagePrivate,1,1,1,1
1,RemoveTypeR21,1,1,1,1
2,ChangeRecordComponent0ToLongVarargsFromRecordA...,1,1,1,1
3,ChangeRecordComponent0ToLongFromRecordApiR21,1,1,1,1
4,RemoveFinalModifierInR21,0,0,0,0


In [3]:
errors_data = pd.read_csv('errors.csv')
errors_data.head()

Unnamed: 0,Strategy,Error
0,AddRecordComponentLongToRecordApiR21,Failed to package new api to jar
1,AddRecordComponentLongVarargsToRecordApiR21,Failed to package new api to jar
2,AddRecordComponentByteToRecordApiR21,Failed to package new api to jar
3,AddRecordComponentByteVarargsToRecordApiR21,Failed to package new api to jar
4,AddRecordComponentJavaLangIntegerToRecordApiR21,Failed to package new api to jar


In [4]:
GROUND_TRUTH_KEY = 'Ground Truth'
tools_analyzed = ['Roseau']

GENERAL_BREAKING_SUFFIX = ' Global'
BINARY_BREAKING_SUFFIX = ' Binary'
SOURCE_BREAKING_SUFFIX = ' Source'

full_data = pd.DataFrame(columns=['bench'])
full_data['bench'] = results_data['Strategy']
full_data[f'{GROUND_TRUTH_KEY}{BINARY_BREAKING_SUFFIX}'] = results_data[f'{GROUND_TRUTH_KEY}{BINARY_BREAKING_SUFFIX}'].map(lambda x: True if x == 1 else False)
full_data[f'{GROUND_TRUTH_KEY}{SOURCE_BREAKING_SUFFIX}'] = results_data[f'{GROUND_TRUTH_KEY}{SOURCE_BREAKING_SUFFIX}'].map(lambda x: True if x == 1 else False)
full_data[f'{GROUND_TRUTH_KEY}{GENERAL_BREAKING_SUFFIX}'] = full_data[f'{GROUND_TRUTH_KEY}{BINARY_BREAKING_SUFFIX}'] | full_data[f'{GROUND_TRUTH_KEY}{SOURCE_BREAKING_SUFFIX}']

for tool in tools_analyzed:
    full_data[f'{tool}{BINARY_BREAKING_SUFFIX}'] = results_data[f'{tool}{BINARY_BREAKING_SUFFIX}'].map(lambda x: True if x == 1 else False)
    full_data[f'{tool}{SOURCE_BREAKING_SUFFIX}'] = results_data[f'{tool}{SOURCE_BREAKING_SUFFIX}'].map(lambda x: True if x == 1 else False)
    full_data[f'{tool}{GENERAL_BREAKING_SUFFIX}'] = full_data[f'{tool}{BINARY_BREAKING_SUFFIX}'] | full_data[f'{tool}{SOURCE_BREAKING_SUFFIX}']

full_data.head()

Unnamed: 0,bench,Ground Truth Binary,Ground Truth Source,Ground Truth Global,Roseau Binary,Roseau Source,Roseau Global
0,ReduceR21VisibilityToPackagePrivate,True,True,True,True,True,True
1,RemoveTypeR21,True,True,True,True,True,True
2,ChangeRecordComponent0ToLongVarargsFromRecordA...,True,True,True,True,True,True
3,ChangeRecordComponent0ToLongFromRecordApiR21,True,True,True,True,True,True
4,RemoveFinalModifierInR21,False,False,False,False,False,False


In [5]:
STRATEGIES = [
    re.compile(r'^Add.+ModifierTo.+$'),
    re.compile(r'^AddException.+ToConstructor.+In.+$'),
    re.compile(r'^AddException.+ToMethod.+In.+$'),
    re.compile(r'^AddImplementedInterfaceToType.+$'),
    re.compile(r'^AddMethodToType.+$'),
    re.compile(r'^AddModifier.+ToField.+In.+$'),
    re.compile(r'^AddModifier.+ToMethod.+In.+$'),
    re.compile(r'^AddParameter.+ToConstructor.+In.+$'),
    re.compile(r'^AddParameter.+ToMethod.+In.+$'),
    re.compile(r'^AddRecordComponent.+ToRecord.+$'),
    re.compile(r'^AddSuperClassToClass.+$'),
    re.compile(r'^ChangeField.+In.+TypeTo.+$'),
    re.compile(r'^ChangeMethod.+In.+TypeTo.+$'),
    re.compile(r'^ChangeParameter.+To.+FromConstructor.+In.+$'),
    re.compile(r'^ChangeParameter.+To.+FromMethod.+In.+$'),
    re.compile(r'^ChangeRecordComponent.+To.+FromRecord.+$'),
    re.compile(r'^ReduceConstructor.+In.+VisibilityTo.+$'),
    re.compile(r'^ReduceField.+In.+VisibilityTo.+$'),
    re.compile(r'^ReduceMethod.+In.+VisibilityTo.+$'),
    re.compile(r'^Reduce.+VisibilityTo.+$'),
    re.compile(r'^Remove.+ModifierIn.+$'),
    re.compile(r'^RemoveConstructor.+In.+$'),
    re.compile(r'^RemoveEnumValue.+In.+$'),
    re.compile(r'^RemoveException.+FromConstructor.+In.+$'),
    re.compile(r'^RemoveException.+FromMethod.+In.+$'),
    re.compile(r'^RemoveField.+In.+$'),
    re.compile(r'^RemoveImplementedInterface.+FromType.+$'),
    re.compile(r'^RemoveMethod.+In.+$'),
    re.compile(r'^RemoveModifier.+ToField.+In.+$'),
    re.compile(r'^RemoveModifier.+ToMethod.+In.+$'),
    re.compile(r'^RemoveParameter.+FromConstructor.+In.+$'),
    re.compile(r'^RemoveParameter.+FromMethod.+In.+$'),
    re.compile(r'^RemoveRecordComponent.+FromRecord.+$'),
    re.compile(r'^RemoveSuperClassFromClass.+$'),
    re.compile(r'^RemoveType.+$'),
]

# Global Results

In [6]:
def compute_metrics_for_breaking_type(data, suffix, tools):
    metrics_data = pd.DataFrame(columns=['tool', 'accuracy', 'precision', 'recall', 'f1'])

    for tool in tools:
        tp = data[(data[f'{tool}{suffix}'] == True) & (data[f'{GROUND_TRUTH_KEY}{suffix}'] == True)].count().iloc[0]
        tn = data[(data[f'{tool}{suffix}'] == False) & (data[f'{GROUND_TRUTH_KEY}{suffix}'] == False)].count().iloc[0]
        fp = data[(data[f'{tool}{suffix}'] == True) & (data[f'{GROUND_TRUTH_KEY}{suffix}'] == False)].count().iloc[0]
        fn = data[(data[f'{tool}{suffix}'] == False) & (data[f'{GROUND_TRUTH_KEY}{suffix}'] == True)].count().iloc[0]

        accuracy = (tp + tn) / (tp + tn + fp + fn) if (tp + tn + fp + fn) > 0 else -1
        precision = tp / (tp + fp) if (tp + fp) > 0 else -1
        recall = tp / (tp + fn) if (tp + fn) > 0 else -1
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else -1

        metrics_data = pd.concat(
            [
                metrics_data,
                pd.DataFrame.from_records([{
                    'tool': tool,
                    'accuracy': accuracy,
                    'precision': precision,
                    'recall': recall,
                    'f1': f1
                }])
            ],
            ignore_index=True
        )

    return metrics_data


In [7]:
general_breaking_metrics_data = compute_metrics_for_breaking_type(full_data, GENERAL_BREAKING_SUFFIX, tools_analyzed)

general_breaking_metrics_data

Unnamed: 0,tool,accuracy,precision,recall,f1
0,Roseau,0.947962,0.89258,0.995188,0.941095


In [8]:
binary_breaking_metrics_data = compute_metrics_for_breaking_type(full_data, BINARY_BREAKING_SUFFIX, tools_analyzed)

binary_breaking_metrics_data

Unnamed: 0,tool,accuracy,precision,recall,f1
0,Roseau,0.944974,0.877374,1.0,0.934682


In [9]:
source_breaking_metrics_data = compute_metrics_for_breaking_type(full_data, SOURCE_BREAKING_SUFFIX, tools_analyzed)

source_breaking_metrics_data

Unnamed: 0,tool,accuracy,precision,recall,f1
0,Roseau,0.886373,0.760299,0.990901,0.860417


In [10]:
print(full_data['Ground Truth Global'].value_counts())
print(full_data['Ground Truth Binary'].value_counts())
print(full_data['Ground Truth Source'].value_counts())

False    110098
True      78977
Name: Ground Truth Global, dtype: int64
False    114636
True      74439
Name: Ground Truth Binary, dtype: int64
False    122251
True      66824
Name: Ground Truth Source, dtype: int64


# FP Analysis

In [11]:
def get_strategies_from_benchs(benchs):
    unique_strategies = set()
    for bench in benchs:
        for strategy in STRATEGIES:
            if strategy.match(bench):
                unique_strategies.add(strategy.pattern)
                break

    return sorted(unique_strategies)


not_binary_breaking_for_ground_truth_but_breaking_for_all_tools = full_data.query('`Ground Truth Binary` == False and `Roseau Binary` == True')
not_binary_breaking_for_ground_truth_but_breaking_for_all_tools = not_binary_breaking_for_ground_truth_but_breaking_for_all_tools.sort_values(by='bench')

print(len(not_binary_breaking_for_ground_truth_but_breaking_for_all_tools.index))

binary_benchs = not_binary_breaking_for_ground_truth_but_breaking_for_all_tools['bench'].tolist()
binary_strategies = get_strategies_from_benchs(binary_benchs)

print(binary_strategies)

not_binary_breaking_for_ground_truth_but_breaking_for_all_tools[['bench']]

10404
['^AddModifier.+ToField.+In.+$', '^AddModifier.+ToMethod.+In.+$', '^AddParameter.+ToConstructor.+In.+$', '^AddParameter.+ToMethod.+In.+$', '^AddSuperClassToClass.+$', '^ChangeField.+In.+TypeTo.+$', '^ChangeMethod.+In.+TypeTo.+$', '^ChangeParameter.+To.+FromConstructor.+In.+$', '^ChangeParameter.+To.+FromMethod.+In.+$', '^Reduce.+VisibilityTo.+$', '^ReduceConstructor.+In.+VisibilityTo.+$', '^ReduceField.+In.+VisibilityTo.+$', '^ReduceMethod.+In.+VisibilityTo.+$', '^RemoveConstructor.+In.+$', '^RemoveField.+In.+$', '^RemoveImplementedInterface.+FromType.+$', '^RemoveMethod.+In.+$', '^RemoveModifier.+ToField.+In.+$', '^RemoveModifier.+ToMethod.+In.+$', '^RemoveParameter.+FromMethod.+In.+$', '^RemoveSuperClassFromClass.+$']


Unnamed: 0,bench
97327,AddModifierFinalToFieldF365InApiC9
97346,AddModifierFinalToFieldF366InApiC9
97365,AddModifierFinalToFieldF367InApiC9
97386,AddModifierFinalToFieldF368InApiC9
97403,AddModifierFinalToFieldF369InApiC9
...,...
96520,RemoveSuperClassFromClassC3310
77692,RemoveSuperClassFromClassC3311
118682,RemoveSuperClassFromClassC3312
117881,RemoveSuperClassFromClassC3313


In [12]:
not_source_breaking_for_ground_truth_but_breaking_for_all_tools = full_data.query('`Ground Truth Source` == False and `Roseau Source` == True')
not_source_breaking_for_ground_truth_but_breaking_for_all_tools = not_source_breaking_for_ground_truth_but_breaking_for_all_tools.sort_values(by='bench')

print(len(not_source_breaking_for_ground_truth_but_breaking_for_all_tools.index))

source_benchs = not_source_breaking_for_ground_truth_but_breaking_for_all_tools['bench'].tolist()
source_strategies = get_strategies_from_benchs(source_benchs)

print(source_strategies)

not_source_breaking_for_ground_truth_but_breaking_for_all_tools[['bench']]

20876
['^AddException.+ToConstructor.+In.+$', '^AddException.+ToMethod.+In.+$', '^AddModifier.+ToField.+In.+$', '^AddParameter.+ToConstructor.+In.+$', '^AddParameter.+ToMethod.+In.+$', '^AddRecordComponent.+ToRecord.+$', '^ChangeField.+In.+TypeTo.+$', '^ChangeMethod.+In.+TypeTo.+$', '^ChangeParameter.+To.+FromConstructor.+In.+$', '^ChangeParameter.+To.+FromMethod.+In.+$', '^ChangeRecordComponent.+To.+FromRecord.+$', '^ReduceConstructor.+In.+VisibilityTo.+$', '^ReduceField.+In.+VisibilityTo.+$', '^ReduceMethod.+In.+VisibilityTo.+$', '^RemoveConstructor.+In.+$', '^RemoveException.+FromConstructor.+In.+$', '^RemoveException.+FromMethod.+In.+$', '^RemoveField.+In.+$', '^RemoveMethod.+In.+$', '^RemoveModifier.+ToField.+In.+$', '^RemoveModifier.+ToMethod.+In.+$', '^RemoveParameter.+FromMethod.+In.+$']


Unnamed: 0,bench
116480,AddExceptionJavaIoIOExceptionToConstructorC9Ja...
116233,AddExceptionJavaIoIOExceptionToConstructorC9Ja...
63978,AddExceptionJavaIoIOExceptionToMethodM1269InApiC7
64003,AddExceptionJavaIoIOExceptionToMethodM1271Java...
64026,AddExceptionJavaIoIOExceptionToMethodM1273Java...
...,...
107020,RemoveParameter0FromMethodM2253JavaSqlTimeArrI...
107068,RemoveParameter0FromMethodM2254IntArrArrInApiC9
107114,RemoveParameter0FromMethodM2255JavaLangLongArr...
107162,RemoveParameter0FromMethodM2256JavaUtilRandomA...


In [13]:
all_strategies = set()
all_strategies.update(binary_strategies)
all_strategies.update(source_strategies)
all_strategies = sorted(all_strategies)
print(len(all_strategies))
print(all_strategies)

27
['^AddException.+ToConstructor.+In.+$', '^AddException.+ToMethod.+In.+$', '^AddModifier.+ToField.+In.+$', '^AddModifier.+ToMethod.+In.+$', '^AddParameter.+ToConstructor.+In.+$', '^AddParameter.+ToMethod.+In.+$', '^AddRecordComponent.+ToRecord.+$', '^AddSuperClassToClass.+$', '^ChangeField.+In.+TypeTo.+$', '^ChangeMethod.+In.+TypeTo.+$', '^ChangeParameter.+To.+FromConstructor.+In.+$', '^ChangeParameter.+To.+FromMethod.+In.+$', '^ChangeRecordComponent.+To.+FromRecord.+$', '^Reduce.+VisibilityTo.+$', '^ReduceConstructor.+In.+VisibilityTo.+$', '^ReduceField.+In.+VisibilityTo.+$', '^ReduceMethod.+In.+VisibilityTo.+$', '^RemoveConstructor.+In.+$', '^RemoveException.+FromConstructor.+In.+$', '^RemoveException.+FromMethod.+In.+$', '^RemoveField.+In.+$', '^RemoveImplementedInterface.+FromType.+$', '^RemoveMethod.+In.+$', '^RemoveModifier.+ToField.+In.+$', '^RemoveModifier.+ToMethod.+In.+$', '^RemoveParameter.+FromMethod.+In.+$', '^RemoveSuperClassFromClass.+$']


# Strategies not evaluated

In [14]:
def uniformize_strategy_name(strategy):
    for s in STRATEGIES:
        if s.match(strategy):
            return s.pattern

    return None

In [15]:
analyzed_strategies = results_data.copy()
analyzed_strategies['StrategyUniform'] = results_data['Strategy'].map(uniformize_strategy_name)
analyzed_strategies = analyzed_strategies.groupby('StrategyUniform').agg({'Strategy': 'count'}).reset_index().sort_values(by='Strategy', ascending=False)

errors_strategies = errors_data.copy()
errors_strategies['StrategyUniform'] = results_data['Strategy'].map(uniformize_strategy_name)
errors_strategies = errors_strategies.groupby('StrategyUniform').agg({'Strategy': 'count'}).reset_index().sort_values(by='Strategy', ascending=False)

not_analyzed_strategies = sorted(set([s.pattern for s in STRATEGIES]) - set(analyzed_strategies['StrategyUniform'].tolist()))
not_analyzed_and_not_in_errors_strategies = sorted(set(not_analyzed_strategies) - set(errors_strategies['StrategyUniform'].tolist()))
print('Number of strategies not analyzed and not in errors:', len(not_analyzed_and_not_in_errors_strategies))
print(not_analyzed_and_not_in_errors_strategies)

Number of strategies not analyzed and not in errors: 1
['^RemoveParameter.+FromConstructor.+In.+$']


In [16]:
analyzed_strategies.head()

Unnamed: 0,StrategyUniform,Strategy
14,^ChangeParameter.+To.+FromMethod.+In.+$,43520
12,^ChangeMethod.+In.+TypeTo.+$,29453
29,^RemoveModifier.+ToMethod.+In.+$,28348
8,^AddParameter.+ToMethod.+In.+$,28160
19,^ReduceMethod.+In.+VisibilityTo.+$,14098


In [17]:
errors_strategies.head()

Unnamed: 0,StrategyUniform,Strategy
14,^ChangeParameter.+To.+FromMethod.+In.+$,43520
12,^ChangeMethod.+In.+TypeTo.+$,29453
29,^RemoveModifier.+ToMethod.+In.+$,28348
8,^AddParameter.+ToMethod.+In.+$,28160
19,^ReduceMethod.+In.+VisibilityTo.+$,14098


# Results by Strategy

In [18]:
for strategy in STRATEGIES:
    results_for_strategy = full_data[full_data['bench'].str.match(strategy.pattern)]

    print('-----------------------------------------')
    print(f"{strategy.pattern.replace('.+', '')[1:-1]} ({len(results_for_strategy.index)} strategies)")

    print('Global')
    print(compute_metrics_for_breaking_type(results_for_strategy, GENERAL_BREAKING_SUFFIX, tools_analyzed))

    print('Binary')
    print(compute_metrics_for_breaking_type(results_for_strategy, BINARY_BREAKING_SUFFIX, tools_analyzed))

    print('Source')
    print(compute_metrics_for_breaking_type(results_for_strategy, SOURCE_BREAKING_SUFFIX, tools_analyzed))
    print('-----------------------------------------')
    print()

-----------------------------------------
AddModifierTo (179 strategies)
Global
     tool  accuracy  precision  recall   f1
0  Roseau       1.0        1.0     1.0  1.0
Binary
     tool  accuracy  precision  recall   f1
0  Roseau       1.0        1.0     1.0  1.0
Source
     tool  accuracy  precision  recall   f1
0  Roseau       1.0        1.0     1.0  1.0
-----------------------------------------

-----------------------------------------
AddExceptionToConstructorIn (133 strategies)
Global
     tool  accuracy  precision  recall        f1
0  Roseau  0.984962   0.977528     1.0  0.988636
Binary
     tool  accuracy precision recall  f1
0  Roseau       1.0        -1     -1  -1
Source
     tool  accuracy  precision  recall        f1
0  Roseau  0.984962   0.977528     1.0  0.988636
-----------------------------------------

-----------------------------------------
AddExceptionToMethodIn (4200 strategies)
Global
     tool  accuracy  precision  recall        f1
0  Roseau  0.951667   0.784272 