In [1]:
import pandas as pd
import re

from functools import reduce

# Methods

In [5]:
# Raw regular expressions, one per benchmark strategy family.
# Order matters: uniformize_strategy_name returns the first pattern that
# matches, so the specific Reduce<Member> patterns must stay ahead of the
# generic '^Reduce.+VisibilityTo.+$'.
_STRATEGY_PATTERNS = (
    r'^Add.+ModifierTo.+$',
    r'^AddException.+ToConstructor.+In.+$',
    r'^AddException.+ToMethod.+In.+$',
    r'^AddImplementedInterfaceToType.+$',
    r'^AddMethodToType.+$',
    r'^AddModifier.+ToField.+In.+$',
    r'^AddModifier.+ToMethod.+In.+$',
    r'^AddParameter.+ToConstructor.+In.+$',
    r'^AddParameter.+ToMethod.+In.+$',
    r'^AddRecordComponent.+ToRecord.+$',
    r'^AddSuperClassToClass.+$',
    r'^ChangeField.+In.+TypeTo.+$',
    r'^ChangeMethod.+In.+TypeTo.+$',
    r'^ChangeParameter.+To.+FromConstructor.+In.+$',
    r'^ChangeParameter.+To.+FromMethod.+In.+$',
    r'^ChangeRecordComponent.+To.+FromRecord.+$',
    r'^ReduceConstructor.+In.+VisibilityTo.+$',
    r'^ReduceField.+In.+VisibilityTo.+$',
    r'^ReduceMethod.+In.+VisibilityTo.+$',
    r'^Reduce.+VisibilityTo.+$',
    r'^Remove.+ModifierIn.+$',
    r'^RemoveConstructor.+In.+$',
    r'^RemoveEnumValue.+In.+$',
    r'^RemoveException.+FromConstructor.+In.+$',
    r'^RemoveException.+FromMethod.+In.+$',
    r'^RemoveField.+In.+$',
    r'^RemoveImplementedInterface.+FromType.+$',
    r'^RemoveMethod.+In.+$',
    r'^RemoveModifier.+ToField.+In.+$',
    r'^RemoveModifier.+ToMethod.+In.+$',
    r'^RemoveParameter.+FromConstructor.+In.+$',
    r'^RemoveParameter.+FromMethod.+In.+$',
    r'^RemoveRecordComponent.+FromRecord.+$',
    r'^RemoveSuperClassFromClass.+$',
    r'^RemoveType.+$',
)

# Compiled once so every lookup reuses the same pattern objects.
STRATEGIES = [re.compile(pattern) for pattern in _STRATEGY_PATTERNS]

In [6]:
def uniformize_strategy_name(strategy, strategies=None):
    """Map a concrete benchmark name to the pattern of its strategy family.

    Parameters
    ----------
    strategy : str
        Concrete benchmark name, e.g. ``'RemoveTypeR21'``.
    strategies : iterable of compiled regular expressions, optional
        Candidate patterns, tried in order. Defaults to the module-level
        ``STRATEGIES`` list.

    Returns
    -------
    str or None
        The ``.pattern`` string of the first regex that matches ``strategy``.
        ``None`` when no pattern matches, or when ``strategy`` is not a string
        (for instance the NaN pandas uses for a missing value), instead of
        letting ``re`` raise a ``TypeError`` in the middle of a ``Series.map``.
    """
    if not isinstance(strategy, str):
        return None

    candidates = STRATEGIES if strategies is None else strategies
    # First match wins, so the order of the candidates is significant.
    return next((regex.pattern for regex in candidates if regex.match(strategy)), None)

In [7]:
def compute_metrics_for_breaking_type(data, tools=None, suffixes=None, ground_truth_key=None):
    """Compute accuracy, precision, recall and F1 for each (tool, breaking kind).

    For every tool and suffix, the column ``'<tool> <suffix>'`` is compared
    with ``'<ground_truth_key> <suffix>'``. Rows whose value is neither True
    nor False in one of the two columns contribute to no cell of the
    confusion matrix.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the verdict columns described above.
    tools : list of str, optional
        Tool column prefixes. Defaults to the module-level ``TOOLS_ANALYZED``.
    suffixes : list of str, optional
        Breaking kinds. Defaults to the module-level ``BREAKING_SUFFIXES``.
    ground_truth_key : str, optional
        Ground-truth column prefix. Defaults to the module-level
        ``GROUND_TRUTH_KEY``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``('tool', 'suffix')`` with columns ``accuracy``,
        ``precision``, ``recall`` and ``f1``. A metric whose denominator is
        zero is reported as ``-1``.
    """
    tools = TOOLS_ANALYZED if tools is None else tools
    suffixes = BREAKING_SUFFIXES if suffixes is None else suffixes
    ground_truth_key = GROUND_TRUTH_KEY if ground_truth_key is None else ground_truth_key

    undefined = -1  # sentinel for "denominator is zero"

    def ratio(numerator, denominator):
        return numerator / denominator if denominator > 0 else undefined

    metrics_data = []

    for tool in tools:
        for suffix in suffixes:
            predicted = data[f'{tool} {suffix}']
            expected = data[f'{ground_truth_key} {suffix}']

            predicted_pos, predicted_neg = predicted == True, predicted == False
            expected_pos, expected_neg = expected == True, expected == False

            # Count rows through the masks themselves. Counting the non-null
            # values of the first column of the filtered frame would
            # undercount whenever that column contains missing values.
            tp = int((predicted_pos & expected_pos).sum())
            tn = int((predicted_neg & expected_neg).sum())
            fp = int((predicted_pos & expected_neg).sum())
            fn = int((predicted_neg & expected_pos).sum())

            accuracy = ratio(tp + tn, tp + tn + fp + fn)
            precision = ratio(tp, tp + fp)
            recall = ratio(tp, tp + fn)
            # If either precision or recall is undefined then tp == 0, so the
            # sum is <= 0 and F1 is reported as undefined as well.
            f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else undefined

            metrics_data.append({
                'tool': tool,
                'suffix': suffix,
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1': f1
            })

    # Explicit columns keep set_index working when there is nothing to report.
    result_columns = ['tool', 'suffix', 'accuracy', 'precision', 'recall', 'f1']
    return pd.DataFrame(metrics_data, columns=result_columns).set_index(['tool', 'suffix'])


# Data

In [10]:
# Load the analysed benchmarks and tag every row with the pattern of its
# strategy family (None when no pattern matches).
results_data = pd.read_csv('results.csv').assign(
    StrategyUniform=lambda frame: frame['Strategy'].map(uniformize_strategy_name)
)
results_data.head()

Unnamed: 0,Strategy,Ground Truth Binary,Ground Truth Source,Roseau Binary,Roseau Source,StrategyUniform
0,RemoveTypeR21,1,1,1,1,^RemoveType.+$
1,ReduceR21VisibilityToPackagePrivate,1,1,1,1,^Reduce.+VisibilityTo.+$
2,ChangeRecordComponent0ToByteFromRecordApiR21,1,1,1,1,^ChangeRecordComponent.+To.+FromRecord.+$
3,AddImplementedInterfaceToTypeR21,0,0,0,0,^AddImplementedInterfaceToType.+$
4,ChangeRecordComponent0ToLongFromRecordApiR21,1,1,1,1,^ChangeRecordComponent.+To.+FromRecord.+$


In [11]:
# Load the benchmarks that ended in an error and tag every row with the
# pattern of its strategy family (None when no pattern matches).
errors_data = pd.read_csv('errors.csv').assign(
    StrategyUniform=lambda frame: frame['Strategy'].map(uniformize_strategy_name)
)
errors_data.head()

Unnamed: 0,Strategy,Error,StrategyUniform
0,AddRecordComponentLongToRecordApiR21,Failed to package new api to jar,^AddRecordComponent.+ToRecord.+$
1,AddRecordComponentByteToRecordApiR21,Failed to package new api to jar,^AddRecordComponent.+ToRecord.+$
2,AddRecordComponentByteVarargsToRecordApiR21,Failed to package new api to jar,^AddRecordComponent.+ToRecord.+$
3,AddRecordComponentLongVarargsToRecordApiR21,Failed to package new api to jar,^AddRecordComponent.+ToRecord.+$
4,AddRecordComponentJavaLangIntegerToRecordApiR21,Failed to package new api to jar,^AddRecordComponent.+ToRecord.+$


In [12]:
# Load the benchmarks listed as impossible and tag every row with the
# pattern of its strategy family (None when no pattern matches).
impossible_strategies_data = pd.read_csv('impossible_strategies.csv').assign(
    StrategyUniform=lambda frame: frame['Strategy'].map(uniformize_strategy_name)
)
impossible_strategies_data.head()

Unnamed: 0,Strategy,StrategyUniform
0,RemoveFinalModifierInR21,^Remove.+ModifierIn.+$
1,RemoveFinalModifierInR20,^Remove.+ModifierIn.+$
2,RemoveFinalModifierInR23,^Remove.+ModifierIn.+$
3,RemoveFinalModifierInR22,^Remove.+ModifierIn.+$
4,AddAbstractModifierToC3373,^Add.+ModifierTo.+$


In [28]:
# Row counts of the three input tables, followed by their sum.
dataset_sizes = {
    'results': len(results_data),
    'errors': len(errors_data),
    'impossible strategies': len(impossible_strategies_data),
}
for dataset_label, row_count in dataset_sizes.items():
    print(f"Number of strategies in {dataset_label}: {row_count}")
print(f"Number of strategies in all data: {sum(dataset_sizes.values())}")

Number of strategies in results: 146110
Number of strategies in errors: 192835
Number of strategies in impossible strategies: 42995
Number of strategies in all data: 381940


In [13]:
GROUND_TRUTH_KEY = 'Ground Truth'
TOOLS_ANALYZED = ['Roseau']

GENERAL_BREAKING_SUFFIX = 'Global'
BINARY_BREAKING_SUFFIX = 'Binary'
SOURCE_BREAKING_SUFFIX = 'Source'
BREAKING_SUFFIXES = [GENERAL_BREAKING_SUFFIX, BINARY_BREAKING_SUFFIX, SOURCE_BREAKING_SUFFIX]

# Benchmark identifiers first; the verdict columns are appended below.
full_data = pd.DataFrame({
    'bench': results_data['Strategy'],
    'benchUniform': results_data['StrategyUniform'],
})

# The ground truth and every analysed tool get the same three boolean columns:
# Binary and Source are True where the input value equals 1, and Global is
# True when either of the two is.
for subject in [GROUND_TRUTH_KEY, *TOOLS_ANALYZED]:
    binary_column = f'{subject} {BINARY_BREAKING_SUFFIX}'
    source_column = f'{subject} {SOURCE_BREAKING_SUFFIX}'
    full_data[binary_column] = results_data[binary_column] == 1
    full_data[source_column] = results_data[source_column] == 1
    full_data[f'{subject} {GENERAL_BREAKING_SUFFIX}'] = full_data[binary_column] | full_data[source_column]

full_data.head()

Unnamed: 0,bench,benchUniform,Ground Truth Binary,Ground Truth Source,Ground Truth Global,Roseau Binary,Roseau Source,Roseau Global
0,RemoveTypeR21,^RemoveType.+$,True,True,True,True,True,True
1,ReduceR21VisibilityToPackagePrivate,^Reduce.+VisibilityTo.+$,True,True,True,True,True,True
2,ChangeRecordComponent0ToByteFromRecordApiR21,^ChangeRecordComponent.+To.+FromRecord.+$,True,True,True,True,True,True
3,AddImplementedInterfaceToTypeR21,^AddImplementedInterfaceToType.+$,False,False,False,False,False,False
4,ChangeRecordComponent0ToLongFromRecordApiR21,^ChangeRecordComponent.+To.+FromRecord.+$,True,True,True,True,True,True


# Global Results

In [14]:
# Metrics over every analysed benchmark, one row per (tool, breaking kind).
compute_metrics_for_breaking_type(full_data)

Unnamed: 0_level_0,Unnamed: 1_level_0,accuracy,precision,recall,f1
tool,suffix,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Roseau,Global,0.93266,0.892543,0.995187,0.941074
Roseau,Binary,0.928999,0.877684,1.0,0.934858
Roseau,Source,0.85296,0.760217,0.990897,0.860363


# FP Analysis

In [15]:
# Binary false positives: Roseau reports a break that the ground truth does not.
binary_fp_mask = (full_data['Ground Truth Binary'] == False) & (full_data['Roseau Binary'] == True)
not_binary_breaking_but_for_roseau = full_data[binary_fp_mask]

print('Number of benchs not breaking binary but for Roseau:', len(not_binary_breaking_but_for_roseau))

# Distinct strategy families among those false positives, in alphabetical order.
strategies_not_bb_but_roseau = sorted(not_binary_breaking_but_for_roseau['benchUniform'].unique())
print('Number of strategies not breaking binary but for Roseau:', len(strategies_not_bb_but_roseau))
print(strategies_not_bb_but_roseau)

Number of benchs not breaking binary but for Roseau: 10374
Number of strategies not breaking binary but for Roseau: 20
['^AddModifier.+ToField.+In.+$', '^AddModifier.+ToMethod.+In.+$', '^AddParameter.+ToConstructor.+In.+$', '^AddParameter.+ToMethod.+In.+$', '^ChangeField.+In.+TypeTo.+$', '^ChangeMethod.+In.+TypeTo.+$', '^ChangeParameter.+To.+FromConstructor.+In.+$', '^ChangeParameter.+To.+FromMethod.+In.+$', '^Reduce.+VisibilityTo.+$', '^ReduceConstructor.+In.+VisibilityTo.+$', '^ReduceField.+In.+VisibilityTo.+$', '^ReduceMethod.+In.+VisibilityTo.+$', '^RemoveConstructor.+In.+$', '^RemoveField.+In.+$', '^RemoveImplementedInterface.+FromType.+$', '^RemoveMethod.+In.+$', '^RemoveModifier.+ToField.+In.+$', '^RemoveModifier.+ToMethod.+In.+$', '^RemoveParameter.+FromMethod.+In.+$', '^RemoveSuperClassFromClass.+$']


In [16]:
# Source false positives: Roseau reports a break that the ground truth does not.
source_fp_mask = (full_data['Ground Truth Source'] == False) & (full_data['Roseau Source'] == True)
not_source_breaking_but_for_roseau = full_data[source_fp_mask]

print('Number of benchs not breaking source but for Roseau:', len(not_source_breaking_but_for_roseau))

# Distinct strategy families among those false positives, in alphabetical order.
strategies_not_bs_but_roseau = sorted(not_source_breaking_but_for_roseau['benchUniform'].unique())
print('Number of strategies not breaking source but for Roseau:', len(strategies_not_bs_but_roseau))
print(strategies_not_bs_but_roseau)

Number of benchs not breaking source but for Roseau: 20876
Number of strategies not breaking source but for Roseau: 22
['^AddException.+ToConstructor.+In.+$', '^AddException.+ToMethod.+In.+$', '^AddModifier.+ToField.+In.+$', '^AddParameter.+ToConstructor.+In.+$', '^AddParameter.+ToMethod.+In.+$', '^AddRecordComponent.+ToRecord.+$', '^ChangeField.+In.+TypeTo.+$', '^ChangeMethod.+In.+TypeTo.+$', '^ChangeParameter.+To.+FromConstructor.+In.+$', '^ChangeParameter.+To.+FromMethod.+In.+$', '^ChangeRecordComponent.+To.+FromRecord.+$', '^ReduceConstructor.+In.+VisibilityTo.+$', '^ReduceField.+In.+VisibilityTo.+$', '^ReduceMethod.+In.+VisibilityTo.+$', '^RemoveConstructor.+In.+$', '^RemoveException.+FromConstructor.+In.+$', '^RemoveException.+FromMethod.+In.+$', '^RemoveField.+In.+$', '^RemoveMethod.+In.+$', '^RemoveModifier.+ToField.+In.+$', '^RemoveModifier.+ToMethod.+In.+$', '^RemoveParameter.+FromMethod.+In.+$']


# Strategies not evaluated

In [17]:
# Number of analysed benchmarks per strategy family, largest family first.
analyzed_strategies = (
    results_data
    .groupby('StrategyUniform')['Strategy']
    .count()
    .reset_index()
    .sort_values(by='Strategy', ascending=False)
)
analyzed_strategies.head()

Unnamed: 0,StrategyUniform,Strategy
14,^ChangeParameter.+To.+FromMethod.+In.+$,43520
12,^ChangeMethod.+In.+TypeTo.+$,28823
8,^AddParameter.+ToMethod.+In.+$,28160
6,^AddModifier.+ToMethod.+In.+$,9652
19,^ReduceMethod.+In.+VisibilityTo.+$,8398


In [18]:
# Number of benchmarks in error per strategy family, largest family first.
errors_strategies = (
    errors_data
    .groupby('StrategyUniform')['Strategy']
    .count()
    .reset_index()
    .sort_values(by='Strategy', ascending=False)
)
errors_strategies.head()

Unnamed: 0,StrategyUniform,Strategy
6,^AddParameter.+ToMethod.+In.+$,76720
9,^ChangeParameter.+To.+FromMethod.+In.+$,50320
8,^ChangeMethod.+In.+TypeTo.+$,33247
4,^AddModifier.+ToMethod.+In.+$,14896
12,^ReduceMethod.+In.+VisibilityTo.+$,8702


In [19]:
# Number of impossible benchmarks per strategy family, largest family first.
impossible_strategies = (
    impossible_strategies_data
    .groupby('StrategyUniform')['Strategy']
    .count()
    .reset_index()
    .sort_values(by='Strategy', ascending=False)
)
impossible_strategies.head()

Unnamed: 0,StrategyUniform,Strategy
14,^RemoveModifier.+ToMethod.+In.+$,24548
9,^ReduceMethod.+In.+VisibilityTo.+$,5700
4,^AddModifier.+ToMethod.+In.+$,3952
12,^RemoveException.+FromMethod.+In.+$,2906
2,^AddException.+ToMethod.+In.+$,2794


In [20]:
# Give each count column its final name before merging, so the result does not
# depend on the suffixes pandas adds to clashing column names.
count_frames = [
    analyzed_strategies.rename(columns={'Strategy': 'Analyzed'}),
    errors_strategies.rename(columns={'Strategy': 'Errors'}),
    impossible_strategies.rename(columns={'Strategy': 'Impossible'}),
]
count_columns = ['Analyzed', 'Errors', 'Impossible']

strategies_stats = count_frames[0]
for count_frame in count_frames[1:]:
    strategies_stats = strategies_stats.merge(count_frame, on=['StrategyUniform'], how='outer')

# A family absent from one of the tables has a count of 0 there.
strategies_stats = (
    strategies_stats
    .fillna(0)
    .rename(columns={'StrategyUniform': 'Strategy'})
    .astype({column: 'int' for column in count_columns})
    .set_index('Strategy')
    .sort_index()
)

# Share of each outcome within a family, rounded to a whole percent. Each
# share is rounded on its own, so a row may not add up to exactly 100.
total_per_strategy = strategies_stats[count_columns].sum(axis=1)
for column in count_columns:
    strategies_stats[f'% {column}'] = round(strategies_stats[column] / total_per_strategy * 100)
strategies_stats = strategies_stats.astype({f'% {column}': 'int' for column in count_columns})

strategies_stats

Unnamed: 0_level_0,Analyzed,Errors,Impossible,% Analyzed,% Errors,% Impossible
Strategy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
^Add.+ModifierTo.+$,61,207,118,16,54,31
^AddException.+ToConstructor.+In.+$,98,4,35,72,3,26
^AddException.+ToMethod.+In.+$,1406,1500,2794,25,26,49
^AddImplementedInterfaceToType.+$,136,0,0,100,0,0
^AddMethodToType.+$,32,57,0,36,64,0
^AddModifier.+ToField.+In.+$,544,0,608,47,0,53
^AddModifier.+ToMethod.+In.+$,9652,14896,3952,34,52,14
^AddParameter.+ToConstructor.+In.+$,2060,680,0,75,25,0
^AddParameter.+ToMethod.+In.+$,28160,76720,0,27,73,0
^AddRecordComponent.+ToRecord.+$,520,320,0,62,38,0


In [21]:
# Strategy families with no analysed benchmark at all.
not_analyzed_strategies = sorted(
    {regex.pattern for regex in STRATEGIES}.difference(analyzed_strategies['StrategyUniform'])
)
print('Number of strategies not analyzed:', len(not_analyzed_strategies))
not_analyzed_strategies

Number of strategies not analyzed: 1


['^RemoveParameter.+FromConstructor.+In.+$']

In [22]:
# Strategy families with no benchmark in the error table.
not_errors_strategies = sorted(
    {regex.pattern for regex in STRATEGIES}.difference(errors_strategies['StrategyUniform'])
)
print('Number of strategies not in error:', len(not_errors_strategies))
not_errors_strategies

Number of strategies not in error: 12


['^AddImplementedInterfaceToType.+$',
 '^AddModifier.+ToField.+In.+$',
 '^AddSuperClassToClass.+$',
 '^ChangeField.+In.+TypeTo.+$',
 '^ChangeParameter.+To.+FromConstructor.+In.+$',
 '^ChangeRecordComponent.+To.+FromRecord.+$',
 '^Reduce.+VisibilityTo.+$',
 '^RemoveEnumValue.+In.+$',
 '^RemoveException.+FromConstructor.+In.+$',
 '^RemoveField.+In.+$',
 '^RemoveModifier.+ToField.+In.+$',
 '^RemoveRecordComponent.+FromRecord.+$']

In [23]:
# Strategy families with no benchmark in the impossible table.
possible_strategies = sorted(
    {regex.pattern for regex in STRATEGIES}.difference(impossible_strategies['StrategyUniform'])
)
print('Number of possible strategies:', len(possible_strategies))
possible_strategies

Number of possible strategies: 19


['^AddImplementedInterfaceToType.+$',
 '^AddMethodToType.+$',
 '^AddParameter.+ToConstructor.+In.+$',
 '^AddParameter.+ToMethod.+In.+$',
 '^AddRecordComponent.+ToRecord.+$',
 '^ChangeField.+In.+TypeTo.+$',
 '^ChangeParameter.+To.+FromConstructor.+In.+$',
 '^ChangeParameter.+To.+FromMethod.+In.+$',
 '^ChangeRecordComponent.+To.+FromRecord.+$',
 '^Reduce.+VisibilityTo.+$',
 '^RemoveConstructor.+In.+$',
 '^RemoveEnumValue.+In.+$',
 '^RemoveField.+In.+$',
 '^RemoveImplementedInterface.+FromType.+$',
 '^RemoveMethod.+In.+$',
 '^RemoveParameter.+FromConstructor.+In.+$',
 '^RemoveParameter.+FromMethod.+In.+$',
 '^RemoveRecordComponent.+FromRecord.+$',
 '^RemoveType.+$']

In [24]:
# Families that appear in neither the error table nor the impossible table.
full_analyzed_strategies = sorted(set(not_errors_strategies).intersection(possible_strategies))
print('Number of strategies fully analyzed:', len(full_analyzed_strategies))
full_analyzed_strategies

Number of strategies fully analyzed: 8


['^AddImplementedInterfaceToType.+$',
 '^ChangeField.+In.+TypeTo.+$',
 '^ChangeParameter.+To.+FromConstructor.+In.+$',
 '^ChangeRecordComponent.+To.+FromRecord.+$',
 '^Reduce.+VisibilityTo.+$',
 '^RemoveEnumValue.+In.+$',
 '^RemoveField.+In.+$',
 '^RemoveRecordComponent.+FromRecord.+$']

# Results by Strategy

In [25]:
# Metrics per strategy family. Rows are selected through the precomputed
# 'benchUniform' label rather than by re-matching the regex against 'bench'.
# A generic pattern such as '^Reduce.+VisibilityTo.+$' also matches the names
# of the more specific ReduceConstructor/ReduceField/ReduceMethod families, so
# re-matching would count those benchmarks under two headings.
for strategy in STRATEGIES:
    results_for_strategy = full_data[full_data['benchUniform'] == strategy.pattern]

    # Heading: the pattern stripped of its '.+' wildcards and '^'/'$' anchors.
    print(f"{strategy.pattern.replace('.+', '')[1:-1]} ({len(results_for_strategy.index)} strategies)")
    print('-----------------------------------------')

    print(compute_metrics_for_breaking_type(results_for_strategy))
    print()

AddModifierTo (61 strategies)
-----------------------------------------
               accuracy  precision  recall   f1
tool   suffix                                  
Roseau Global       1.0        1.0     1.0  1.0
       Binary       1.0        1.0     1.0  1.0
       Source       1.0        1.0     1.0  1.0

AddExceptionToConstructorIn (98 strategies)
-----------------------------------------
               accuracy  precision  recall        f1
tool   suffix                                       
Roseau Global  0.979592   0.977528     1.0  0.988636
       Binary  1.000000  -1.000000    -1.0 -1.000000
       Source  0.979592   0.977528     1.0  0.988636

AddExceptionToMethodIn (1406 strategies)
-----------------------------------------
               accuracy  precision  recall        f1
tool   suffix                                       
Roseau Global  0.855619   0.784272     1.0  0.879095
       Binary  1.000000  -1.000000    -1.0 -1.000000
       Source  0.855619   0.784272     1