## Reproduction of Table 4

This notebook reproduces the values in Table 4 (project-specific reproduction performance). 

In [7]:
import sys
sys.path.append("../../scripts/")
import json
import pandas

In [8]:
# Bug IDs listed one per line in all_d4j_crashes.txt. Hyphens are replaced by
# underscores so the IDs can be compared with the bug IDs checked in evaluate().
with open('../../data/Defects4J/all_d4j_crashes.txt') as crash_file:
    jcrashpack_bugs = [line.strip().replace('-', '_') for line in crash_file]

# Bug IDs listed one per line in invalid_bug_reports.txt, normalised the same way;
# evaluate() skips every bug found in this list.
with open('../../data/Defects4J/invalid_bug_reports.txt') as invalid_file:
    invalid_bugs = [line.strip().replace('-', '_') for line in invalid_file]

In [9]:
def evaluate(raw_result, only_crash=False):
    """Flatten raw test-execution results into one DataFrame row per generated test.

    Parameters
    ----------
    raw_result : dict
        Mapping ``bug_id -> {test file name -> result}``. A result is either a
        string (recorded as ``javalang_parse_error``) or a dict with ``'buggy'``
        and ``'fixed'`` entries, each providing ``'compile_error'``,
        ``'runtime_error'`` and ``'autogen_failed'``.
    only_crash : bool, default False
        When true, keep only bugs whose ID appears in the module-level
        ``jcrashpack_bugs`` list.

    Returns
    -------
    pandas.DataFrame
        Columns: project, bug_id, test_no, javalang_parse_error,
        is_compile_error, is_runtime_error, buggy_version_failing,
        fixed_version_failing, fname, success. The columns are present even
        when no rows are produced, so downstream ``groupby('bug_id')`` calls
        keep working on an empty result.

    Notes
    -----
    Bugs listed in the module-level ``invalid_bugs`` list are always skipped.
    ``success`` is true only when the test fails on the buggy version and does
    not fail on the fixed version, with no parse/compile/runtime error.
    """
    columns = [
        'project',
        'bug_id',
        'test_no',
        'javalang_parse_error',
        'is_compile_error',
        'is_runtime_error',
        'buggy_version_failing',
        'fixed_version_failing',
        'fname',
        'success',
    ]
    rows = []

    for bug_id, test_exec_results in raw_result.items():
        if bug_id in invalid_bugs:
            continue
        if only_crash and bug_id not in jcrashpack_bugs:
            continue

        # The project name is the part of the bug ID before the first underscore.
        project = bug_id.split('_')[0]

        for test_no, (fname, res) in enumerate(test_exec_results.items(), start=1):
            row = {
                'project': project,
                'bug_id': bug_id,
                'test_no': test_no,
                'javalang_parse_error': False,
                'is_compile_error': False,
                'is_runtime_error': False,
                'buggy_version_failing': False,
                'fixed_version_failing': False,
                'fname': fname,
                'success': False,
            }

            # The error categories are mutually exclusive and checked in order;
            # the failing flags are only filled in when no error was recorded.
            if isinstance(res, str):
                row['javalang_parse_error'] = True
            elif res['buggy']['compile_error'] or res['fixed']['compile_error']:
                row['is_compile_error'] = True
            elif res['buggy']['runtime_error'] or res['fixed']['runtime_error']:
                row['is_runtime_error'] = True
            else:
                buggy_fails = bool(res['buggy']['autogen_failed'])
                fixed_fails = bool(res['fixed']['autogen_failed'])
                row['buggy_version_failing'] = buggy_fails
                row['fixed_version_failing'] = fixed_fails
                row['success'] = buggy_fails and not fixed_fails

            rows.append(row)

    # Passing the column list keeps the schema stable when ``rows`` is empty.
    return pandas.DataFrame(rows, columns=columns)

In [10]:
# JSON file of raw test-execution results; it is loaded and passed to evaluate() below.
RESULT_PATH = '../../results/example2_n50.json'

In [11]:
with open(RESULT_PATH) as f:
    reproduction_df = evaluate(json.load(f))

# Collapse the per-test rows to one row per bug; the summed `success` column is
# the number of generated tests that reproduced that bug.
# NOTE(review): sum() is applied to every non-key column, including the string
# columns `project` and `fname`; `project` is recomputed on the next line and
# `fname` is not used below.
aggr_eval_df = reproduction_df.groupby('bug_id').sum().reset_index()
aggr_eval_df['project'] = aggr_eval_df.bug_id.apply(lambda x: x.split('_')[0])
succeeded_bugs = aggr_eval_df[aggr_eval_df.success > 0]

total_succ = 0
total_bugs = 0
tab_results = []
# Iterate over every evaluated project, not only those with a reproduced bug,
# so projects with zero successes still get a row and count towards the total.
for project in sorted(set(aggr_eval_df.project.tolist())):
    succeeded_bug_ids = succeeded_bugs[succeeded_bugs.project == project].bug_id.tolist()
    succ_num = len(succeeded_bug_ids)
    total_succ += succ_num
    total_num = (aggr_eval_df.project == project).sum()
    total_bugs += total_num
    tab_results.append({'Project': project, 'rep': succ_num, 'total': total_num})
tab_results.append({'Project': 'Total', 'rep': total_succ, 'total': total_bugs})

tab_df = pandas.DataFrame(tab_results)
tab_df

Unnamed: 0,Project,rep,total
0,Chart,5,7
1,Cli,14,29
2,Closure,2,172
3,Codec,10,18
4,Collections,1,4
5,Compress,4,46
6,Csv,6,16
7,Gson,7,11
8,JacksonCore,8,24
9,JacksonDatabind,30,107
