In [1]:
import pandas as pd 
import os
import csv
import json
import glob
import re

In [2]:
def createDirIfNotExists(folder_name):
    """Ensure that the directory ``folder_name`` exists.

    Missing parent directories are created as well. Calling this on a
    directory that already exists is a no-op.

    Parameters
    ----------
    folder_name : str
        Path of the directory to create.

    Raises
    ------
    FileExistsError
        If ``folder_name`` exists but is not a directory.
    """
    # exist_ok=True lets the OS decide atomically, instead of a separate
    # isdir() check that can race with another process creating the directory.
    os.makedirs(folder_name, exist_ok=True)

In [3]:
# Workspace base directory; every input and output path below is built from it.
root = "/home/jovyan/work"
# Output directory for the analysis. The trailing slash is kept because
# other cells build sub-paths by plain string concatenation.
analysis_results_path = "{root}/analysis/results/".format(root=root)
createDirIfNotExists(analysis_results_path)

In [4]:
def searchRegression(results):
    """Collect candidate commits for a regression from per-commit results.

    ``results`` is a list of dicts with the keys ``id``, ``commit``,
    ``Build``, ``BuildTest`` and ``ExecuteTest``. Each ``id`` is used as a
    slice bound into ``results``, so it must equal the entry's position in
    the list. The second entry whose ``ExecuteTest`` is truthy is taken as
    the reference passing commit (the first one is expected to be the fix
    commit at position 0 -- see the comments in ``analyzeBug``).

    Returns a list of 8-character commit-hash prefixes. The list is empty
    when fewer than two entries have a truthy ``ExecuteTest``.
    """
    passing = [entry for entry in results if entry['ExecuteTest']]
    if len(passing) < 2:
        return []

    boundary = passing[1]['id']
    suspects = []

    # Border case: the reference passing commit sits at position 2, so the
    # commit at position 1 is the candidate.
    if boundary == 2:
        suspects.append(results[1]['commit'][0:8])

    # Walk from the entry just before the reference passing commit back
    # towards position 2. Every visited commit is a suspect; stop after the
    # first one where both the sources and the tests built.
    window = results[2:boundary]
    for entry in window[::-1]:
        suspects.append(entry['commit'][0:8])
        if entry['Build'] and entry['BuildTest']:
            break

    return suspects

In [5]:
def analyzeBug(project, bug_path):
    """Classify one bug from its per-commit build/test results.

    Reads ``commit_history.csv`` in ``bug_path`` and, for every row, the file
    ``commits/<id>-<hash>/result.json``. Row 0 is treated as the fix commit
    (its test should pass) and row 1 as the previous commit (its test should
    fail). Reading stops early when either expectation is violated.

    The per-commit outcomes collected so far are written to
    ``<analysis_results_path>/<project>/<Bug_N>/results.csv``.

    Parameters
    ----------
    project : str
        Project name; used in the returned ``id`` and in the output directory.
    bug_path : str
        Directory of a single bug. It must contain ``Bug_<number>``; a
        trailing slash is optional.

    Returns
    -------
    dict
        ``id``             -- ``<project>_Bug_<number>``
        ``fix_pass``       -- False when the fix commit's test did not pass
        ``prev_fails``     -- False when the previous commit's test passed,
                              None when the fix commit already failed
        ``Type``           -- "Failed FIX", "Flaky test", "Regression",
                              "BuggyFeatureSinceCreation", or None when no
                              classification could be made
        ``BIC_candidates`` -- list of 8-character commit-hash prefixes

    Raises
    ------
    ValueError
        If ``bug_path`` does not contain ``Bug_<number>``.
    """
    match = re.search(r"Bug_(\d+)", bug_path)
    if match is None:
        # Fail with a clear message instead of AttributeError on None.
        raise ValueError(
            "bug_path does not contain 'Bug_<number>': {!r}".format(bug_path))
    bug_name = match.group(0)

    bug_result = {
        'id': project + "_" + bug_name,
        "fix_pass": True,
        "prev_fails": True,
        "Type": None,
        "BIC_candidates": []
    }

    # Column order of results.csv. Declared up front so the header can be
    # written even when no commit was read.
    fieldnames = ['id', 'commit', 'Build', 'BuildTest', 'ExecuteTest']
    results = []

    # FOR EACH COMMIT
    history = pd.read_csv(os.path.join(bug_path, 'commit_history.csv'))
    for index, row in history.iterrows():
        result_file = os.path.join(
            bug_path,
            "commits",
            "{id}-{c_hash}".format(id=row['id'], c_hash=row['hash']),
            "result.json",
        )
        with open(result_file) as f:
            result = json.load(f)

        # 'id' is the row position, not the CSV 'id' column: searchRegression
        # uses it to slice the results list.
        results.append({
            'id': index,
            'commit': row['hash'],
            'Build': result['isSourceBuildSuccess'],
            'BuildTest': result['isTestBuildSuccess'],
            'ExecuteTest': result['isTestExecutionSuccess']
        })

        test_passed = result['isTestExecutionSuccess']

        if index == 0 and not test_passed:  # FIX COMMIT - SHOULD PASS
            bug_result['Type'] = "Failed FIX"
            bug_result['fix_pass'] = False
            bug_result['prev_fails'] = None
            break

        if index == 1 and test_passed:  # PREV COMMIT - SHOULD NOT PASS
            bug_result['Type'] = "Flaky test"
            bug_result['prev_fails'] = False
            break

    results_dir = os.path.join(analysis_results_path, project, bug_name)
    createDirIfNotExists(results_dir)

    # newline='' is required by the csv module so the writer controls line
    # endings itself (otherwise extra blank lines appear on some platforms).
    with open(os.path.join(results_dir, "results.csv"), 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)

    # SEARCH FOR BIC IF FIX COMMIT PASS AND PREV COMMIT FAILS
    if bug_result['fix_pass'] and bug_result['prev_fails']:
        # SEARCH REGRESSION
        candidates = searchRegression(results)
        if candidates:
            bug_result['BIC_candidates'] = candidates
            bug_result['Type'] = "Regression"
        # SEARCH OTHER BIC
        else:
            # Last commit in the list whose tests could be built.
            success_test_build = [r for r in results if r['BuildTest']]
            # Guard: with an empty history, or when no commit built its
            # tests, there is nothing to report; Type stays None.
            if success_test_build:
                bug_result['BIC_candidates'] = [success_test_build[-1]['commit'][0:8]]
                bug_result['Type'] = "BuggyFeatureSinceCreation"

    return bug_result

In [6]:
# Try the analysis on one bug before running the whole batch.
project_test = "Compress"
analyzeBug(
    project=project_test,
    bug_path="{root}/results/{project}/Bug_1/".format(root=root, project=project_test),
)

{'id': 'Compress_Bug_1',
 'fix_pass': True,
 'prev_fails': True,
 'Type': 'BuggyFeatureSinceCreation',
 'BIC_candidates': ['ca165390']}

In [9]:
# Projects to analyse; their bugs are looked up under <root>/results/<project>/Bug_*/.
projects = ["JacksonXml", "Time", "Collections", "Compress", "Csv", "JacksonCore", "JacksonDatabind", "Gson"]
bug_results = []
for project in projects:
    # FOR EACH BUG
    # glob returns matches in arbitrary order; sorting keeps the order of
    # bug_results (and of the DataFrame built from it) the same on every run.
    bug_paths = sorted(glob.glob("{root}/results/{project}/Bug_*/".format(root=root, project=project)))
    for bug_path in bug_paths:
        bug_result = analyzeBug(project, bug_path)
        bug_results.append(bug_result)

In [10]:
# One row per analysed bug; displayed indexed and ordered by bug id.
df = pd.DataFrame(data=bug_results)
(
    df
    .set_index(keys='id')
    .sort_index()
)

Unnamed: 0_level_0,fix_pass,prev_fails,Type,BIC_candidates
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Collections_Bug_25,True,True,BuggyFeatureSinceCreation,[2d00ed46]
Collections_Bug_26,False,,Failed FIX,[]
Collections_Bug_27,False,,Failed FIX,[]
Collections_Bug_28,True,True,BuggyFeatureSinceCreation,[15ad7824]
Compress_Bug_1,True,True,BuggyFeatureSinceCreation,[ca165390]
Csv_Bug_1,True,True,BuggyFeatureSinceCreation,[38741a48]
Gson_Bug_1,True,True,BuggyFeatureSinceCreation,[57d1f32d]
Gson_Bug_10,True,False,Flaky test,[]
Gson_Bug_11,True,True,BuggyFeatureSinceCreation,[7ea5cc2b]
Gson_Bug_12,True,True,Regression,[b2c00a3b]
