In [1]:
import pandas as pd 
import os
import csv
import json
import glob
import re

In [2]:
def createDirIfNotExists(folder_name):
    if not os.path.isdir(folder_name): 
        os.makedirs(folder_name)

In [3]:
root="/home/jovyan/work"
analysis_results_path = root + "/analysis/results/"
createDirIfNotExists(analysis_results_path)

In [4]:
def searchRegression(results):
    candidates = []
    success_results = [ r for r in results if r['ExecuteTest'] ]
    if len(success_results) > 1:
        first_success_found = success_results[1]
        
        # BORDER CASE: REGRESSION IS IN PREV COMMIT
        if first_success_found['id'] == 2:
            candidates.append(results[1]['commit'][0:8])
            
        for result in reversed(results[2:first_success_found['id']]):
            candidates.append(result['commit'][0:8])
            if result['Build'] and result['BuildTest']: break
                
    return candidates

In [30]:
def analyzeBug(project, bug_path):
    bug_name = re.search(r"Bug_(\d+)", bug_path).group(0)
    bug_result = {
        'id': project + "_" + bug_name,
        'project': project,
        "fix_pass": True,
        "prev_fails": True,
        "Type": None,
        "BIC_candidates": [],
        "info": "-"
    }
    results = []
    
    if not os.path.isfile(bug_path+'commit_history.csv'):
        bug_result['Type'] = "Other error"
        return bug_result
    
    # FOR EACH COMMIT
    for index, row in pd.read_csv(bug_path+'commit_history.csv').iterrows():
        commit_path = bug_path+"commits/{id}-{c_hash}/".format(id=row['id'], c_hash=row['hash'])
        with open(commit_path+"result.json") as f:
            raw_result = json.load(f)
            result = {
                'id': index,
                'commit': row['hash'],
                'Build': raw_result['isSourceBuildSuccess'],
                'BuildTest': raw_result['isTestBuildSuccess'],
                'ExecuteTest': raw_result['isTestExecutionSuccess']
            }
            results.append(result)  
            
        if index == 0 and not result['ExecuteTest']: # FIX COMMIT - SHOULD PASS
            bug_result['Type'] = "Test fails in the fix commit"
            bug_result['fix_pass'] = False
            bug_result['prev_fails'] = None
            
            if result['Build']:
                if result['BuildTest']:
                    if not os.path.isfile(commit_path+"test-report.xml"):
                        bug_result['info'] = "The test was not executed"
                    else:
                        bug_result['info'] = "Test execution fails"
                else:
                    bug_result['info'] = "Failure in test build"
            else:
                bug_result['info'] = "Failure in source build"
            break
            
            
        if index == 1 and result['ExecuteTest']: # PREV COMMIT - SHOULD NOT PASS
            bug_result['Type'] = "Test passes in the commit prior to the fix"
            bug_result['prev_fails'] = False
            break

    results_dir = analysis_results_path+"{project}/{bug_name}/".format(project=project, bug_name=bug_name)
    createDirIfNotExists(results_dir)
    
    with open(results_dir+"results.csv", 'w+') as csvfile: 
        writer = csv.DictWriter(csvfile, fieldnames = results[0].keys()) 
        writer.writeheader()
        writer.writerows(results)
    
    # SEARCH FOR BIC IF FIX COMMIT PASS AND PREV COMMIT FAILS
    if bug_result['fix_pass'] and bug_result['prev_fails']:
        
        # SEARCH REGRESSION
        candidates = searchRegression(results)
        if len(candidates) > 0:
        
            bug_result['BIC_candidates'] = candidates
            bug_result['Type'] = "A regression is detected"
            
            if len(candidates) == 1:
                bug_result['info'] = "Unique candidates" 
            else:
                bug_result['info'] = "Multiple candidates" 
            
        # SEARCH OTHER BIC
        else:
            
            success_test_build = [ r for r in results if r['BuildTest'] ]
            candidate_commit = success_test_build[-1]
            if candidate_commit['id']+1 == len(results):
                # Candidate commit is first commit
                bug_result['info'] = "Reach first commit"
            else:
                candidate_commit_antecesor = results[candidate_commit['id']+1]
                if candidate_commit_antecesor['Build']:
                    bug_result['info'] = "Failure in test build"
                else:
                    bug_result['info'] = "Failure in source build" 
            bug_result['BIC_candidates'] = [(candidate_commit['commit'][0:8])]
            bug_result['Type'] = "No regression is detected"
        
    
    return bug_result

In [31]:
project_test = "Time"
bug=23
analyzeBug(project_test, "{root}/results/{project}/Bug_{id}/".format(root=root, project=project_test, id=bug))

{'id': 'Time_Bug_23',
 'project': 'Time',
 'fix_pass': False,
 'prev_fails': None,
 'Type': 'Test fails in the fix commit',
 'BIC_candidates': [],
 'info': 'The test was not executed'}

In [32]:
projects = [
    "JacksonXml", "Time", "Collections", "Compress", "Csv", "JacksonCore", "JacksonDatabind", "Gson"
]
bug_results = []
for project in projects:
    # FOR EACH BUG
    for bug_path in glob.glob("{root}/results/{project}/Bug_*/".format(root=root, project=project)):
        try:
            bug_result = analyzeBug(project, bug_path)
            bug_results.append(bug_result)  
        except Exception as e:
            print(bug_path)
            print(e)

In [33]:
df = pd.DataFrame(bug_results)
df[df['Type'] != None].set_index('id').sort_index()
print(len(df))
df['Type'].value_counts()

255


No regression is detected                     163
Test fails in the fix commit                   45
A regression is detected                       35
Test passes in the commit prior to the fix     12
Name: Type, dtype: int64

In [34]:
#df[df['Type']=='Test failed at fix commit']
df.groupby(['Type', "info"]).count()['id']

Type                                        info                     
A regression is detected                    Multiple candidates           10
                                            Unique candidates             25
No regression is detected                   Failure in source build      116
                                            Failure in test build         45
                                            Reach first commit             2
Test fails in the fix commit                Failure in source build       22
                                            Failure in test build          3
                                            Test execution fails           4
                                            The test was not executed     16
Test passes in the commit prior to the fix  -                             12
Name: id, dtype: int64

In [37]:
#df[df['info']=='Multiple candidates']
df[df['Type']=='Test passes in the commit prior to the fix']

Unnamed: 0,id,project,fix_pass,prev_fails,Type,BIC_candidates,info
45,Compress_Bug_31,Compress,True,False,Test passes in the commit prior to the fix,[],-
110,JacksonCore_Bug_15,JacksonCore,True,False,Test passes in the commit prior to the fix,[],-
138,JacksonDatabind_Bug_74,JacksonDatabind,True,False,Test passes in the commit prior to the fix,[],-
219,JacksonDatabind_Bug_78,JacksonDatabind,True,False,Test passes in the commit prior to the fix,[],-
239,Gson_Bug_6,Gson,True,False,Test passes in the commit prior to the fix,[],-
240,Gson_Bug_9,Gson,True,False,Test passes in the commit prior to the fix,[],-
243,Gson_Bug_4,Gson,True,False,Test passes in the commit prior to the fix,[],-
244,Gson_Bug_8,Gson,True,False,Test passes in the commit prior to the fix,[],-
246,Gson_Bug_2,Gson,True,False,Test passes in the commit prior to the fix,[],-
248,Gson_Bug_3,Gson,True,False,Test passes in the commit prior to the fix,[],-
