In [2]:
import pandas as pd 
import os
import csv
import json
import glob
import re

In [3]:
def createDirIfNotExists(folder_name):
    if not os.path.isdir(folder_name): 
        os.makedirs(folder_name)

In [4]:
root="/home/jovyan/work"
analysis_results_path = root + "/analysis/results/"
createDirIfNotExists(analysis_results_path)

In [5]:
def searchRegression(results):
    candidates = []
    success_results = [ r for r in results if r['ExecuteTest'] ]
    if len(success_results) > 1:
        first_success_found = success_results[1]
        
        # BORDER CASE: REGRESSION IS IN PREV COMMIT
        if first_success_found['id'] == 2:
            candidates.append(results[1]['commit'][0:8])
            
        for result in reversed(results[2:first_success_found['id']]):
            candidates.append(result['commit'][0:8])
            if result['Build'] and result['BuildTest']: break
                
    return candidates

In [77]:
def generateResults(project, bug_path):
    results = []
    
    # FOR EACH COMMIT
    for index, row in pd.read_csv(bug_path+'commit_history.csv').iterrows():
        commit_path = bug_path+"commits/{id}-{c_hash}/".format(id=row['id'], c_hash=row['hash'])
        if os.path.isfile(commit_path+"result.json"):
            with open(commit_path+"result.json") as f:
                raw_result = json.load(f)
                result = {
                    'id': row['id'],
                    'commit': row['hash'],
                    'Build': raw_result['isSourceBuildSuccess'],
                    'BuildTest': raw_result['isTestBuildSuccess'],
                    'ExecuteTest': raw_result['isTestExecutionSuccess'],
                    'HasTestReport': os.path.isfile(commit_path+"test-report.xml"),
                    'parents': []
                }
                results.append(result)  
            
    return results

In [93]:
def analyzeBug(project, bug_id):
    
    bug_name = "Bug_"+str(bug_id)
    bug_path = "{root}/results/{project}/{bug_name}/".format(root=root, project=project, bug_name=bug_name)
    
    with open("{root}/configFiles/{project}/bugs/{bug_name}.json".format(root=root,project=project, bug_name=bug_name)) as f:
        bug_info = json.load(f)
        
    test_name = re.search(r"-Dtest=(.*) test",bug_info['test_command']).group(1)

    bug_result = {
        'id': project + "_" + bug_name,
        'bug': bug_name,
        'project': project,
        "fix_pass": True,
        "prev_fails": True,
        "category": None,
        "test_name": test_name,
        "bug_report": bug_info['bug_report'],
        "fix_commit": bug_info['fix_commit'],
        "BIC_candidates": [],
        "sub_category": "-"
    }
    
    if not os.path.isfile(bug_path+'commit_history.csv'):
        print(bug_path+'commit_history.csv')
        bug_result['category'] = "Other error"
        return bug_result
    
    results_dir = analysis_results_path+"{project}/{bug_name}/".format(project=project, bug_name=bug_name)
    createDirIfNotExists(results_dir)

    results = generateResults(project,bug_path)
    
    fix_result = results[0]
    
    if not fix_result['ExecuteTest']: # FIX COMMIT - SHOULD PASS
        
        bug_result['category'] = "Test fails in the fix commit"
        bug_result['fix_pass'] = False
        bug_result['prev_fails'] = None
        
        if fix_result['Build']:
            if fix_result['BuildTest']:
                if not fix_result['HasTestReport']:
                    bug_result['sub_category'] = "The test was not executed"
                else:
                    bug_result['sub_category'] = "Test execution fails"
            else:
                bug_result['sub_category'] = "Failure in test build"
        else:
            bug_result['sub_category'] = "Failure in source build"
    
    else: 
        
        prev_result = results[1]
        
        if prev_result['ExecuteTest']: # PREV COMMIT - SHOULD NOT PASS
            bug_result['category'] = "Test passes in the commit prior to the fix"
            bug_result['prev_fails'] = False
        else:
        
            # SEARCH FOR BIC IF FIX COMMIT PASS AND PREV COMMIT FAILS
            if bug_result['fix_pass'] and bug_result['prev_fails']:

                # SEARCH REGRESSION
                candidates = searchRegression(results)
                if len(candidates) > 0:

                    bug_result['BIC_candidates'] = candidates
                    bug_result['category'] = "A regression is detected"

                    if len(candidates) == 1:
                        bug_result['sub_category'] = "Unique candidates" 
                    else:
                        bug_result['sub_category'] = "Multiple candidates" 

                # SEARCH OTHER BIC
                else:

                    success_test_build = [ r for r in results if r['BuildTest'] ]
                    candidate_commit = success_test_build[-1]
                    if candidate_commit['id']+1 == len(results):
                        # Candidate commit is first commit
                        bug_result['sub_category'] = "Reach first commit"
                    else:
                        candidate_commit_antecesor = results[candidate_commit['id']+1]
                        if candidate_commit_antecesor['Build']:
                            bug_result['sub_category'] = "Failure in test build"
                        else:
                            bug_result['sub_category'] = "Failure in source build" 
                    bug_result['BIC_candidates'] = [(candidate_commit['commit'][0:8])]
                    bug_result['category'] = "No regression is detected"
    
    # Save bug result
    with open(results_dir+"bug_result.json",'w+') as json_file:
        json.dump(bug_result, json_file, indent=4)
    
    # SAVE COMMIT HISTORY
    with open(results_dir+"commit_history_results.csv", 'w+') as csvfile: 
        writer = csv.DictWriter(csvfile, fieldnames = results[0].keys()) 
        writer.writeheader()
        writer.writerows(results)

    return bug_result

In [94]:
project_test = "Time"#"JacksonCore"
bug=23#11
analyzeBug(project_test, bug)

{'id': 'Time_Bug_23',
 'bug': 'Bug_23',
 'project': 'Time',
 'fix_pass': False,
 'prev_fails': None,
 'category': 'Test fails in the fix commit',
 'test_name': 'org.joda.time.TestDateTimeZone#testForID_String_old',
 'bug_report': 'https://sourceforge.net/p/joda-time/bugs/112',
 'fix_commit': '14dedcbc04682c1b1b6c5ebe91bc930b79eeb572',
 'BIC_candidates': [],
 'sub_category': 'The test was not executed'}

In [95]:
projects = [
    "JacksonXml", "Time", "Collections", "Compress", "Csv", "JacksonCore", "JacksonDatabind", "Gson", "Jsoup"
]
bug_results = []
for project in projects:
    # FOR EACH BUG
    for bug_path in glob.glob("{root}/results/{project}/Bug_*/".format(root=root, project=project)):
        bug = re.search(r"Bug_(\d+)", bug_path).group(1)
        try:
            bug_result = analyzeBug(project, bug)
            bug_results.append(bug_result)  
        except Exception as e:
            print(bug_path)
            print(e)

In [96]:
df = pd.DataFrame(bug_results)[['id', 'category', 'sub_category']]
df[df['category'] != None].set_index('id').sort_index()
print(len(df))
df['category'].value_counts()

348


No regression is detected                     246
Test fails in the fix commit                   47
A regression is detected                       43
Test passes in the commit prior to the fix     12
Name: category, dtype: int64

In [18]:
#df[df['category']=='Test failed at fix commit']
df.groupby(['category', "sub_category"]).count()['id']

category                                    sub_category             
A regression is detected                    Multiple candidates           10
                                            Unique candidates             33
No regression is detected                   Failure in source build      117
                                            Failure in test build        127
                                            Reach first commit             2
Test fails in the fix commit                Failure in source build       22
                                            Failure in test build          3
                                            Test execution fails           6
                                            The test was not executed     16
Test passes in the commit prior to the fix  -                             12
Name: id, dtype: int64

In [19]:
df_projects = pd.DataFrame(bug_results)[['id', 'category', 'sub_category', 'project']]
df_projects[df_projects['project']=="Jsoup"].groupby(['category', "sub_category"]).count()['id']

category                      sub_category           
A regression is detected      Unique candidates           8
No regression is detected     Failure in source build     1
                              Failure in test build      82
Test fails in the fix commit  Test execution fails        2
Name: id, dtype: int64

In [21]:
df[df['category']=='Test fails in the fix commit'][df['sub_category']=='Test execution fails']

  df[df['category']=='Test fails in the fix commit'][df['sub_category']=='Test execution fails']


Unnamed: 0,id,category,sub_category
12,Time_Bug_9,Test fails in the fix commit,Test execution fails
16,Time_Bug_8,Test fails in the fix commit,Test execution fails
23,Time_Bug_7,Test fails in the fix commit,Test execution fails
26,Time_Bug_16,Test fails in the fix commit,Test execution fails
257,Jsoup_Bug_67,Test fails in the fix commit,Test execution fails
334,Jsoup_Bug_78,Test fails in the fix commit,Test execution fails
