In [39]:
import pandas as pd 
import os
import json
import glob
import re
import sys
import concurrent.futures
from junitparser import JUnitXml, Failure, Error, Skipped
sys.setrecursionlimit(20000) # Current limit = 999

from CommitGraph import CommitGraph

In [40]:
def createDirIfNotExists(folder_name):
    if not os.path.isdir(folder_name): 
        os.makedirs(folder_name)

In [41]:
root="/home/jovyan/work"
analysis_results_path = root + "/analysis/results/"
createDirIfNotExists(analysis_results_path)

In [66]:
def searchRegression(graph, init_node):
    paths = []
    candidates = []
    visited = []
    queue = []   

    visited.append(init_node)
    queue.append(init_node)

    while queue:
        node = queue.pop()  

        successParents = True
        parents = graph[node['commit']]['parents']

        if node['State'] == "TestFail":
            candidates = []

        for parent_hash in parents:
            if parent_hash not in graph: # Reach first commit
                successParents = False
                if len(queue)==0:
                    break
                else: 
                    continue # Check other branches
            parent = graph[parent_hash]
            successParent = parent['State'] == "TestSuccess"
            successParents = successParents and successParent
            if not successParent:
                if parent['State'] in ["BuildError", "TestBuildError"]:
                    candidates.append(node)
                if parent_hash not in visited:
                    visited.append(parent_hash)
                    queue.append(parent)
    
        if successParents and node['State'] != "TestSuccess":
            
            if node['State'] == 'TestFail': 
                pass
                #return [node]
            else:
                candidates = candidates + [node]
                if len(queue)==0:
                    return candidates
                else:
                    paths.append(candidates)
    
    if len(paths) > 0: return paths[0]
    return []

In [52]:
def analyzeTestReport(test_method, report_path):
    # APPLY THIS TO EACH COMMIT WITH FAILED TEST
    xml = JUnitXml.fromfile(report_path)
    test_case = None
    for tc in xml:
        if test_method == tc.name:
            test_case = tc
            break
    if test_case is None: raise Exception("Test case not found")

    result = {
        'name': test_method,
        'success': True,
        'failure': False,
        'failure_msg': None,
        'error': False,
        'error_msg': None,
        'skipped': False
    }
    for elem in tc:
        result['success'] = False
        if elem.__class__ is Failure:
            result['failure'] = True
            result['failure_msg'] = elem.type
        if elem.__class__ is Error:
            result['error'] = True
            result['error_msg'] = elem.message
        if elem.__class__ is Skipped:
            result['skipped'] = True

    return result



In [56]:
def analyzeBug(project, bug_id):
    
    bug_name = "Bug_"+str(bug_id)
    bug_path = "{root}/results/{project}/{bug_name}/".format(root=root, project=project, bug_name=bug_name)
    
    with open("{root}/configFiles/{project}/bugs/{bug_name}.json".format(root=root,project=project, bug_name=bug_name)) as f:
        bug_info = json.load(f)
        
    test_name = re.search(r"-Dtest=(.*) test",bug_info['test_command']).group(1)

    bug_result = {
        'id': project + "_" + bug_name,
        'bug': bug_name,
        'project': project,
        "fix_pass": True,
        "prev_fails": True,
        "category": None,
        "sub_category": "-",
        "test_name": test_name,
        "bug_report": bug_info['bug_report'],
        "fix_commit": bug_info['fix_commit'],
        "BIC_candidates": []
    }
    
    if not os.path.isfile(bug_path+'commit_history.csv'):
        print(bug_path+'commit_history.csv')
        bug_result['category'] = "Other error"
        return bug_result
    
    results_dir = analysis_results_path+"{project}/{bug_name}/".format(project=project, bug_name=bug_name)
    createDirIfNotExists(results_dir)

    commit_graph = CommitGraph(project, bug_id, bug_path, results_dir, restore=True)
    
    fix_commit = commit_graph.graph[bug_info['fix_commit']]

    
    if not fix_commit['ExecuteTest']: # FIX COMMIT - SHOULD PASS
        
        bug_result['category'] = "Test fails in the fix commit"
        bug_result['fix_pass'] = False
        bug_result['prev_fails'] = None
        
        if fix_commit['Build']:
            if fix_commit['BuildTest']:
                if not fix_commit['HasTestReport']:
                    bug_result['sub_category'] = "The test was not executed"
                else:
                    bug_result['sub_category'] = "Test execution fails"
                    test_method = test_name.split("#")[1]
                    analyzeTestReport(test_method, bug_path+"commits/0-"+fix_commit['commit']+"/test-report.xml")
                     
            else:
                bug_result['sub_category'] = "Failure in test build"
        else:
            bug_result['sub_category'] = "Failure in source build"
    
    else: 

        # SEARCH REGRESSION
        candidates = searchRegression(commit_graph.graph, fix_commit)
        if len(candidates) > 0:

            bug_result['BIC_candidates'] = list(map(lambda c: (c['id'],c['commit'] ),candidates))
            bug_result['category'] = "A regression is detected"

            if len(candidates) == 1:
                bug_result['sub_category'] = "Unique candidates" 
            else:
                bug_result['sub_category'] = "Multiple candidates" 

        # SEARCH OTHER BIC
        else:
            bug_result['category'] = "No regression is detected"
            bug_result['sub_category'] = "-"

#             success_test_build = [ r for r in results if r['BuildTest'] ]
#             candidate_commit = success_test_build[-1]
#             if candidate_commit['id']+1 == len(results):
#                 # Candidate commit is first commit
#                 bug_result['sub_category'] = "Reach first commit"
#             else:
#                 candidate_commit_antecesor = results[candidate_commit['id']+1]
#                 if candidate_commit_antecesor['Build']:
#                     bug_result['sub_category'] = "Failure in test build"
#                 else:
#                     bug_result['sub_category'] = "Failure in source build" 
#             bug_result['BIC_candidates'] = [(candidate_commit['commit'][0:8])]
#             bug_result['category'] = "No regression is detected"
    
    # Save bug result
    with open(results_dir+"bug_result.json",'w+') as json_file:
        json.dump(bug_result, json_file, indent=4)

    return bug_result

In [26]:
# CASE 1: Regression
candidates = analyzeBug("JacksonCore", 11)['BIC_candidates']
assert candidates[0][0] == 162

In [27]:
# CASE 1.1: Regression - Case when algorithm reach first commit and need to check remaining in queue
candidates = analyzeBug("JacksonCore", 10)['BIC_candidates']
assert candidates[0][0] == 99

In [28]:
# CASE 1.2: Regression - Could be more candidates
candidates = analyzeBug("JacksonDatabind", 59)['BIC_candidates']
assert set([146]) <= set(map(lambda c: c[0],candidates))

In [29]:
# CASE 2: No Regression - No candidate commit
candidates = analyzeBug("JacksonDatabind", 86)['BIC_candidates']
assert len(candidates) == 0

In [30]:
# CASE 2.1: No Regression - No candidate commit
candidates = analyzeBug("Collections", 28)['BIC_candidates']
assert len(candidates) == 0

In [31]:
# CASE 3: Regresion but with buildability limitations
candidates = analyzeBug("JacksonDatabind", 52)['BIC_candidates']
assert set([905, 906, 907, 908, 909, 910, 911, 912, 915, 919, 920, 921, 922, 923, 924, 925, 926, 927]) <= set(map(lambda c: c[0],candidates))

In [63]:
# CASE 3.1: Regresion but with buildability limitations
candidates = analyzeBug("JacksonDatabind", 38)['BIC_candidates']
assert set([70]) <= set(map(lambda c: c[0],candidates))

In [64]:
projects = [
    "JacksonXml", "Time", "Collections", "Compress", "Csv", "JacksonCore", "JacksonDatabind", "Gson", "Jsoup"
]
future_results = []
with concurrent.futures.ThreadPoolExecutor() as executor:
    for project in projects:
        # FOR EACH BUG
        for bug_path in glob.glob("{root}/results/{project}/Bug_*/".format(root=root, project=project)):
            bug = re.search(r"Bug_(\d+)", bug_path).group(1)
            try:
                future = executor.submit(analyzeBug, project, bug)
                future_results.append(future)
            except Exception as e:
                print(bug_path)
                print(e)
bug_results = [future.result() for future in future_results]

In [65]:
df = pd.DataFrame(bug_results)[['id', 'category', 'sub_category']]
df[df['category'] != None].set_index('id').sort_index()
print(len(df))
df['category'].value_counts()

348


No regression is detected       260
Test fails in the fix commit     47
A regression is detected         41
Name: category, dtype: int64

In [20]:
#df[df['category']=='Test failed at fix commit']
df.groupby(['category', "sub_category"]).count()['id']

category                      sub_category             
A regression is detected      Multiple candidates           19
                              Unique candidates             22
No regression is detected     -                            260
Test fails in the fix commit  Failure in source build       22
                              Failure in test build          3
                              Test execution fails           6
                              The test was not executed     16
Name: id, dtype: int64

In [17]:
df_projects = pd.DataFrame(bug_results)[['id', 'category', 'sub_category', 'project']]
df_projects[df_projects['project']=="J"].groupby(['category', "sub_category"]).count()['id']

Series([], Name: id, dtype: int64)

In [23]:
#df[df['category']=='Test fails in the fix commit'][df['sub_category']=='Test execution fails']
df[df['category']=='A regression is detected'][df['sub_category']=='Unique candidates']

  df[df['category']=='A regression is detected'][df['sub_category']=='Unique candidates']


Unnamed: 0,id,category,sub_category
9,Time_Bug_1,A regression is detected,Unique candidates
37,Compress_Bug_28,A regression is detected,Unique candidates
72,Compress_Bug_45,A regression is detected,Unique candidates
106,JacksonCore_Bug_11,A regression is detected,Unique candidates
115,JacksonCore_Bug_10,A regression is detected,Unique candidates
118,JacksonCore_Bug_21,A regression is detected,Unique candidates
154,JacksonDatabind_Bug_24,A regression is detected,Unique candidates
172,JacksonDatabind_Bug_35,A regression is detected,Unique candidates
180,JacksonDatabind_Bug_41,A regression is detected,Unique candidates
201,JacksonDatabind_Bug_87,A regression is detected,Unique candidates
