In [109]:
import glob
import json
import os
import re
import sys
from collections import deque

import pandas as pd

sys.setrecursionlimit(20000) # Current limit = 999

from CommitGraph import CommitGraph

In [110]:
def createDirIfNotExists(folder_name):
    """Create ``folder_name`` (including missing parents) if it is not there yet.

    Args:
        folder_name: Path of the directory to create.

    Raises:
        FileExistsError: If ``folder_name`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate isdir() check, which left a window in
    # which another process could create the directory before makedirs() ran.
    os.makedirs(folder_name, exist_ok=True)

In [111]:
# Base directory of the workspace; every input/output path below hangs off it.
root = "/home/jovyan/work"

# Directory that receives the per-bug analysis output; created eagerly so the
# analysis functions can write into it.
analysis_results_path = "".join((root, "/analysis/results/"))
createDirIfNotExists(analysis_results_path)

In [134]:
def searchRegression(graph, init_node):
    """Walk the ancestors of ``init_node`` breadth-first and collect regression candidates.

    Only parents whose ``State`` is not ``"TestSuccess"`` are followed, so the
    walk stops at commits where the test passes. A node is reported when:

    * all of its (non-empty) parents are ``"TestSuccess"`` while the node itself
      is not, or
    * it has a not-yet-visited parent in ``"BuildError"`` / ``"TestBuildError"``.

    Reaching a not-yet-visited ``"TestFail"`` parent discards every candidate
    collected so far.

    Args:
        graph: Mapping of commit hash to node dict. Each node is read through
            its ``'commit'``, ``'State'`` and ``'parents'`` keys.
        init_node: Node dict at which the walk starts.

    Returns:
        List of candidate node dicts in the order they were found. A node is
        listed at most once.

    Raises:
        KeyError: If a non-empty parent hash is missing from ``graph``.
    """
    candidates = []
    # Track visited commits by hash. The node dict itself must not be stored
    # here: membership is always tested with hash strings, so a dict entry
    # would never match (and is not hashable for a set).
    visited = {init_node['commit']}
    queue = deque([init_node])

    while queue:
        node = queue.popleft()

        parents = graph[node['commit']]['parents']

        successParents = True
        # Guards against appending the same node once per build-broken parent
        # (merge commits), which would inflate the candidate count.
        node_recorded = False

        for parent_hash in parents:
            if parent_hash == "":
                continue  # empty hash: no real parent to inspect
            parent = graph[parent_hash]
            successParent = parent['State'] == "TestSuccess"
            successParents = successParents and successParent
            if not successParent and parent_hash not in visited:
                if parent['State'] in ["BuildError", "TestBuildError"]:
                    if not node_recorded:
                        candidates.append(node)
                        node_recorded = True
                if parent['State'] == "TestFail":
                    # The test still fails further back: drop what was found.
                    candidates = []
                    node_recorded = False
                visited.add(parent_hash)
                queue.append(parent)

        if successParents and node['State'] != "TestSuccess":
            candidates.append(node)
    return candidates

In [113]:
def analyzeBug(project, bug_id):
    """Classify one bug of ``project`` and persist the verdict as JSON.

    Reads the bug configuration from
    ``{root}/configFiles/{project}/bugs/Bug_{bug_id}.json`` and the commit
    graph from ``{root}/results/{project}/Bug_{bug_id}/``.

    Args:
        project: Project name used to build the input and output paths.
        bug_id: Bug identifier; converted with ``str()`` when building names.

    Returns:
        The ``bug_result`` dict. When ``commit_history.csv`` is missing, the
        dict is returned with category ``"Other error"`` and nothing is
        written to disk; otherwise it is also dumped to ``bug_result.json``
        in the bug's results directory.
    """
    bug_name = "Bug_" + str(bug_id)
    bug_path = "{root}/results/{project}/{bug_name}/".format(
        root=root, project=project, bug_name=bug_name
    )
    config_file = "{root}/configFiles/{project}/bugs/{bug_name}.json".format(
        root=root, project=project, bug_name=bug_name
    )

    with open(config_file) as config_handle:
        bug_info = json.load(config_handle)

    # The test name is whatever sits between "-Dtest=" and " test" in the command.
    test_match = re.search(r"-Dtest=(.*) test", bug_info['test_command'])
    test_name = test_match.group(1)

    bug_result = {
        'id': project + "_" + bug_name,
        'bug': bug_name,
        'project': project,
        "fix_pass": True,
        "prev_fails": True,
        "category": None,
        "sub_category": "-",
        "test_name": test_name,
        "bug_report": bug_info['bug_report'],
        "fix_commit": bug_info['fix_commit'],
        "BIC_candidates": [],
    }

    # Without the commit history there is nothing to analyse: report and bail out.
    history_csv = bug_path + 'commit_history.csv'
    if not os.path.isfile(history_csv):
        print(history_csv)
        bug_result['category'] = "Other error"
        return bug_result

    results_dir = analysis_results_path + "{project}/{bug_name}/".format(
        project=project, bug_name=bug_name
    )
    createDirIfNotExists(results_dir)

    commit_graph = CommitGraph(project, bug_id, bug_path, results_dir, restore=True)
    fix_commit = commit_graph.graph[bug_info['fix_commit']]

    if fix_commit['ExecuteTest']:
        # The test ran at the fix commit: look for a regression among its ancestors.
        candidates = searchRegression(commit_graph.graph, fix_commit)
        if candidates:
            bug_result['BIC_candidates'] = [(c['id'], c['commit']) for c in candidates]
            bug_result['category'] = "A regression is detected"
            if len(candidates) == 1:
                bug_result['sub_category'] = "Unique candidates"
            else:
                bug_result['sub_category'] = "Multiple candidates"
        # TODO: "search other BIC" when no candidate is found. Nothing is done
        # yet, so 'category' stays None. Earlier draft kept for reference:
        #
        #     success_test_build = [ r for r in results if r['BuildTest'] ]
        #     candidate_commit = success_test_build[-1]
        #     if candidate_commit['id']+1 == len(results):
        #         # Candidate commit is first commit
        #         bug_result['sub_category'] = "Reach first commit"
        #     else:
        #         candidate_commit_antecesor = results[candidate_commit['id']+1]
        #         if candidate_commit_antecesor['Build']:
        #             bug_result['sub_category'] = "Failure in test build"
        #         else:
        #             bug_result['sub_category'] = "Failure in source build"
        #     bug_result['BIC_candidates'] = [(candidate_commit['commit'][0:8])]
        #     bug_result['category'] = "No regression is detected"
    else:
        # The test should pass at the fix commit; record why it did not.
        bug_result['category'] = "Test fails in the fix commit"
        bug_result['fix_pass'] = False
        bug_result['prev_fails'] = None

        if not fix_commit['Build']:
            bug_result['sub_category'] = "Failure in source build"
        elif not fix_commit['BuildTest']:
            bug_result['sub_category'] = "Failure in test build"
        elif not fix_commit['HasTestReport']:
            bug_result['sub_category'] = "The test was not executed"
        else:
            bug_result['sub_category'] = "Test execution fails"

    # Save bug result
    with open(results_dir + "bug_result.json", 'w+') as json_file:
        json.dump(bug_result, json_file, indent=4)

    return bug_result

In [135]:
# Note: failure in test build -> JacksonDatabind Bug_17
# CASE 1: regression with a single candidate (bug introduced at commit 162)
analyzeBug(project="JacksonCore", bug_id=11)['BIC_candidates']

162


[(162, '30a2670b336c3a4e4dee57460577b4bcc4b9e9b6')]

In [118]:
# CASE 2: No Regression - No candidate commit
#analyzeBug("JacksonDatabind", 86)['BIC_candidates']

In [136]:
# CASE 3: regression, but with buildability limitations (many candidates)
analyzeBug(project="JacksonDatabind", bug_id=38)['BIC_candidates']

92
378


[(161, '92e3b56a875d183c39aea24b1d9c876cc6a363b1'),
 (115, '8ee51ad3a84987359401719198d7c0bff597b1a3'),
 (162, '77d1b25b27f0536ad0559468c7902a1bfe6190b8'),
 (119, '9305cbb61f00c16c1fa8398d6af42dbbaca22d9a'),
 (163, '5ada97ce40217626cb2cdca011fe369d0be6e729'),
 (120, '1ed892ecabe31ff5ef11960dadc04b33388d43f7'),
 (164, '52ae85f72940673a6a54bfd18c836d18d07c243c'),
 (121, '5289e1245af4b63e1ac22c68049cf01d838db971'),
 (165, 'ff192c11f6574c9ceeae5f1e8f9a655ea262a828'),
 (124, 'be1136361aa2eb9ddf65f9af48dda36abc4a4cf0'),
 (166, 'ecbf40f900f454297b112225a6e9561d69269d14'),
 (125, '19ba37248e23156b7898b198d77ee75e6ec6c06e'),
 (167, '0e4249a2b6cd4ce71a2980b50dcd9765ad03324c'),
 (126, 'f54f0f1f64903c5b479b0b42d2d0db6f308d0f31'),
 (168, '35715752711100f3e476f6ce135f9fb27d656f81'),
 (127, '4a8e9b4279131ee3504119c9d94c732776058abe'),
 (169, '756a6a076db2f63072e767c3ec3311e0b554db30'),
 (128, 'c4ad251ef7e5d9e646ed4ea5351bc272e9c236a9'),
 (170, '77438e99339674758b5430e18951d8b33abf0d64'),
 (129, '20a6

In [122]:
# projects = [
#     "JacksonXml", "Time", "Collections", "Compress", "Csv", "JacksonCore", "JacksonDatabind", "Gson", "Jsoup"
# ]
# bug_results = []
# for project in projects:
#     # FOR EACH BUG
#     for bug_path in glob.glob("{root}/results/{project}/Bug_*/".format(root=root, project=project)):
#         bug = re.search(r"Bug_(\d+)", bug_path).group(1)
#         try:
#             bug_result = analyzeBug(project, bug)
#             bug_results.append(bug_result)  
#         except Exception as e:
#             print(bug_path)
#             print(e)

In [None]:
# NOTE: `bug_results` is produced by the batch loop over `projects`, which is
# currently commented out in this notebook; it must be run before this cell.
df = pd.DataFrame(bug_results)[['id', 'category', 'sub_category']]
# `df['category'] != None` is evaluated element-wise and is True for every row,
# so it never dropped unclassified bugs; notna() is the real missing-value test.
# The filtered view is kept under its own name so `df` still holds every bug.
df_classified = df[df['category'].notna()].set_index('id').sort_index()
print(len(df))
df['category'].value_counts()

In [None]:
#df[df['category']=='Test failed at fix commit']
# Number of bugs per (category, sub_category) pair.
df.groupby(['category', "sub_category"])['id'].count()

In [None]:
# Same breakdown as above, restricted to the Jsoup project.
df_projects = pd.DataFrame(bug_results)[['id', 'category', 'sub_category', 'project']]
df_projects.loc[df_projects['project'] == "Jsoup"].groupby(['category', "sub_category"])['id'].count()

In [None]:
# Bugs whose test ran at the fix commit but failed. Both conditions go into one
# mask: chaining df[a][b] builds `b` on the unfiltered frame, so pandas has to
# reindex it against the filtered one and emits a UserWarning.
df[(df['category'] == 'Test fails in the fix commit') & (df['sub_category'] == 'Test execution fails')]