In [1]:
import pandas as pd 
import os
import csv
import json
import glob
import re
import sys
import graphviz
from CommitGraph import CommitGraph

In [2]:
def createDirIfNotExists(folder_name):
    """Create ``folder_name`` (and any missing parents) unless it already exists.

    Args:
        folder_name: Path of the directory to ensure.

    Raises:
        OSError: If the directory cannot be created, e.g. because the path
            already exists but is not a directory.
    """
    # exist_ok=True folds the "is it already there?" check into the call, so
    # there is no gap between checking and creating in which another process
    # could create the directory and make makedirs fail.
    os.makedirs(folder_name, exist_ok=True)

In [3]:
# Workspace base directory; every config/result path in this notebook is built from it.
root = "/home/jovyan/work"
# Directory where the per-bug analysis output is written (note the trailing slash:
# later code appends relative paths to it by plain string concatenation).
analysis_results_path = "{root}/analysis/results/".format(root=root)
createDirIfNotExists(analysis_results_path)

In [4]:
# def searchRegression(results):
#     candidates = []
#     success_results = [ r for r in results if r['ExecuteTest'] ]
#     if len(success_results) > 1:
#         first_success_found = success_results[1]
        
#         # BORDER CASE: REGRESSION IS IN PREV COMMIT
#         if first_success_found['id'] == 2:
#             candidates.append(results[1]['commit'][0:8])
            
#         for result in reversed(results[2:first_success_found['id']]):
#             candidates.append(result['commit'][0:8])
#             if result['Build'] and result['BuildTest']: break
                
#     return candidates

In [5]:
def analyzeBug(project, bug_id):
    """Build the result record of one bug and persist it as ``bug_result.json``.

    Reads ``{root}/configFiles/{project}/bugs/Bug_{bug_id}.json``, extracts the
    test name from its ``test_command`` and assembles the result record. When
    the bug has a ``commit_history.csv`` under ``{root}/results/{project}/``,
    the commit graph is restored and the record is written to
    ``{analysis_results_path}{project}/Bug_{bug_id}/bug_result.json``.

    Args:
        project: Project name used to build the config and result paths.
        bug_id: Bug number; it is passed through ``str()`` to form ``Bug_<id>``.

    Returns:
        dict: The result record. ``category`` is ``"Other error"`` when
        ``commit_history.csv`` is missing (nothing is written to disk in that
        case); otherwise it is still ``None`` because the classification
        logic below is commented out.

    Raises:
        ValueError: If ``test_command`` contains no ``-Dtest=<name> test`` part.

    Errors raised while opening/parsing the config file, restoring the
    ``CommitGraph`` or looking up the fix commit propagate unchanged.
    """
    bug_name = "Bug_"+str(bug_id)
    bug_path = "{root}/results/{project}/{bug_name}/".format(root=root, project=project, bug_name=bug_name)

    with open("{root}/configFiles/{project}/bugs/{bug_name}.json".format(root=root,project=project, bug_name=bug_name)) as f:
        bug_info = json.load(f)

    # Fail with a message that names the bug instead of the opaque
    # "'NoneType' object has no attribute 'group'" a bare .group(1) would give.
    test_match = re.search(r"-Dtest=(.*) test",bug_info['test_command'])
    if test_match is None:
        raise ValueError(
            "Cannot extract the test name from the test_command of {project} {bug_name}: {command!r}".format(
                project=project, bug_name=bug_name, command=bug_info['test_command']
            )
        )
    test_name = test_match.group(1)

    bug_result = {
        'id': project + "_" + bug_name,
        'bug': bug_name,
        'project': project,
        "fix_pass": True,
        "prev_fails": True,
        "category": None,
        "sub_category": "-",
        "test_name": test_name,
        "bug_report": bug_info['bug_report'],
        "fix_commit": bug_info['fix_commit'],
        "BIC_candidates": []
    }

    # Without a commit history there is nothing to analyse: report and bail out.
    if not os.path.isfile(bug_path+'commit_history.csv'):
        print(bug_path+'commit_history.csv')
        bug_result['category'] = "Other error"
        return bug_result

    results_dir = analysis_results_path+"{project}/{bug_name}/".format(project=project, bug_name=bug_name)
    createDirIfNotExists(results_dir)

    commit_graph = CommitGraph(project, bug_id, bug_path, results_dir, restore=True)

    # Not used further yet; kept because the lookup fails loudly when the fix
    # commit is absent from the restored graph (presumably KeyError — confirm
    # against the type of CommitGraph.graph).
    fix_commit = commit_graph.graph[bug_info['fix_commit']]

    # NOTE: legacy classification, kept for reference. It relies on a `results`
    # list and on `searchRegression`, neither of which is defined any more, so
    # it cannot simply be re-enabled.
#     fix_result = results[bug_info['fix_commit']]
    
#     if not fix_result['ExecuteTest']: # FIX COMMIT - SHOULD PASS
        
#         bug_result['category'] = "Test fails in the fix commit"
#         bug_result['fix_pass'] = False
#         bug_result['prev_fails'] = None
        
#         if fix_result['Build']:
#             if fix_result['BuildTest']:
#                 if not fix_result['HasTestReport']:
#                     bug_result['sub_category'] = "The test was not executed"
#                 else:
#                     bug_result['sub_category'] = "Test execution fails"
#             else:
#                 bug_result['sub_category'] = "Failure in test build"
#         else:
#             bug_result['sub_category'] = "Failure in source build"
    
#     else: 

#         # SEARCH REGRESSION
#         candidates = searchRegression(results)
#         if len(candidates) > 0:

#             bug_result['BIC_candidates'] = candidates
#             bug_result['category'] = "A regression is detected"

#             if len(candidates) == 1:
#                 bug_result['sub_category'] = "Unique candidates" 
#             else:
#                 bug_result['sub_category'] = "Multiple candidates" 

#         # SEARCH OTHER BIC
#         else:

#             success_test_build = [ r for r in results if r['BuildTest'] ]
#             candidate_commit = success_test_build[-1]
#             if candidate_commit['id']+1 == len(results):
#                 # Candidate commit is first commit
#                 bug_result['sub_category'] = "Reach first commit"
#             else:
#                 candidate_commit_antecesor = results[candidate_commit['id']+1]
#                 if candidate_commit_antecesor['Build']:
#                     bug_result['sub_category'] = "Failure in test build"
#                 else:
#                     bug_result['sub_category'] = "Failure in source build" 
#             bug_result['BIC_candidates'] = [(candidate_commit['commit'][0:8])]
#             bug_result['category'] = "No regression is detected"

    # Save bug result
    with open(results_dir+"bug_result.json",'w+') as json_file:
        json.dump(bug_result, json_file, indent=4)

    # Return the record on this path too, matching the early exit above, so
    # callers that collect the results always receive a dict.
    return bug_result

In [6]:
# Single-bug run, handy for checking analyzeBug before launching the batch cell.
# Failure in test build JacksonDatabind Bug_17
project_test = "JacksonCore"
bug = 11
analyzeBug(project=project_test, bug_id=bug)

In [None]:
# Projects whose bugs are analysed; each one has a folder under {root}/results/.
projects = [
    "JacksonXml", "Time", "Collections", "Compress", "Csv", "JacksonCore", "JacksonDatabind", "Gson", "Jsoup"
]
bug_results = []
for project in projects:
    # FOR EACH BUG
    # Sorted so the order of bug_results does not depend on the filesystem listing order.
    bug_dirs = sorted(glob.glob("{root}/results/{project}/Bug_*/".format(root=root, project=project)))
    for bug_path in bug_dirs:
        bug_match = re.search(r"Bug_(\d+)", bug_path)
        if bug_match is None:
            # The glob also matches folders such as "Bug_tmp/"; skip them
            # instead of aborting the whole run on .group().
            continue
        bug = bug_match.group(1)
        try:
            bug_result = analyzeBug(project, bug)
        except Exception as e:
            # Best effort: report the failing bug and carry on with the rest.
            print(bug_path)
            print(e)
            continue
        # Only keep real records: a None entry would break the DataFrame
        # cells below, which select columns by name.
        if bug_result is not None:
            bug_results.append(bug_result)

In [None]:
# One row per analysed bug, restricted to the classification columns.
df = pd.DataFrame(bug_results)[['id', 'category', 'sub_category']]
# `!= None` is evaluated element-wise by pandas and is True for every row, so it
# never filtered anything; notna() is the actual missing-value test. The filtered
# view gets its own name so that `df`, used by the following cells, stays complete.
df_categorized = df[df['category'].notna()].set_index('id').sort_index()
print(len(df))
df['category'].value_counts()

In [None]:
# Alternative view kept for reference:
#df[df['category']=='Test failed at fix commit']
# Number of bugs per (category, sub_category) pair, counted on the 'id' column.
df.groupby(['category', "sub_category"])['id'].count()

In [None]:
# Same records as `df`, plus the project column so the counts can be split per project.
df_projects = pd.DataFrame(bug_results).loc[:, ['id', 'category', 'sub_category', 'project']]
# Bugs per (category, sub_category) pair for the Jsoup project only.
(
    df_projects
    .loc[df_projects['project'] == "Jsoup"]
    .groupby(['category', "sub_category"])['id']
    .count()
)

In [None]:
# Both conditions are combined into a single mask. Chaining two boolean lookups
# would apply a mask built on `df` to the already-filtered frame, which pandas
# has to reindex (and warns about); the selected rows are the same.
df[(df['category'] == 'Test fails in the fix commit') & (df['sub_category'] == 'Test execution fails')]