In [1]:
import json
import glob
import re
import csv
import pandas as pd
import random
import subprocess

In [2]:
root="/home/jovyan/work"
analysis_results_path = root + "/analysis/results/"

In [3]:
repository = {
    "Cli": "https://github.com/apache/commons-cli/commit/",
    "JxPath": "https://github.com/apache/commons-jxpath/commit/",
    "Codec": "https://github.com/apache/commons-codec/commit/",
    "Time": "https://github.com/JodaOrg/joda-time/commit/",
    "Compress": "https://github.com/apache/commons-compress/commit/",
    "JacksonCore": "https://github.com/FasterXML/jackson-core/commit/",
    "JacksonXml": "https://github.com/FasterXML/jackson-dataformat-xml/commit/",
    "JacksonDatabind": "https://github.com/FasterXML/jackson-databind/commit/",
    "Gson": "https://github.com/google/gson/commit/",
    "Jsoup": "https://github.com/jhy/jsoup/commit/",
    "Lang": "https://github.com/apache/commons-lang/commit/",
    "Math": "https://github.com/Maes95/commons-math/commit/",
    "Closure": "https://github.com/google/closure-compiler/commit/",
    "Mockito": "https://github.com/mockito/mockito/commit/",
    "Csv": "https://github.com/apache/commons-csv/commit/",
    "Collections": "https://github.com/apache/commons-collections/commit/"
}

In [4]:
projects = [
    "JacksonXml", "Time", "Collections", "Compress", "Csv", "JacksonCore", "JacksonDatabind", "Gson", "Jsoup",
    "Lang", "Math", "Closure", "Mockito", "Cli", "JxPath", "Codec"
]
bugs_with_no_bic = []
for project in projects:
    # FOR EACH BUG
    for bug_path in glob.glob("{root}/results/{project}/Bug_*/".format(root=root, project=project)):
        bug_id = re.search(r"Bug_(\d+)", bug_path).group(1)
        result_path = root+"/analysis/results/{project}/Bug_{bug_id}/bug_result.json".format(project=project, bug_id=bug_id)
        with open(result_path) as f:
            result = json.load(f)
            if result["category"] == "No regression is detected":
                bugs_with_no_bic.append({
                    'bug_id': bug_id,
                    'project': project
                })

In [5]:
random.seed(2023)
random_bugs = random.sample(bugs_with_no_bic,100)

In [6]:
def strToList(text):
    return (
        text
        .replace("'", "")
        .replace('"', '')
        .replace(']', '')
        .replace('[', '')
    ).split(",")

In [7]:
build_log_path="/results/%s/Bug_%s/commits/%s-%s/source-build.log"
test_build_log_path="/results/%s/Bug_%s/commits/%s-%s/test-build.log"
def getError(idx,project, bug, commit):
    
    if commit['State'] == 'BuildError':
        logs_path = root+build_log_path%(project,bug, commit['id'],commit['commit'])
        
    if commit['State'] == 'TestBuildError':
        logs_path = root+test_build_log_path%(project,bug, commit['id'],commit['commit'])
        
    log = subprocess.check_output(['cat', logs_path]).decode('utf-8')
    
    file_name= "%d-%s-Bug_%s.log"%(idx,project,bug)
    dest_path = "random_bugs_analyzed/logs/"+file_name
    
    with open(dest_path, 'w') as f:
        f.write(log)
    
    match = re.search("error: (.*)", log)
    if match is not None:
        return match.group(1), file_name
    else:
        return "<Check file log file>", file_name

In [8]:
def calculateFirstTestFail(idx,bug):
    commit_history_results_path = root+"/analysis/results/{project}/Bug_{bug_id}/commit_history_results.csv".format(
        project=bug["project"], bug_id=bug["bug_id"]
    )
    with open(commit_history_results_path) as csvfile:
        commits = list(csv.DictReader(csvfile))
    commits_dict = {c['commit']:c for c in commits}
    first_test_fail_commit = None
    bfc = commits[0]
    for commit in commits:
        if commit['State'] == 'TestFail':
            first_test_fail_commit = commit
    parents = strToList(first_test_fail_commit['parents'])
    parents = [ commits_dict[p.strip()] if p != '' else None for p in parents ]
    parents_errors = []
    parents_logs = []
    for parent in parents:
        if parent is None:
            parents_errors.append("FFC is first commit")
            parents_logs.append("")
        else:
            error, log_file = getError(idx,bug["project"], bug["bug_id"], parent)
            parents_errors.append(error)
            parents_logs.append(log_file)
    result = {
        'idx': idx,
        'project': bug["project"],
        'bug_id': bug["bug_id"],
        'BFC': repository[bug["project"]]+bfc['commit'],
        'FFC': repository[bug["project"]]+first_test_fail_commit['commit'],
        'FFC_parent_error': "|#|".join(parents_errors),
        'FFC_parent_log':  "|#|".join(parents_logs)
    }
    return result

In [9]:
results = []
for idx, bug in enumerate(random_bugs):
    result = calculateFirstTestFail(idx,bug)
    results.append(result)

In [10]:
results[0]

{'idx': 0,
 'project': 'Math',
 'bug_id': '95',
 'BFC': 'https://github.com/Maes95/commons-math/commit/fbf87122e0f7229892b6dbbf2e211cc46acea008',
 'FFC': 'https://github.com/Maes95/commons-math/commit/5145f82217496e6e32ca70d525f69c33260ffd9c',
 'FFC_parent_error': 'package junit.swingui does not exist',
 'FFC_parent_log': '0-Math-Bug_95.log'}

In [11]:
pd.DataFrame.from_dict(results).to_csv('random_bugs_analyzed/100_random_bugs.csv', index=False)  