In [1]:
import csv
import pandas as pd
import re
import hashlib
import os
import json
import subprocess
import glob

# Base directory of the workspace: the "results/" and "analysis/results/"
# trees read by the cells below are resolved against this path.
root = "/home/jovyan/work"

In [2]:
def fromJsonList(list_str):
    """Parse the textual form of a list (e.g. "['abc', 'def']") into a list.

    The input is first parsed as before: every single quote is replaced by a
    double quote and the result is decoded as JSON. That blanket replacement
    corrupts strings that contain an apostrophe, so when it fails the text is
    retried unchanged as JSON and then as a Python literal.

    Args:
        list_str: string holding a JSON document or a Python literal.

    Returns:
        The decoded value (a list for list-shaped input).

    Raises:
        json.JSONDecodeError: when none of the parsing strategies succeeds
            (the error of the first, original strategy is re-raised).
    """
    import ast  # local import: only needed by the fallback path

    try:
        # Original behaviour, kept first so existing inputs decode identically.
        return json.loads(list_str.replace("'", "\""))
    except ValueError as original_error:
        for parse in (json.loads, ast.literal_eval):
            try:
                return parse(list_str)
            except (ValueError, SyntaxError, TypeError):
                continue
        raise original_error

In [3]:
def getParent(commit, commits_dict):
    """Return the record of the first parent of `commit`, or None if it has none.

    Args:
        commit: commit record whose 'parents' field holds the textual form of
            a list of parent hashes (decoded with fromJsonList).
        commits_dict: mapping from commit hash to commit record.

    Returns:
        The record stored in `commits_dict` for the first listed parent, or
        None when the parents list is empty or its first entry is ''.

    Raises:
        KeyError: if the first parent hash is not a key of `commits_dict`.
    """
    parents = fromJsonList(commit['parents'])
    # Both an empty list and a list starting with '' mean "no parent";
    # the former used to fail with IndexError.
    if not parents or parents[0] == '':
        return None
    return commits_dict[parents[0]]

In [4]:
def _classifyTestBuildLog(log):
    """Derive an error label, an error type and an excerpt from a test-build log.

    Args:
        log: full text of the test-build log.

    Returns:
        Tuple (error, error_type, detailed_error). When the log contains an
        "error: " marker, `detailed_error` is that line plus up to four
        following lines; otherwise it is the whole log.
    """
    not_compatible_error = "Test code not compatible with source code"
    java_version_error = "Java version error"
    abstract_method_marker = "is not abstract and does not override abstract method"
    abstract_method_error = "Class X is not abstract and does not override abstract method Y"

    error_type = not_compatible_error
    match = re.search("error: (.*)", log)

    if match is not None:
        # Classify by the text of the first reported error; an unrecognised
        # message is kept verbatim as the label.
        error = match.group(1)
        if error == "cannot find symbol":
            pass
        elif error.startswith("duplicate class"):
            error = "duplicate class"
        elif error.startswith("incompatible types"):
            error = "incompatible types"
        elif error.startswith("no suitable method found"):
            error = "no suitable method found"
        elif error.startswith("no suitable constructor found"):
            error = "no suitable constructor found"
        elif re.search("package .* does not exist", error):
            error = "package X does not exist"
        elif "cannot be applied to given types" in error:
            error = "cannot be applied to given types"
        elif abstract_method_marker in error:
            error = abstract_method_error
        elif "not supported in " in error:
            error = java_version_error
            error_type = java_version_error
        elif "unreported exception" in error:
            error = "unreported exception"
        elif "as of release 5" in error:
            error = java_version_error
            error_type = java_version_error
        elif "has private access" in error:
            error = "has private access"
        elif abstract_method_marker in log:
            # This whole-log check used to sit above the Java-version /
            # unreported-exception / private-access branches and shadowed
            # them; it is now only a last resort for unrecognised first errors.
            error = abstract_method_error

        # Take the error line plus at most four following lines. Requiring
        # exactly four extra lines used to fail (no match -> AttributeError)
        # when the first error was close to the end of the log.
        detailed_error = re.search(r"error: .*(?:\n.*){0,4}", log).group(0)
    else:
        # No "error: " marker: look for known messages anywhere in the log.
        if "cannot find symbol" in log:
            error = "cannot find symbol"
        elif "not supported in " in log:
            error = java_version_error
            error_type = java_version_error
        elif "incompatible types" in log:
            error = "incompatible types"
        elif re.search("method .* in class .* cannot be applied to given types", log):
            error = "method X in class Y cannot be applied to given types"
        elif re.search("reference to .* is ambiguous", log):
            error = "reference to X is ambiguous"
        elif abstract_method_marker in log:
            error = abstract_method_error
        elif "method does not override or implement a method from a supertype" in log:
            error = "method does not override or implement a method from a supertype"
        elif "cannot be applied to given types" in log:
            error = "cannot be applied to given types"
        elif "has private access" in log:
            error = "Class or method has private access"
        else:
            error = "Not detected"
        detailed_error = log

    return error, error_type, detailed_error


def searchNoTransplantableReason(project, bug):
    """Find the commit where the test stops being transplantable, and why.

    Reads `commit_history_results.csv` for the given project/bug, takes the
    last row (in file order) whose State is "TestFail", and follows
    first-parent links from it until a commit with State "TestBuildError" is
    reached. The test-build log of that commit is then classified.

    Args:
        project: project name used in the results paths (e.g. "Cli").
        bug: bug identifier used in the results paths (e.g. "14").

    Returns:
        Tuple (commit, error, error_type, detailed_error):
        - (None, "Always transplantable", "Always transplantable", "") when
          the last "TestFail" commit has no parent;
        - (parent of the last "TestFail" commit, "Limited by Source Build",
          "Source build error", "") when the history ends before any
          "TestBuildError" commit is found;
        - otherwise the "TestBuildError" commit record plus the label, type
          and excerpt extracted from its test-build log.

    Raises:
        OSError: if the CSV or the log file cannot be read (the log used to
            be read through `cat`, which raised CalledProcessError instead).
        TypeError: if no row has State "TestFail" (the context is printed
            before the error propagates).
    """
    commit_history_results_path = root+"/analysis/results/%s/Bug_%s/commit_history_results.csv"%(project,bug)

    # newline='' is the form recommended by the csv module for reader input.
    with open(commit_history_results_path, newline='') as csvfile:
        commits = list(csv.DictReader(csvfile))

    commits_dict = {c['commit']: c for c in commits}

    # Keep the last row, in file order, whose State is "TestFail".
    last_commit_when_transplantation_works = None
    for commit in commits:
        if commit['State'] == "TestFail":
            last_commit_when_transplantation_works = commit

    try:
        first_commit_no_transplantable = getParent(last_commit_when_transplantation_works, commits_dict)
    except Exception:
        # Print which project/bug failed, then let the original error propagate.
        print(last_commit_when_transplantation_works, project, bug)
        raise

    if first_commit_no_transplantable is None:
        # The test can always be transplanted.
        return None, "Always transplantable", "Always transplantable", ""

    while first_commit_no_transplantable['State'] != "TestBuildError":
        first_commit_no_transplantable = getParent(first_commit_no_transplantable, commits_dict)
        if first_commit_no_transplantable is None:
            first_commit_no_transplantable = getParent(last_commit_when_transplantation_works, commits_dict)
            # The test stops being transplantable because of a source build problem.
            return first_commit_no_transplantable, "Limited by Source Build", "Source build error", ""

    commit = first_commit_no_transplantable
    logs_path = root+"/results/%s/Bug_%s/commits/%s-%s/test-build.log"%(project,bug, commit['id'],commit['commit'])
    # Read the bytes and decode explicitly: same text as the former
    # `cat` + decode('utf-8'), without spawning a process.
    with open(logs_path, 'rb') as log_file:
        log = log_file.read().decode('utf-8')

    error, error_type, detailed_error = _classifyTestBuildLog(log)
    return commit, error, error_type, detailed_error

In [15]:
# Collect the bug_result.json of every bug into `results`, and for each bug
# categorised as "No regression is detected" record the label returned by
# searchNoTransplantableReason into `errors`.
errors = []
results = []
projects = [ 
     "Cli", "Closure", "Time", 
     "JacksonXml", "Collections", "Codec", "JxPath",
     "Compress", "Csv", "JacksonCore", "JacksonDatabind", 
     "Jsoup", "Lang", "Math", "Gson", "Mockito"
]
for project in projects:
    bug_dirs = glob.glob("{root}/results/{project}/Bug_*/".format(root=root, project=project))
    # glob order depends on the filesystem; sort the bug ids numerically so a
    # re-run produces the same row order (and DataFrame index) every time.
    bugs = sorted(
        (re.search(r"Bug_(\d+)", bug_path).group(1) for bug_path in bug_dirs),
        key=int,
    )

    # FOR EACH BUG
    for bug in bugs:

        # Filter detected regressions
        with open(root+"/analysis/results/"+project+"/Bug_"+bug+"/bug_result.json") as jsonfile:
            result = json.load(jsonfile)
        results.append(result)

        if result['category'] == "No regression is detected":

            commit, error, error_type, detailed_error = searchNoTransplantableReason(project,bug)
            errors.append({
                'project': project,
                'bug': bug,
                'error': error,
                'error_type': error_type,
                'count': 1,
#                 'detailed_error': detailed_error,
#                 'commit': commit
                'executionsOnPast': result['executionsOnPast']
            })

In [16]:
# Number of records in `errors`, i.e. bugs categorised as "No regression is detected".
len(errors)

712

In [17]:
# One row per analysed bug; then the number of bugs for each
# (error, error_type) pair, most frequent first.
df = pd.DataFrame(errors)
(
    df
    .groupby(['error', "error_type"])[["count"]]
    .count()
    .sort_values(by='count', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
error,error_type,Unnamed: 2_level_1
cannot find symbol,Test code not compatible with source code,347
Limited by Source Build,Source build error,116
cannot be applied to given types,Test code not compatible with source code,51
package X does not exist,Test code not compatible with source code,35
Always transplantable,Always transplantable,33
Java version error,Java version error,33
duplicate class,Test code not compatible with source code,27
no suitable method found,Test code not compatible with source code,21
incompatible types,Test code not compatible with source code,11
no suitable constructor found,Test code not compatible with source code,10


In [18]:
# Rows whose label is the "does not override or implement" message.
df.loc[df['error'] == "method does not override or implement a method from a supertype"]

Unnamed: 0,project,bug,error,error_type,count,executionsOnPast
364,JacksonDatabind,107,method does not override or implement a method...,Test code not compatible with source code,1,420
617,Math,29,method does not override or implement a method...,Test code not compatible with source code,1,45


In [19]:
# Number of analysed bugs per error type, most frequent first.
(
    df
    .groupby(['error_type'])[["count"]]
    .count()
    .sort_values(by='count', ascending=False)
)

Unnamed: 0_level_0,count
error_type,Unnamed: 1_level_1
Test code not compatible with source code,530
Source build error,116
Always transplantable,33
Java version error,33


In [20]:
# Spot check: run the analysis for a single bug (project "Cli", bug "14")
# and display the returned (commit, error, error_type, detailed_error) tuple.
searchNoTransplantableReason("Cli","14")

({'id': '111',
  'commit': '279e5a9c468a48574a55db31f0a6e33d9309a667',
  'Build': 'True',
  'BuildTest': 'False',
  'ExecuteTest': 'False',
  'HasTestReport': 'False',
  'State': 'TestBuildError',
  'parents': "['e2e9472412b3a88f698c80127c4883d9fe78c5f5']",
  'date': '2006-03-08 07:17:00 +0000'},
 'unreported exception',
 'Test code not compatible with source code',
 'error: unreported exception IOException; must be caught or declared to be thrown\n    [javac]                 .parseAndHelp(new String[] { "--file-name", fileName });\n    [javac]                              ^\n    [javac] Note: Some input files use unchecked or unsafe operations.\n    [javac] Note: Recompile with -Xlint:unchecked for details.')

## Analysis of errors

### Error: duplicate class

- At some point in the history the project switched to the standard Maven directory layout. When the test is transplanted across that change, it is not placed in the right directory, which results in duplicated classes.

In [11]:
# df[df['error']=="duplicate class"].sort_values(by='bug', ascending=False)

### Error: unreported exception

- A later commit adds `throws Exception`, so the preceding commits fail with: `unreported exception Exception; must be caught or declared to be thrown`

In [12]:
#df[df['error']=="unreported exception"].sort_values(by='bug', ascending=False)

In [14]:
#pd.DataFrame.from_dict(results)[['id', 'numCommits','transplantability_position']].sort_values(by='transplantability_position', ascending=False)