In [1]:
import ast
import csv
import glob
import hashlib
import json
import os
import re
import subprocess

import pandas as pd

# Base directory that contains the `results/` and `analysis/results/` trees
# read by the cells of this notebook. Set the ANALYSIS_ROOT environment
# variable to run the notebook against a different location; when it is unset
# or empty the original default is used.
root = os.environ.get("ANALYSIS_ROOT") or "/home/jovyan/work"

In [2]:
def fromJsonList(list_str):
    """Parse a stringified list such as "['abc', 'def']" into a Python object.

    The text is first read as a Python literal, which handles single-quoted
    items and items that themselves contain an apostrophe or a double quote.
    If it is not a valid Python literal (e.g. JSON using ``true``/``null``),
    it is parsed as JSON after turning single quotes into double quotes.

    Parameters
    ----------
    list_str : str
        Textual representation of the list.

    Returns
    -------
    The parsed value (a list for list-shaped input).

    Raises
    ------
    json.JSONDecodeError
        If the text is neither a Python literal nor JSON after quote
        replacement.
    """
    try:
        return ast.literal_eval(list_str)
    except (ValueError, SyntaxError):
        # Not a Python literal: fall back to the quote-swapping JSON parse.
        return json.loads(list_str.replace("'", "\""))

In [3]:
def getParent(commit, commits_dict):
    """Return the row of the first parent of `commit`, or None if it has none.

    Parameters
    ----------
    commit : mapping
        Commit row whose 'parents' entry is a stringified list of hashes.
    commits_dict : mapping
        Commit rows indexed by commit hash.

    Returns
    -------
    The row stored in `commits_dict` for the first listed parent, or None when
    the parents list is empty or its first entry is the empty string.

    Raises
    ------
    KeyError
        If the first parent hash is not a key of `commits_dict`.
    """
    parents = fromJsonList(commit['parents'])
    # Only the first listed parent is followed. An empty list and an empty
    # first hash both mean "no parent".
    if not parents or parents[0] == '':
        return None
    return commits_dict[parents[0]]

In [4]:
def _readBuildLog(logs_path):
    """Return the whole build log at `logs_path`, decoded as UTF-8.

    Raises OSError if the file cannot be opened and UnicodeDecodeError if it
    is not valid UTF-8.
    """
    # newline="" turns off newline translation so the regexes below see the
    # decoded file content unchanged.
    with open(logs_path, encoding="utf-8", newline="") as logfile:
        return logfile.read()


def _classifySourceBuildError(log):
    """Map the text of a source-build log to a short error label."""
    match = re.search("error: (.*)", log)
    if match is not None:
        # The first "error: ..." message decides the label.
        error = match.group(1)
        if "unreported exception" in error:
            return "Other source build error"
        java_version_markers = (
            "cannot implement remove(Object,Object) in Map",
            "withFilterId(Object) in BeanSerializerBase",
            "as of release 5",
        )
        if any(marker in error for marker in java_version_markers):
            return "Java version error"
        return error

    if "BUILD FAILED" in log:
        if "does not exist" in log:
            return "File or directory not exist"
        return "Other source build error"

    if "there is no POM in this directory" in log:
        return "No pom.xml file"
    if "Could not resolve dependencies" in log:
        return "Dependency resolution"
    return "Other source build error"


def _classifyTestBuildError(log):
    """Map the text of a test-build log to ``(error, detailed_error)``.

    When the log contains an "error: ..." message, the first one is
    normalised into a label and `detailed_error` is that line plus up to four
    following lines. Otherwise the label comes from phrases found anywhere in
    the log and `detailed_error` is the whole log.
    """
    match = re.search("error: (.*)", log)

    if match is None:
        if "cannot find symbol" in log:
            error = "cannot find symbol"
        elif "not supported in " in log:
            error = "Java version error"
        elif "incompatible types" in log:
            error = "incompatible types"
        elif re.search("method .* in class .* cannot be applied to given types", log):
            error = "method X in class Y cannot be applied to given types"
        elif re.search("reference to .* is ambiguous", log):
            error = "reference to X is ambiguous"
        elif "is not abstract and does not override abstract method" in log:
            error = "Class X is not abstract and does not override abstract method Y"
        elif "method does not override or implement a method from a supertype" in log:
            error = "method does not override or implement a method from a supertype"
        elif "cannot be applied to given types" in log:
            error = "cannot be applied to given types"
        elif "has private access" in log:
            error = "Class or method has private access"
        else:
            error = "Not detected"
        return error, log

    error = match.group(1)
    known_prefixes = (
        "duplicate class",
        "incompatible types",
        "no suitable method found",
        "no suitable constructor found",
    )
    matching_prefixes = [prefix for prefix in known_prefixes if error.startswith(prefix)]

    if error == "cannot find symbol":
        pass
    elif matching_prefixes:
        error = matching_prefixes[0]
    elif re.search("package .* does not exist", error):
        error = "package X does not exist"
    elif "cannot be applied to given types" in error:
        error = "cannot be applied to given types"
    # Checked against the first error message, like every other rule in this
    # chain, so that an unrelated later line in the log cannot relabel it.
    elif "is not abstract and does not override abstract method" in error:
        error = "Class X is not abstract and does not override abstract method Y"
    elif "not supported in " in error:
        error = "Java version error"
    elif "unreported exception" in error:
        error = "unreported exception"
    elif "as of release 5" in error:
        error = "Java version error"
    elif "has private access" in error:
        error = "has private access"

    # First error line plus up to four following lines. Taking "up to" four
    # keeps this working when the error sits near the end of the log.
    detailed_error = re.search("error: (.*)(?:\n.*){0,4}", log).group(0)
    return error, detailed_error


def searchNoTransplantableReason(project, bug):
    """Find why the test of a bug stops being transplantable to older commits.

    Reads ``<root>/analysis/results/<project>/Bug_<bug>/commit_history_results.csv``
    (columns used: 'id', 'commit', 'State', 'parents'), takes the last row in
    file order whose State is "TestFail", and follows first parents from there
    until a row with State "TestBuildError" is found.

    Parameters
    ----------
    project : str
        Project name as used in the results directory names.
    bug : str
        Bug identifier as used in the ``Bug_<bug>`` directory names.

    Returns
    -------
    tuple ``(commit, error, error_type, detailed_error)``
        * ``(None, "Always transplantable", "Always transplantable", "")`` when
          the last "TestFail" row has no parent, or when no "TestBuildError"
          row is reached and the parent of the last "TestFail" row has itself
          no parent.
        * ``(row, label, "Source build error", "")`` when no "TestBuildError"
          row is reached; ``row`` is the parent of the last "TestFail" row and
          the label is derived from its ``source-build.log``.
        * ``(row, label, "Test code not compatible with source code", detail)``
          otherwise; ``row`` is the "TestBuildError" row found and the label
          and detail are derived from its ``test-build.log``.

    Raises
    ------
    Any exception raised while looking up the parent of the last "TestFail"
    row (including the case where no such row exists) is re-raised after
    printing the row, project and bug. OSError is raised if a required log
    file cannot be opened.
    """
    commit_history_results_path = root+"/analysis/results/%s/Bug_%s/commit_history_results.csv"%(project,bug)

    with open(commit_history_results_path) as csvfile:
        commits = list(csv.DictReader(csvfile))

    commits_dict = {c['commit']: c for c in commits}

    # Keep the last row (in file order) where the transplanted test built and failed.
    last_commit_when_transplantation_works = None
    for commit in commits:
        if commit['State'] == "TestFail":
            last_commit_when_transplantation_works = commit

    try:
        first_parent = getParent(last_commit_when_transplantation_works, commits_dict)
    except Exception:
        # Print which bug failed before propagating the original exception.
        print(last_commit_when_transplantation_works, project, bug)
        raise

    always_transplantable = (None, "Always transplantable", "Always transplantable", "")

    if first_parent is None:
        # The test can always be transplanted.
        return always_transplantable

    # Walk first parents until a commit whose test build failed, or until the
    # history runs out.
    commit = first_parent
    while commit is not None and commit['State'] != "TestBuildError":
        commit = getParent(commit, commits_dict)

    commits_dir = root+"/results/%s/Bug_%s/commits"%(project, bug)

    if commit is None:
        # No test-build failure anywhere below: the first non-transplantable
        # commit is the parent of the last working one.
        if getParent(first_parent, commits_dict) is None:
            # That parent is itself the last commit of the history.
            return always_transplantable

        # The test stops being transplantable because the source does not build.
        log = _readBuildLog("%s/%s-%s/source-build.log"%(commits_dir, first_parent['id'], first_parent['commit']))
        return first_parent, _classifySourceBuildError(log), "Source build error", ""

    log = _readBuildLog("%s/%s-%s/test-build.log"%(commits_dir, commit['id'], commit['commit']))
    error, detailed_error = _classifyTestBuildError(log)
    return commit, error, "Test code not compatible with source code", detailed_error

In [5]:
# Example: classify a single bug and display the returned
# (commit, error, error_type, detailed_error) tuple.
searchNoTransplantableReason(project="JacksonDatabind", bug="1")

({'id': '546',
  'commit': 'c277641421be2b2fe3319fa75f54b9364ca91178',
  'Build': 'True',
  'BuildTest': 'False',
  'ExecuteTest': 'False',
  'HasTestReport': 'False',
  'State': 'TestBuildError',
  'parents': "['83bcd182408b0180049a7d201e09d996ec424593']",
  'date': '2012-05-14 19:40:57 -0700'},
 'annotation type not applicable to this kind of declaration',
 'Test code not compatible with source code',
 'error: annotation type not applicable to this kind of declaration\n[ERROR] /home/regseek/workdir/projects/JacksonDatabind_Bug_1/src/test/java/com/fasterxml/jackson/databind/struct/TestPOJOAsArray.java:[81,4] error: annotation type not applicable to this kind of declaration\n[ERROR] /home/regseek/workdir/projects/JacksonDatabind_Bug_1/src/test/java/com/fasterxml/jackson/databind/struct/TestPOJOAsArray.java:[87,4] error: annotation type not applicable to this kind of declaration\n[ERROR] /home/regseek/workdir/projects/JacksonDatabind_Bug_1/src/test/java/com/fasterxml/jackson/databind/st

In [6]:
# `errors` gets one record per bug whose category is "No regression is detected";
# `results` gets the bug_result.json content of every bug visited.
errors = []
results = []
projects = [
     "Cli", "Closure", "Time",
     "JacksonXml", "Collections", "Codec", "JxPath",
     "Compress", "Csv", "JacksonCore", "JacksonDatabind",
     "Jsoup", "Lang", "Math", "Gson", "Mockito"
]
for project in projects:
    # Collect the numeric bug ids from the directory names. Directories that
    # match the glob but are not exactly "Bug_<digits>" are skipped.
    bugs = []
    for bug_path in glob.glob("{root}/results/{project}/Bug_*/".format(root=root, project=project)):
        bug_match = re.fullmatch(r"Bug_(\d+)", os.path.basename(os.path.normpath(bug_path)))
        if bug_match is not None:
            bugs.append(bug_match.group(1))

    # FOR EACH BUG, in numeric order: glob returns paths in arbitrary order,
    # so sorting makes the content order of `errors`/`results` reproducible.
    for bug in sorted(bugs, key=int):
        with open(root+"/analysis/results/"+project+"/Bug_"+bug+"/bug_result.json") as jsonfile:
            result = json.load(jsonfile)
        results.append(result)

        # Only bugs for which no regression was detected are classified.
        if result['category'] != "No regression is detected":
            continue

        commit, error, error_type, detailed_error = searchNoTransplantableReason(project,bug)
        errors.append({
            'project': project,
            'bug': bug,
            'error': error,
            'error_type': error_type,
            'count': 1,
#             'detailed_error': detailed_error,
            'commit': commit,
            'executionsOnPast': result['executionsOnPast']
        })

In [7]:
# Number of records collected in `errors`, i.e. bugs whose category is
# "No regression is detected".
len(errors)

710

In [8]:
# One row per record in `errors`. Every record carries count=1, so counting
# the 'count' column per group gives the number of bugs in that group.
df = pd.DataFrame(errors)
(
    df.groupby(["error", "error_type"])[["count"]]
    .count()
    .sort_values(by="count", ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
error,error_type,Unnamed: 2_level_1
cannot find symbol,Test code not compatible with source code,373
cannot be applied to given types,Test code not compatible with source code,58
Always transplantable,Always transplantable,55
package X does not exist,Test code not compatible with source code,42
Java version error,Test code not compatible with source code,33
duplicate class,Test code not compatible with source code,27
File or directory not exist,Source build error,25
no suitable method found,Test code not compatible with source code,21
Java version error,Source build error,19
incompatible types,Test code not compatible with source code,12


In [9]:
# Bugs per error label, restricted to test/source incompatibilities.
(
    df.loc[df["error_type"].eq("Test code not compatible with source code")]
    .groupby(["error"])[["count"]]
    .count()
    .sort_values(by="count", ascending=False)
)

Unnamed: 0_level_0,count
error,Unnamed: 1_level_1
cannot find symbol,373
cannot be applied to given types,58
package X does not exist,42
Java version error,33
duplicate class,27
no suitable method found,21
incompatible types,12
no suitable constructor found,11
has private access,7
unreported exception,5


In [10]:
# Bugs per error label, restricted to source build errors.
(
    df.loc[df["error_type"].eq("Source build error")]
    .groupby(["error"])[["count"]]
    .count()
    .sort_values(by="count", ascending=False)
)

Unnamed: 0_level_0,count
error,Unnamed: 1_level_1
File or directory not exist,25
Java version error,19
Other source build error,3


In [11]:
# Bugs per error type, most frequent first.
(
    df.groupby(["error_type"])[["count"]]
    .count()
    .sort_values(by="count", ascending=False)
)

Unnamed: 0_level_0,count
error_type,Unnamed: 1_level_1
Test code not compatible with source code,608
Always transplantable,55
Source build error,47


## Analysis of errors

### Error: duplicate class

- Starting at some past commit, the directories follow the standard Maven layout. As a result, the transplanted test is not placed in the right directory, which produces duplicate class definitions.

In [12]:
# df[df['error']=="duplicate class"].sort_values(by='bug', ascending=False)

### Error: unreported exception

- A later commit adds `throws Exception`, so on earlier commits compilation fails with: `unreported exception Exception; must be caught or declared to be thrown`.

In [13]:
#df[df['error']=="unreported exception"].sort_values(by='bug', ascending=False)

In [14]:
#pd.DataFrame.from_dict(results)[['id', 'numCommits','transplantability_position']].sort_values(by='transplantability_position', ascending=False)