In [10]:
import json
import git
from git import Repo


# Load the SStuBs dataset (a JSON array with one record per single-statement bug).
# The context manager closes the file handle once parsing is done, and the
# explicit encoding keeps the read independent of the platform default.
with open("sstubs.json", encoding="utf-8") as f:
    sstubs = json.load(f)

# (owner, repository) pairs of the GitHub projects under study; both lists
# below are derived from this single table so they cannot drift apart.
_REPO_SLUGS = (
    ('spring-projects', 'spring-boot'),
    ('iluwatar', 'java-design-patterns'),
    ('square', 'retrofit'),
    ('square', 'okhttp'),
    ('zxing', 'zxing'),
    ('libgdx', 'libgdx'),
    ('google', 'guava'),
    ('alibaba', 'dubbo'),
    ('jfeinstein10', 'SlidingMenu'),
    ('netty', 'netty'),
)

# Clone URLs of the projects.
repo_list = [
    'https://github.com/{}/{}'.format(owner, name) for owner, name in _REPO_SLUGS
]

# "<owner>.<repository>" names, the form compared against each record's
# "projectName" field.
repo_name_list = ['{}.{}'.format(owner, name) for owner, name in _REPO_SLUGS]

# Collect the bug-fixing commits that belong to the projects in repo_name_list.
# filter_commits maps: bug-fixing commit id -> {
#     'fixCommitParentSHA1': id of the fixing commit's parent,
#     'bugFilePath': {path of the fixed file: set of bug line numbers},
#     'projectName': project the commit belongs to,
# }
# A commit may fix several files, and several lines within one file.

filter_commits = {}

for bug in sstubs:
    project_name = bug['projectName']
    if project_name not in repo_name_list:
        continue

    # The first record seen for a commit creates its entry; later records for
    # the same commit reuse it and leave parent/project untouched.
    commit_entry = filter_commits.setdefault(bug["fixCommitSHA1"], {
        'fixCommitParentSHA1': bug["fixCommitParentSHA1"],
        'bugFilePath': {},
        'projectName': project_name
    })

    # Sets make repeated (file, line) records collapse into one.
    lines_in_file = commit_entry['bugFilePath'].setdefault(bug["bugFilePath"], set())
    lines_in_file.add(bug["bugLineNum"])

filtered_count = len(filter_commits)
print("the size of filtered bug fixing commit is {}".format(filtered_count))

# Show one sample entry (nothing is printed when the dict is empty).
sample_entry = next(iter(filter_commits.items()), None)
if sample_entry is not None:
    sample_commit_id, sample_info = sample_entry
    print(sample_commit_id)
    print(sample_info)

the size of filtered bug fixing commit is 636
4acec50c8afb95544b08a6dbdda743264af69909
{'fixCommitParentSHA1': 'd56eaa8ac9e8eaab69e42124ae82bd7de42b8d74', 'bugFilePath': {'dubbo-registry/dubbo-registry-consul/src/main/java/org/apache/dubbo/registry/consul/ConsulRegistry.java': {278}}, 'projectName': 'alibaba.dubbo'}


In [12]:

# Map every bug-fixing commit to its bug-inducing commits: the commits that,
# as of the fix's parent revision, last touched each buggy line (via blame).
# fix_introduce_pairs: bug-fixing commit id -> set of bug-inducing commit hexshas

# Directory holding the local clones; each clone is named after the
# repository part of "<owner>.<repository>".
REPO_ROOT = "/Users/lixinkai/Desktop/"

fix_introduce_pairs = {}
opened_repos = {}  # project name -> Repo, so every clone is opened only once

for key, value in filter_commits.items():
    repo_name = value["projectName"]
    if repo_name not in opened_repos:
        repo_path = REPO_ROOT + repo_name.split(".")[1]
        opened_repos[repo_name] = Repo(repo_path)
        # Blame is always given an explicit revision, so one checkout per
        # clone is enough.
        opened_repos[repo_name].git.checkout('master')
    repo = opened_repos[repo_name]

    parent_commit = value["fixCommitParentSHA1"]
    for path_name, line_nums in value["bugFilePath"].items():
        # Blame each file once and match all of its bug lines in a single pass
        # instead of re-running blame for every line number.
        line = 1  # 1-based number of the first line of the current blame hunk
        for commit, lines in repo.blame(parent_commit, path_name):
            hunk_end = line + len(lines)
            if any(line <= line_num < hunk_end for line_num in line_nums):
                # The set stores hexsha strings; add() already ignores duplicates.
                fix_introduce_pairs.setdefault(key, set()).add(commit.hexsha)
            line = hunk_end


print("the size of fixing-inducing pair is {}".format(len(fix_introduce_pairs)))
# print one item of the dict
for x, y in fix_introduce_pairs.items():
    print(x)
    print(y)
    break



the size of filtered bug fixing commit is 636
4acec50c8afb95544b08a6dbdda743264af69909
{'096d1dae2adc86be71076a53c1ded690cc5b4997'}


In [13]:
# Flatten the fixing -> inducing mapping into JSON-serialisable records
# (sets become lists) and write them to disk.
data = [
    {
        'bug_fixing_commit_id': fixing_id,
        'bug_inducing_commit_ids': list(inducing_ids)
    }
    for fixing_id, inducing_ids in fix_introduce_pairs.items()
]

with open('fix_introduce_pairs.json', 'w') as outfile:
    json.dump(data, outfile)


In [4]:
# Build one record per bug-fixing commit (parent id, bug-inducing commits and
# the fixed locations per file) and dump the records to JSON.
bug_fixing_commits = []
for key, value in filter_commits.items():
    # A fixing commit has no entry in fix_introduce_pairs when none of its bug
    # lines matched a blame hunk; record an empty list instead of raising
    # KeyError. Sorting keeps the JSON output reproducible across runs.
    bug_inducing_commit_ids = sorted(fix_introduce_pairs.get(key, ()))
    locations = {
        path_name: {
            'count': len(line_nums),
            'locations': sorted(line_nums)
        }
        for path_name, line_nums in value["bugFilePath"].items()
    }
    bug_fixing_commits.append({
        'bug_fixing_commit_id': key,
        'fixCommitParentSHA1': value["fixCommitParentSHA1"],
        'bug_inducing_commit_ids': bug_inducing_commit_ids,
        'bugFilePath': locations
    })

with open('bug_fixing_commits.json', 'w') as outfile:
    json.dump(bug_fixing_commits, outfile)

In [5]:
# Spot check: find the commit that last touched one line of a spring-boot file
# as of a given revision.
# Open the spring-boot clone explicitly rather than reusing whatever `repo`
# was left bound by an earlier loop, so the cell also works on a fresh kernel.
spring_boot_repo = Repo("/Users/lixinkai/Desktop/spring-boot")

target_line = 156  # 1-based line number to look up
hunk_start = 1     # 1-based number of the first line of the current blame hunk
commits = set()
for commit, lines in spring_boot_repo.blame('69fcae6b44cb9443bd970f89f7215258d6092bcc', 'spring-boot-actuator/src/main/java/org/springframework/boot/actuate/autoconfigure/HealthIndicatorAutoConfiguration.java'):
    if hunk_start <= target_line < (hunk_start + len(lines)):
        commits.add(commit.hexsha)
        print(commit)
        print(lines)
    hunk_start += len(lines)
print(commits)

b569918db1c33847ce1d1464d7d78f692f289e99
['\t@ConditionalOnEnablednHealthIndicator("db")']
{'b569918db1c33847ce1d1464d7d78f692f289e99'}
