In [1]:
import json
import git
from git import Repo


# Load the SStuBs records. The context manager closes the file handle as
# soon as parsing is done instead of leaving it open for the whole session.
with open("sstubs.json") as f:
    sstubs = json.load(f)

# GitHub repositories to analyse. Comment an entry in or out to change the
# selection. Every entry ends with a comma so that toggling a line can never
# silently concatenate two adjacent string literals.
repo_list = [
    'https://github.com/spring-projects/spring-boot',
    'https://github.com/checkstyle/checkstyle',
#     'https://github.com/iluwatar/java-design-patterns',
#     'https://github.com/square/retrofit',
#     'https://github.com/square/okhttp',
#     'https://github.com/zxing/zxing',
#     'https://github.com/libgdx/libgdx',
#     'https://github.com/google/guava',
#     'https://github.com/alibaba/dubbo',
#     'https://github.com/jfeinstein10/SlidingMenu',
#     'https://github.com/netty/netty',
]

# Project names in "<owner>.<repo>" form, the format compared against each
# record's 'projectName'. They are derived from the URLs above so the two
# lists cannot drift apart when the selection changes.
repo_name_list = ['.'.join(url.rstrip('/').split('/')[-2:]) for url in repo_list]

# Group the SStuBs of the selected repositories by their fixing commit.
# key:   SHA-1 of the bug-fixing commit
# value:
#   fixCommitParentSHA1: SHA-1 of the fixing commit's parent
#   bugFilePath:         dict mapping each fixed file path to the set of
#                        line numbers recorded for it (a file can have several)
#   projectName:         project the fixing commit belongs to


filtered_bug_fixing_commits = {}
for bug in sstubs:
    if bug['projectName'] in repo_name_list:
        commit_id = bug["fixCommitSHA1"]
        parent_commit = bug["fixCommitParentSHA1"]
        bug_file_path = bug["bugFilePath"]
        bug_line_num = bug["bugLineNum"]

        # The first sighting of a commit creates its entry; later sightings
        # get the existing entry back unchanged.
        commit_entry = filtered_bug_fixing_commits.setdefault(
            commit_id,
            {
                'fixCommitParentSHA1': parent_commit,
                'bugFilePath': {},
                'projectName': bug['projectName'],
            },
        )
        # Same idea per file; adding to a set is a no-op for a line number
        # that has already been recorded.
        lines_in_file = commit_entry['bugFilePath'].setdefault(bug_file_path, set())
        lines_in_file.add(bug_line_num)
        
# print(filtered_bug_fixing_commits)

# Report how many fixing commits were kept, then show one sample entry
# (nothing more is printed when the dict is empty).
print("the size of filtered bug fixing commit is {}".format(len(filtered_bug_fixing_commits)))
sample_fix = next(iter(filtered_bug_fixing_commits.items()), None)
if sample_fix is not None:
    sample_fix_id, sample_fix_details = sample_fix
    print(sample_fix_id)
    print(sample_fix_details)

the size of filtered bug fixing commit is 283
7e0199edf41b5a70390ce922cc6a8d14f0fad482
{'fixCommitParentSHA1': '3c9efede12cff4e4fdf5cf693d4f71c36cddbc4e', 'bugFilePath': {'src/test/java/com/puppycrawl/tools/checkstyle/gui/CodeSelectorPresentationTest.java': {81}}, 'projectName': 'checkstyle.checkstyle'}


In [2]:
# Directory that holds the local clones; each clone sits in a sub-directory
# named after the repository part of "<owner>.<repo>".
REPOS_ROOT = "/Users/air/Documents/CS846/"

# For every bug-fixing commit, blame the recorded lines at the fix's parent
# revision and collect the commits that blame attributes those lines to.
# key:   SHA-1 of the bug-fixing commit
# value:
#   bug_inducing_commit_ids: set of hex SHA-1 strings of the blamed commits
#   project_name:            project the fixing commit belongs to
fix_and_bug_introducing_pairs = {}
opened_repos = {}  # project name -> Repo; each clone is opened and checked out once
for key, value in filtered_bug_fixing_commits.items():
    repo_name = value["projectName"]
    if repo_name not in opened_repos:
        new_repo = Repo(REPOS_ROOT + repo_name.split(".")[1])
        new_repo.git.checkout('master')
        opened_repos[repo_name] = new_repo
    repo = opened_repos[repo_name]

    parent_commit = value["fixCommitParentSHA1"]
    for path_name, line_nums in value["bugFilePath"].items():
        # Blame the file once and test every recorded line against each hunk,
        # rather than re-running blame for each individual line number.
        line = 1  # number of the first line of the current hunk, counting from 1
        for commit, lines in repo.blame(parent_commit, path_name):
            hunk_end = line + len(lines)
            if any(line <= line_num < hunk_end for line_num in line_nums):
                pair = fix_and_bug_introducing_pairs.setdefault(key, {
                    'bug_inducing_commit_ids': set(),
                    'project_name': repo_name,
                })
                # Store the hex SHA string. The set de-duplicates by itself, so
                # no membership test is needed (the previous one compared a
                # Commit object against strings and never matched).
                pair['bug_inducing_commit_ids'].add(commit.hexsha)
            line = hunk_end
    
# print(fix_and_bug_introducing_pairs)
# Report how many fixing commits got at least one blamed commit, then show
# one sample entry (nothing more is printed when the dict is empty).
print("the size of fixing-inducing pair is {}".format(len(fix_and_bug_introducing_pairs)))
sample_pair = next(iter(fix_and_bug_introducing_pairs.items()), None)
if sample_pair is not None:
    sample_pair_id, sample_pair_details = sample_pair
    print(sample_pair_id)
    print(sample_pair_details)

the size of fixing-inducing pair is 283
7e0199edf41b5a70390ce922cc6a8d14f0fad482
{'bug_inducing_commit_ids': {'0d792c39883fe3a41ee5875114d1efa6c4801b13'}, 'project_name': 'checkstyle.checkstyle'}


In [3]:
# Flatten the mapping into one JSON-friendly record per fixing commit.
# The ids are stored in a set, which json cannot serialise and whose
# iteration order is arbitrary, so they are written as a sorted list to keep
# the output file identical between runs.
data = [
    {
        'bug_fixing_commit_id': fixing_commit_id,
        'bug_inducing_commit_ids': sorted(pair['bug_inducing_commit_ids']),
        'project_name': pair['project_name'],
    }
    for fixing_commit_id, pair in fix_and_bug_introducing_pairs.items()
]

with open('fix_and_bug_introducing_pairs.json', 'w') as outfile:
    json.dump(data, outfile)

In [4]:
# Write one record per fixing commit: its parent, the blamed commits, and
# the fixed locations grouped by file.
bug_fixing_commits = []
for key, value in filtered_bug_fixing_commits.items():
    # Read the id set itself: calling list() on the pair dict would yield its
    # keys ('bug_inducing_commit_ids', 'project_name'), not the commit ids.
    # A fixing commit has no pair entry when blame matched none of its lines;
    # such a commit is written with an empty list instead of raising KeyError.
    pair = fix_and_bug_introducing_pairs.get(key, {})
    bug_inducing_commit_ids = sorted(pair.get('bug_inducing_commit_ids', ()))

    # Sorted so the output file is identical between runs (set order is arbitrary).
    locations = {
        path_name: {
            'count': len(line_nums),
            'locations': sorted(line_nums),
        }
        for path_name, line_nums in value["bugFilePath"].items()
    }
    bug_fixing_commits.append({
        'bug_fixing_commit_id': key,
        'fixCommitParentSHA1': value["fixCommitParentSHA1"],
        'bug_inducing_commit_ids': bug_inducing_commit_ids,
        'bugFilePath': locations,
        'projectName': value['projectName']
    })

with open('bug_fixing_commits.json', 'w') as outfile:
    json.dump(bug_fixing_commits, outfile)

In [5]:
# Spot check: blame one recorded line by hand and show which commit blame
# attributes it to. The spring-boot clone is opened explicitly instead of
# reusing whatever `repo` an earlier cell's loop left bound, which is not
# guaranteed to be the project this file path belongs to.
spot_check_repo = Repo("/Users/air/Documents/CS846/spring-boot")
ln = 156  # line number to look up
tlc = 1   # number of the first line of the current blame hunk, counting from 1
commits = set()
for commit, lines in spot_check_repo.blame('69fcae6b44cb9443bd970f89f7215258d6092bcc', 'spring-boot-actuator/src/main/java/org/springframework/boot/actuate/autoconfigure/HealthIndicatorAutoConfiguration.java'):
    if tlc <= ln < (tlc + len(lines)):
        commits.add(commit.hexsha)
        print(commit)
        print(lines)
    tlc += len(lines)
print(commits)

b569918db1c33847ce1d1464d7d78f692f289e99
['\t@ConditionalOnEnablednHealthIndicator("db")']
{'b569918db1c33847ce1d1464d7d78f692f289e99'}
