In [1]:
import csv
import json
import requests

f = open("bug_inducing_commits.json")
fix_induce_map = json.load(f) # one to many

# build a bug inducing commit dictionary
# key: inducing commit id
# value: 
#   bug_fixing_commit_ids: a set, the fixing commits' ids
#   bug_num: the number of bugs this inducing commit has
bug_inducing_commits = {}

for item in fix_induce_map:
    fixing_id = item["bug_fixing_commit_id"] # one commit
    inducing_id_list = item["bug_inducing_commit_ids"] # list of commit
    for commit_id in inducing_id_list:
        if commit_id in bug_inducing_commits:
            if fixing_id not in bug_inducing_commits[commit_id]:
                # old inducing commit, but new fixing commit
                bug_inducing_commits[commit_id]['bug_fixing_commit_ids'].add(fixing_id)
                bug_inducing_commits[commit_id]['bug_num'] += 1
        else:
            # new inducing commit
            bug_inducing_commits[commit_id] = {
                'bug_fixing_commit_ids': {fixing_id},
                'bug_num': 1
            }

print("the size of bug inducing commit is {}".format(len(bug_inducing_commits)))
# print a item in dic
for x, y in bug_inducing_commits.items():
    print(x)
    print(y)
    break

the size of bug inducing commit is 150
b7e092e821021b7c0dbd91eacfe369181bfa28b8 {'bug_fixing_commit_ids': {'e3c87cba0bbed00c739e01599e29a5621c9ef3b3', 'ec57f0c31f8c41db175aa7dcaab5089c22b2a8a8'}, 'bug_num': 2}


In [6]:
# create the project URL list
url_list = []
with open('topJavaMavenProjects.csv') as csv_file:
    i = 0
    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        if i == 0:
            i += 1
            continue
        url_list.append(row[0])

f = open("sstubs.json")
sstubs = json.load(f)

# print remaining access 
token = "xxx"
headers = {'Authorization': "Token " + token}
rate_limit_url = "https://api.github.com/rate_limit"
req = requests.get(rate_limit_url, headers=headers).json()
print(req)
print()
print(url_list[0:10])
print()

{'resources': {'core': {'limit': 5000, 'used': 3, 'remaining': 4997, 'reset': 1617424295}, 'search': {'limit': 30, 'used': 0, 'remaining': 30, 'reset': 1617422607}, 'graphql': {'limit': 5000, 'used': 0, 'remaining': 5000, 'reset': 1617426147}, 'integration_manifest': {'limit': 5000, 'used': 0, 'remaining': 5000, 'reset': 1617426147}, 'source_import': {'limit': 100, 'used': 0, 'remaining': 100, 'reset': 1617422607}, 'code_scanning_upload': {'limit': 500, 'used': 0, 'remaining': 500, 'reset': 1617426147}}, 'rate': {'limit': 5000, 'used': 3, 'remaining': 4997, 'reset': 1617424295}}

['https://github.com/spring-projects/spring-boot', 'https://github.com/iluwatar/java-design-patterns', 'https://github.com/square/retrofit', 'https://github.com/square/okhttp', 'https://github.com/zxing/zxing', 'https://github.com/libgdx/libgdx', 'https://github.com/google/guava', 'https://github.com/alibaba/dubbo', 'https://github.com/jfeinstein10/SlidingMenu', 'https://github.com/netty/netty']



In [11]:
url_list = url_list[0:1]

project_features = ['created_at', 'updated_at', 'size', 'stargazers_count', 'watchers_count',
                    'forks_count', 'network_count', 'subscribers_count']
user_features = ['followers', 'public_repos', 'created_at']
user_features_patch = ['contributions']
# build the project_info table where
# key: project_name
# value: project_info (dic)
projects = {}
# build the user_info table where
# key: project_user_names
# value: user_info (dic)
users = {}

for url in url_list:
    items = url.split("/")
    owner = items[len(items)-2]
    repo = items[len(items)-1]
    
    project_api = 'https://api.github.com/repos/' + owner + '/' + repo
    contributor_api = 'https://api.github.com/repos/' + owner + '/' + repo + '/' + "contributors"
    headers = {'Authorization': "Token " + token}
    project = requests.get(project_api, headers=headers).json()
    contributors = requests.get(contributor_api, headers=headers).json()

    # get project_info and save in a dictionary
    project_info = {}
    for i in range(len(project_features)):
        feature_str = project_features[i]
        project_info[feature_str] = project[feature_str]
    key = owner + '.' + repo
    projects[key] = project_info
    
    # get user_info and save in a dictionary
    for item in contributors:
        ## get contributor_info
        username = item['login']
        contributions = item['contributions']
        
        # get the user_info
        user_api = 'https://api.github.com/users/' + username
        user = requests.get(user_api, headers=headers).json()
        user_info = {}
        for i in range(len(user_features)):
            feature_str = user_features[i]
            user_info[feature_str] = user[feature_str]
        user_info['contributions'] = contributions
        key = username
        users[key] = user_info
    
print(projects.keys())
print(users.keys())
print("an item for project")
for x, y in projects.items():
    print(x)
    print(y)
    break
print("an item for user")
for x, y in users.items():
    print(x)
    print(y)
    break

dict_keys(['spring-projects.spring-boot'])
dict_keys(['wilkinsona', 'snicoll', 'philwebb', 'mbhave', 'dsyer', 'izeye', 'bclozel', 'dreis2211', 'scottfrederick', 'spring-buildmaster', 'vpavic', 'cdupuis', 'eddumelendez', 'nosan', 'garyrussell', 'sdeleuze', 'igor-suhorukov', 'ayudovin', 'Buzzardo', 'kazuki43zoo', 'michael-simons', 'artembilan', 'rwinch', 'gregturn', 'odrotbohm', 'htynkn', 'spring-operator', 'jkschneider', 'mp911de', 'mbenson'])
spring-projects.spring-boot
{'created_at': '2012-10-19T15:02:57Z', 'updated_at': '2021-04-03T04:09:17Z', 'size': 132584, 'stargazers_count': 54323, 'watchers_count': 54323, 'forks_count': 33278, 'network_count': 33278, 'subscribers_count': 3445}
wilkinsona
{'followers': 1240, 'public_repos': 146, 'created_at': '2011-07-14T09:41:59Z', 'contributions': 9920}


In [12]:
# build the commit_info table where
# key: commit_id
# value: commit_info
commits = {}
commit_features = ['author_id', 'LOC', 'Add', 'Delete', 'Files', 'bug_num']

for key, value in bug_inducing_commits.items():
    bug_num = value["bug_num"]
    commit_api = 'https://api.github.com/repos/spring-projects/spring-boot/commits/' + key
    headers = {'Authorization': "Token " + token}
    commit = requests.get(commit_api, headers=headers).json()
    
    # get commit_info and save in a dictionary
    author_id = commit['author']
    if author_id is None:
        continue
    author_id = author_id['login']
    loc = commit['stats']['total']
    additions = commit['stats']['additions']
    deletions = commit['stats']['deletions']
    files_in_change = len(commit['files'])
    commit_info = {
        'author_id': author_id,
        'LOC': loc,
        'Add': additions,
        'Delete': deletions,
        'Files': files_in_change,
        "bug_num": bug_num
    }
    #print(commit_info)
    commits[key] = commit_info

print("an item for commit")
for x, y in commits.items():
    print(x)
    print(y)
    break

{'author_id': 'philwebb', 'LOC': 4, 'Add': 2, 'Delete': 2, 'Files': 1, 'bug_num': 2}
{'author_id': 'ahrytsiuk', 'LOC': 69, 'Add': 66, 'Delete': 3, 'Files': 5, 'bug_num': 1}
{'author_id': 'snicoll', 'LOC': 259, 'Add': 122, 'Delete': 137, 'Files': 4, 'bug_num': 1}
{'author_id': 'mbhave', 'LOC': 89, 'Add': 54, 'Delete': 35, 'Files': 2, 'bug_num': 1}
{'author_id': 'wilkinsona', 'LOC': 63328, 'Add': 23855, 'Delete': 39473, 'Files': 300, 'bug_num': 2}
{'author_id': 'philwebb', 'LOC': 441, 'Add': 264, 'Delete': 177, 'Files': 15, 'bug_num': 2}
{'author_id': 'bclozel', 'LOC': 70, 'Add': 34, 'Delete': 36, 'Files': 5, 'bug_num': 1}
{'author_id': 'dreis2211', 'LOC': 5, 'Add': 4, 'Delete': 1, 'Files': 1, 'bug_num': 1}
{'author_id': 'ayudovin', 'LOC': 17, 'Add': 15, 'Delete': 2, 'Files': 2, 'bug_num': 1}
{'author_id': 'wilkinsona', 'LOC': 96, 'Add': 91, 'Delete': 5, 'Files': 3, 'bug_num': 2}
{'author_id': 'philwebb', 'LOC': 921, 'Add': 921, 'Delete': 0, 'Files': 20, 'bug_num': 2}
{'author_id': 'wilk

In [15]:
# go through the cm_pj_usr table
# use project_name to get project_info
# use pj_usr_name to get user_info
# use commits to get commit_info
# save data in format (bug_num, project_info, user_info, commit_info) into a csv
with open('data_copy3.csv', 'w') as f:
    csv_write = csv.writer(f)
    csv_head = project_features + user_features + user_features_patch + commit_features
    # csv_head = ["commit_id","bug_num","project_size","project_watchers","committer_contributions", "loc", "num_of_changed_files"]
    csv_write.writerow(csv_head)

    for key, value in commits.items():
        # get the three dictionary
        project_info = projects["spring-projects.spring-boot"]
        commit_info = value
        user_key = commit_info['author_id']
        if user_key not in users.keys():
            continue
        user_info = users[user_key]
        # get the data
        data_row = []
        for i in range(len(project_features)):
            feature_str = project_features[i]
            data_row.append(project_info[feature_str])

        for i in range(len(user_features)):
            feature_str = user_features[i]
            data_row.append(user_info[feature_str])

        for i in range(len(user_features_patch)):
            feature_str = user_features_patch[i]
            data_row.append(user_info[feature_str])

        for i in range(len(commit_features)):
            feature_str = commit_features[i]
            data_row.append(commit_info[feature_str])

        # write into csv
        csv_write.writerow(data_row)