In [65]:
import json
from datetime import datetime 
from collections import defaultdict
import csv
from tabulate import tabulate
import requests

In [66]:
url = 'https://api.github.com/repos/tensorflow/datasets' # url of repo to be analysed

# Bound the request so a stalled connection cannot hang the notebook, and
# fail immediately with an HTTPError on any 4xx/5xx response.
info = requests.get(url, timeout=30)
info.raise_for_status()

# Decode the JSON payload once and reuse it for both timestamps.
repo_info = info.json()
timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

# First and last calendar year covered by the analysis, taken from the
# repository's 'created_at' and 'updated_at' fields.
startyear = datetime.strptime(repo_info['created_at'], timestamp_format).year
endyear = datetime.strptime(repo_info['updated_at'], timestamp_format).year

In [67]:
# Chronologically ordered quarter keys of the form '<year>_Q<n>', four per
# year, for every year from startyear through endyear (inclusive).
# Built with a comprehension so no loop variables leak into the notebook
# namespace (a leaked `quarter` would collide with the month -> quarter
# lookup that shares that name).
indexes = [
    str(year) + '_' + label
    for year in range(startyear, endyear + 1)
    for label in ('Q1', 'Q2', 'Q3', 'Q4')
]

In [68]:
# Per-quarter activity totals, seeded with zero for every key in `indexes`
# so quarters without any activity still appear in the results.
commits_count = dict.fromkeys(indexes, 0)
issue_count = dict.fromkeys(indexes, 0)
pr_count = dict.fromkeys(indexes, 0)

# Per-quarter sets of contributor identifiers; a quarter's set is created
# on first access.
commiters_new = defaultdict(set)
iss_creators_new = defaultdict(set)
pr_senders_new = defaultdict(set)

In [69]:
# Calendar month number (1-12) -> quarter label ('Q1'..'Q4').
quarter = {month_no: 'Q' + str((month_no + 2) // 3) for month_no in range(1, 13)}

# The input holds one JSON document per line. Each document has a 'category'
# and a 'data' payload; it is tallied into the quarter its timestamp falls in.
with open("./tf_analysis.json") as events_file:
    for raw_line in events_file:
        record = json.loads(raw_line)
        category = record['category']

        if category == 'commit':
            # Commits carry a git-style 'CommitDate' and an 'Author' field.
            when = datetime.strptime(record['data']['CommitDate'], "%a %b %d %H:%M:%S %Y %z")
            bucket = '{}_{}'.format(when.year, quarter[when.month])
            commits_count[bucket] += 1
            commiters_new[bucket].add(record['data']['Author'])
            continue

        # Every non-commit record is read through 'created_at' and
        # 'user_data.login'; anything that is not an issue is counted as a
        # pull request.
        when = datetime.strptime(record['data']['created_at'], "%Y-%m-%dT%H:%M:%SZ")
        bucket = '{}_{}'.format(when.year, quarter[when.month])
        if category == 'issue':
            tally, people = issue_count, iss_creators_new
        else:
            tally, people = pr_count, pr_senders_new
        tally[bucket] += 1
        people[bucket].add(record['data']['user_data']['login'])

In [70]:
quarter_count = len(indexes)

# Contributors already seen in an earlier quarter, one running set per
# activity type. These start empty and are independent set objects: the
# per-quarter registries are keyed by strings such as '2018_Q1', so they must
# not be indexed with an integer, and sharing a set object with a quarter's
# entry would make the updates below inflate that quarter.
old_commiters = set()
old_pr_senders = set()
old_iss_creators = set()

# Walk the quarters in chronological order, including the first one, and
# reduce each quarter's set to the people who had not appeared before.
for quarter_key in indexes:
    for per_quarter, already_seen in (
        (commiters_new, old_commiters),
        (iss_creators_new, old_iss_creators),
        (pr_senders_new, old_pr_senders),
    ):
        first_timers = per_quarter[quarter_key] - already_seen
        per_quarter[quarter_key] = first_timers
        already_seen |= first_timers

In [71]:
header = ['Quarter','# Commits','# Issues','# PullRequests','# NewCommitters','# NewIssueSubmitters','# NewPRSubmitters' ]

# newline='' hands line-ending control to the csv writer; without it an extra
# blank line appears after every row on platforms that translate '\n'.
with open('data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

    # One row per quarter, in chronological order: the activity totals followed
    # by the sizes of the first-time contributor sets.
    for key in indexes:
        row = (
            key,
            commits_count[key],
            issue_count[key],
            pr_count[key],
            len(commiters_new[key]),
            len(iss_creators_new[key]),
            len(pr_senders_new[key]),
        )
        writer.writerow(row)
    

In [73]:
# Load the exported CSV (header row included) and render it as a plain-text
# table.
with open('data.csv') as csvfile:
    table_rows = list(csv.reader(csvfile, delimiter=','))
print(tabulate(table_rows))

-------  ---------  --------  --------------  ---------------  --------------------  -----------------
Quarter  # Commits  # Issues  # PullRequests  # NewCommitters  # NewIssueSubmitters  # NewPRSubmitters
2018_Q1  0          0         0               0                0                     0
2018_Q2  0          0         0               0                0                     0
2018_Q3  10         4         1               3                3                     1
2018_Q4  272        7         1               9                6                     1
2019_Q1  414        312       175             30               71                    34
2019_Q2  0          0         0               0                0                     0
2019_Q3  0          0         0               0                0                     0
2019_Q4  0          0         0               0                0                     0
-------  ---------  --------  --------------  ---------------  --------------------  ------------