In [1]:
import pandas as pd
import json
import requests

from itertools import groupby

In [2]:
# Enables inline plotting and pulls numpy and matplotlib names into the notebook
# namespace (see the "Populating the interactive namespace" output below).
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
# Where the commit-stats REST service lives and which repository to query.
IP = 'http://129.125.5.170'
PORT = '8080'
COMMIT_STATS_API_URL = '/GitHubCommitStatsRest/webresources/commitstatsfiles/repo/'
REPO_ID = '51905353'

# Endpoint prefix; the repository id is appended when the request is made.
REQUEST_URL = '{}:{}{}'.format(IP, PORT, COMMIT_STATS_API_URL)

In [4]:
def get_date(date):
    """Parse ``date`` with ``pd.Timestamp`` and return only its calendar-date part."""
    timestamp = pd.Timestamp(date)
    return timestamp.date()

def get_min_date(commitStatsFiles):
    """Return the calendar date of the first entry's 'commitDate'.

    NOTE(review): treating the first entry as the minimum presumes the list is
    ordered oldest-first - confirm against the service.
    """
    first_entry = commitStatsFiles[0]
    return get_date(first_entry.get('commitDate'))

def get_max_date(commitStatsFiles):
    """Return the calendar date of the last entry's 'commitDate'.

    NOTE(review): treating the last entry as the maximum presumes the list is
    ordered oldest-first - confirm against the service.
    """
    last_entry = commitStatsFiles[-1]
    return get_date(last_entry.get('commitDate'))

def getCommitStatsFilesList(commitStatsFiles):
    """Wrap each entry of ``commitStatsFiles`` in a ``CommitStatsFile``.

    Every entry is read with ``.get``, so a missing key becomes ``None`` on the
    resulting object. The returned list keeps the input order.
    """
    field_names = ('additions', 'deletions', 'changes', 'status', 'commitDate')
    return [
        CommitStatsFile(*(entry.get(field) for field in field_names))
        for entry in commitStatsFiles
    ]

def get_date_range_index(min_project_date, max_project_date):
    """Build a daily (``freq='D'``) ``pd.date_range`` from the first to the last date, inclusive."""
    daily_index = pd.date_range(min_project_date, max_project_date, freq='D')
    return daily_index

class CommitStatsFile:
    """Plain record holding the five fields of one commit-stats file entry.

    The constructor stores its arguments unchanged; no validation or
    conversion is performed.
    """

    def __init__(self, additions, deletions, changes, status, commitDate):
        # Numeric-looking fields are kept exactly as passed in.
        self.additions, self.deletions, self.changes = additions, deletions, changes
        # The notebook later compares status against 'added', 'modified',
        # 'removed' and 'renamed'.
        self.status = status
        # Left unparsed here; callers convert it with get_date when needed.
        self.commitDate = commitDate

In [5]:
# Fetch the commit statistics for REPO_ID from the REST service.
# The timeout stops the cell from hanging forever when the server is unreachable,
# and raise_for_status() reports HTTP errors here rather than as a confusing
# JSON decoding failure on an error page.
response = requests.get(REQUEST_URL + REPO_ID, timeout=30)
response.raise_for_status()
json_data = json.loads(response.text)

In [6]:
# Raw per-file entries stored under the 'commitStatsFiles' key of the response.
commitStatsFiles = json_data['commitStatsFiles']
# The same entries wrapped as CommitStatsFile objects, in the same order.
commitStatsFilesList = getCommitStatsFilesList(commitStatsFiles)

In [7]:
# Project date span, read from the first and last raw entries respectively.
# NOTE(review): presumes the service returns entries in chronological order - confirm.
min_project_date = get_min_date(commitStatsFiles)
max_project_date = get_max_date(commitStatsFiles)

In [8]:
# Daily index spanning the project; the frame starts out with no columns.
date_range_index = get_date_range_index(min_project_date, max_project_date)
df = pd.DataFrame(index=date_range_index)

# itertools.groupby merges only *adjacent* items and returns a one-shot iterator.
# Sorting by day first guarantees one group per day, and materialising the result
# as a list of (day, [files]) pairs lets the cell that consumes `groups` be re-run
# without silently iterating over an already exhausted iterator.
groups = [
    (day, list(files))
    for day, files in groupby(
        sorted(commitStatsFilesList, key=lambda csf: get_date(csf.commitDate)),
        lambda csf: get_date(csf.commitDate),
    )
]

In [9]:
# One Series per file status, indexed by every calendar day of the project.
# The dtype is spelled out because an empty Series without one raises a
# deprecation warning on pandas 1.x and is object-typed on pandas 2.x.
# Days without any commit deliberately stay NaN.
added = pd.Series(index=date_range_index, dtype='float64')
modified = pd.Series(index=date_range_index, dtype='float64')
removed = pd.Series(index=date_range_index, dtype='float64')
renamed = pd.Series(index=date_range_index, dtype='float64')

# Dispatch table: file status -> the Series that counts it.
status_counts = {
    'added': added,
    'modified': modified,
    'removed': removed,
    'renamed': renamed,
}

for key, group in groups:
    grp = list(group)
    # Look the day up as a Timestamp so it matches the DatetimeIndex labels
    # instead of depending on how pandas treats datetime.date keys.
    day = pd.Timestamp(key)
    for counts in status_counts.values():
        # Start the day at zero only once, so a day that appears in more than
        # one group keeps the counts it has already accumulated.
        if day not in counts.index or pd.isna(counts[day]):
            counts[day] = 0
    for item in grp:
        counts = status_counts.get(item.status)
        if counts is not None:  # any other status is ignored, as before
            counts[day] += 1

In [10]:
# Attach the per-status daily counts to the frame as columns, in this order.
for column_name, daily_counts in (
    ('added', added),
    ('modified', modified),
    ('removed', removed),
    ('renamed', renamed),
):
    df[column_name] = daily_counts

print(df)

            added  modified  removed  renamed
2016-02-05    1.0       0.0      0.0      0.0
2016-02-06    NaN       NaN      NaN      NaN
2016-02-07    NaN       NaN      NaN      NaN
2016-02-08    NaN       NaN      NaN      NaN
2016-02-09    NaN       NaN      NaN      NaN
2016-02-10    NaN       NaN      NaN      NaN
2016-02-11    NaN       NaN      NaN      NaN
2016-02-12    NaN       NaN      NaN      NaN
2016-02-13    NaN       NaN      NaN      NaN
2016-02-14    NaN       NaN      NaN      NaN
2016-02-15    NaN       NaN      NaN      NaN
2016-02-16    NaN       NaN      NaN      NaN
2016-02-17  198.0       9.0      0.0      0.0
2016-02-18    NaN       NaN      NaN      NaN
2016-02-19    NaN       NaN      NaN      NaN
2016-02-20    0.0      19.0      0.0      1.0
2016-02-21    NaN       NaN      NaN      NaN
2016-02-22    NaN       NaN      NaN      NaN
2016-02-23    NaN       NaN      NaN      NaN
2016-02-24    NaN       NaN      NaN      NaN
2016-02-25    NaN       NaN      N