In [8]:
from subprocess import PIPE, CalledProcessError, run

import pandas as pd
from watchtower import comments_, issues_, commits_, reviews_

## Projects we wish to run

In [16]:
# GitHub organisation -> repositories within that organisation to process.
projects = dict(
    jupyter=[
        'repo2docker',
        'notebook',
        'nbconvert',
    ],
    jupyterhub=[
        'zero-to-jupyterhub-k8s',
        'the-littlest-jupyterhub',
        'jupyterhub',
        'binderhub',
        'binder',
        'team-compass',
    ],
    jupyterlab=[
        'jupyterlab',
    ],
)

In [17]:
# Set to False to skip re-running ../scripts/update_data.py for every repo.
update_data = True
if update_data:
    try:
        for org, repos in projects.items():
            print(org)
            for repo in repos:
                print('    ' + repo)
                # Pass argv as an explicit list rather than splitting a formatted
                # string, so org/repo values are never re-split on whitespace.
                run(["python", "../scripts/update_data.py", org, repo],
                    check=True, stderr=PIPE, stdout=PIPE)
    except CalledProcessError as ee:
        # Only CalledProcessError carries the captured stderr; any other
        # exception (e.g. the interpreter not being found) propagates with its
        # own traceback instead of being masked by an AttributeError here.
        # The first failing repo stops the remaining updates.
        print("update_data.py failed for {}/{} (exit code {})".format(org, repo, ee.returncode))
        print(ee.stderr.decode())
        

jupyter
    repo2docker
    notebook
    nbconvert
jupyterhub
    zero-to-jupyterhub-k8s
    the-littlest-jupyterhub
    jupyterhub
    binderhub
    binder
    team-compass
jupyterlab
    jupyterlab


## Munge

In [23]:
# Build one tidy frame of comments and one of commits across every repo in
# `projects`. Each frame is indexed by 'date' and carries 'org', 'repo' and
# 'user' columns plus a constant 'count' of 1 per row.
#
# Per-repo frames are collected in lists and concatenated once at the end;
# growing a DataFrame with pd.concat inside the loop re-copies all previous
# rows on every iteration.
comment_frames = []
commit_frames = []
for org, repos in projects.items():
    for repo in repos:
        # Comments
        icomments = comments_.load_comments(org, repo)
        if icomments is not None:
            # .assign builds a new frame, leaving the loaded one untouched.
            icomments = icomments.assign(
                org=org,
                repo=repo,
                user=icomments['user'].map(lambda a: a['login']),
                date=icomments['created_at'].map(pd.to_datetime),
            )
            comment_frames.append(
                icomments[['org', 'repo', 'user', 'date']].set_index(['date']))

        # Commits
        icommits = commits_.load_commits(org, repo)
        if icommits is not None:
            # Commits without an author record cannot be attributed to a login.
            icommits = icommits.dropna(subset=['author'])
            icommits = icommits.assign(
                user=icommits['author'].map(lambda a: a['login']),
                date=pd.to_datetime(icommits['date']),
            )
            icommits = icommits[['date', 'user']].set_index('date')
            commit_frames.append(icommits.assign(org=org, repo=repo))

# Keep only rows matching 'date > 1990' (presumably to discard records with
# bogus timestamps - TODO confirm). When nothing was loaded, fall back to an
# empty frame without filtering, since it has no 'date' index to query.
comments = (pd.concat(comment_frames).query('date > 1990')
            if comment_frames else pd.DataFrame([]))
commits = (pd.concat(commit_frames).query('date > 1990')
           if commit_frames else pd.DataFrame([]))

# One row == one event; summing 'count' later yields events per period.
comments = comments.assign(count=1)
commits = commits.assign(count=1)

In [45]:
# Convert into daily comments to reduce size: one row per
# (org, repo, user, day) holding the number of comments made that day.
# 'count' is selected explicitly so the resample only ever sums that column,
# independent of how the installed pandas treats the grouping columns.
daily_comments = (
    comments
    .groupby(['org', 'repo', 'user'])['count']
    .resample('D')
    .sum()
    .dropna()
    .reset_index()
    # Resampling emits a row for every day in each group's span; drop the empty days.
    .query('count != 0')
    .rename(columns={'count': 'comments'})
)

In [46]:
# Daily commits: one row per (org, repo, user, day) holding the number of
# commits made that day.
# 'count' is selected explicitly so the resample only ever sums that column,
# independent of how the installed pandas treats the grouping columns.
daily_commits = (
    commits
    .groupby(['org', 'repo', 'user'])['count']
    .resample('D')
    .sum()
    .dropna()
    .reset_index()
    # Resampling emits a row for every day in each group's span; drop the empty days.
    .query('count != 0')
    .rename(columns={'count': 'commits'})
)

In [47]:
# Outer-join daily commits with daily comments on (org, repo, user, date) so a
# day with only one kind of activity is still kept, then write the result out.
daily = daily_commits.merge(
    daily_comments,
    how='outer',
    on=['org', 'repo', 'user', 'date'],
)
daily.to_csv('../data/daily.csv')