In [None]:
import datetime

import pandas as pd
from astropy.table import Table
from astropy.time import Time
import numpy as np
import matplotlib.pyplot as plt
# Apply the 'apw-notebook' matplotlib style sheet to the figures drawn below.
# NOTE(review): presumably a locally installed custom style rather than one
# shipped with matplotlib -- confirm it is available before running.
plt.style.use('apw-notebook')
%matplotlib inline

from scipy.stats import binned_statistic

import calendar
# Lookup from month abbreviation to month number (position in
# ``calendar.month_abbr``, whose entry 0 is the empty string).
month_map = {abbr: number for number, abbr in enumerate(calendar.month_abbr)}

In [None]:
# Load the commit log; later cells read its 'name' and 'date' columns.
df = pd.read_csv(filepath_or_buffer='astropy_commits.csv')

In [None]:
# Commit counts of the 16 most frequent values in the 'name' column.
df['name'].value_counts().head(16)

In [None]:
# Derive per-commit time information from the raw 'date' strings in ``df``.
# Each string is split on whitespace and read positionally as
# "<month abbr> <day> <HH:MM:SS> <year> [anything else]". Trailing fields
# (presumably a UTC offset -- TODO confirm against the CSV) are discarded, so
# no offset is applied to the resulting times.
hour_weights = np.array([1., 1/60., 1/3600])  # (h, m, s) -> fractional hours

rows = []
for date_str in df['date']:
    ss = dict()

    # parse date
    month_name, day, hms, year, *_ = date_str.split()
    hms = list(map(int, hms.split(':')))

    # time of day as a fractional hour, e.g. 13:30:00 -> 13.5
    ss['hour'] = np.sum(hour_weights * np.array(hms))

    # ``datetime`` is imported as a module, so the class has to be reached as
    # ``datetime.datetime``; calling the module itself raises TypeError.
    ss['time'] = Time(datetime.datetime(int(year), month_map[month_name], int(day),
                                        hms[0], hms[1], hms[2]))

    rows.append(ss)

# One table row per commit, in the same order as ``df`` (later cells use
# boolean masks built from this table to index ``df``).
more_data = Table(rows)
more_data['time'] = Time(more_data['time'])
more_data['month'] = [x.datetime.month for x in more_data['time']]

In [None]:
# Display the column labels of the commits DataFrame loaded from the CSV.
df.columns

In [None]:
# How many of the most active contributors can be added, in descending order
# of commit count, while their cumulative share of commits stays below 80%.
commits_per_name = df['name'].value_counts()
cumulative_share = np.cumsum(commits_per_name) / len(df)
np.sum(cumulative_share < 0.8)

In [None]:
# Cumulative fraction of all commits contributed by the N most active
# contributors, over the full history in ``df``.
fig = plt.figure(figsize=(8,6))

cum_frac_all = np.array(np.cumsum(df['name'].value_counts()) / len(df))

# Plot against a 1-based contributor count. With the implicit 0-based index
# the top contributor would sit at x=0, which a log axis cannot draw, and
# every other point would be shifted one contributor to the left.
n_contrib_all = np.arange(1, len(cum_frac_all) + 1)
plt.plot(n_contrib_all, cum_frac_all,
         marker='', linestyle='-', lw=2)

plt.xscale('log')
plt.xlabel('number of contributors')
plt.ylabel('cum. fraction of commits')

fig.set_facecolor('w')

In [None]:
# Bin edges for the per-year analysis: midnight on 1 January of eight
# consecutive years starting with 2011.
first_year, n_edges = 2011, 8
edges = [Time(datetime.datetime(year, 1, 1))
         for year in range(first_year, first_year + n_edges)]

In [None]:
# One cumulative-contribution curve per bin between consecutive ``edges``,
# plus, for each bin, the number of top contributors whose cumulative share
# of that bin's commits stays at or below 50% (``frac_50``) / 85% (``frac_85``).
# ``date_range`` collects the start year of each bin as a string.
fig = plt.figure(figsize=(8,6))

frac_85 = []
frac_50 = []
date_range = []
for start, stop in zip(edges[:-1], edges[1:]):
    # Half-open bin [start, stop): a commit stamped exactly on an edge is
    # counted once (in the later bin) instead of being dropped by both bins.
    idx = (more_data['time'] >= start) & (more_data['time'] < stop)

    label = '{0}–{1}'.format(start.datetime.year, stop.datetime.year)

    sub_df = df[idx]

    # Computed once and reused for the curve and both thresholds.
    cum_frac = np.array(np.cumsum(sub_df['name'].value_counts()) / len(sub_df))

    # 1-based contributor count: an x value of 0 cannot be drawn on the log
    # axis, and a 0-based index would shift the curve one contributor left.
    n_contrib = np.arange(1, len(cum_frac) + 1)
    plt.plot(n_contrib, cum_frac,
             marker='', linestyle='-', lw=2, label=label)

    frac_50.append(np.sum(cum_frac <= 0.50))
    frac_85.append(np.sum(cum_frac <= 0.85))
    date_range.append(str(start.datetime.year))

plt.legend(loc='lower right', fontsize=18)
plt.xscale('log')
plt.xlabel('number of contributors')
plt.ylabel('cum. fraction of commits')

fig.set_facecolor('w')

In [None]:
# Per-bin count of top contributors that together account for at most
# 50% / 85% of the bin's commits (``frac_50`` / ``frac_85``), drawn with one
# x position per entry of ``date_range``.
fig,ax = plt.subplots(1,1,figsize=(8,6))

ax.plot(frac_50, marker='o', label=r'$\leq$50% commits')
ax.plot(frac_85, marker='o', label=r'$\leq$85% commits')

ax.set_xticks(list(range(len(date_range))))
ax.set_xticklabels(date_range)

# Keep the 0-20 range as the minimum, but grow it when a count is larger so
# that points are never silently clipped off the top of the axes.
y_max = int(max([20, *frac_50, *frac_85]))
ax.set_yticks(list(range(y_max+1)))
ax.set_ylim(0, y_max)

ax.set_xlabel('year')
ax.set_ylabel('number of contributors')

# tick_params expects booleans; a string such as 'on' only works by being
# truthy (and 'off' would be truthy as well).
ax.xaxis.set_tick_params(top=True)
ax.yaxis.set_tick_params(right=True)

ax.legend(loc='upper left', fontsize=20)
fig.set_facecolor('w')