### Extract additions, deletions, and commits of the last 52 weeks

In [1]:
import json
import time, datetime
import pandas as pd
import os
from dateutil import parser

In [5]:
# Load the weekly per-repo statistics, using the 'Unnamed: 0' column as the row index.
stats_csv = './data/df_stat_repo_main.csv'
stats = pd.read_csv(stats_csv, index_col='Unnamed: 0')

In [6]:
# Preview the first five rows of the raw statistics.
stats.head(n=5)

Unnamed: 0,owner,repo,a,c,d,date
0,jquery,jquery-ui,3946,7,3635,2008-05-18 00:00:00
1,jquery,jquery-ui,2512,42,1574,2008-05-25 00:00:00
2,jquery,jquery-ui,52853,83,49087,2008-06-01 00:00:00
3,jquery,jquery-ui,5406,41,1989,2008-06-08 00:00:00
4,jquery,jquery-ui,437,18,365,2008-06-15 00:00:00


In [7]:
# Convert the ISO-formatted date strings into datetime64 values in one vectorized call.
# Unlike a row-by-row parser.parse, this is also safe to re-run on a column that
# has already been converted.
stats['date'] = pd.to_datetime(stats['date'])

In [8]:
# Keep only the rows dated on or after 2015-09-20 (the 52-week window analysed below).
# .copy() detaches the result from `stats`, so the columns added to stats52 later
# are written to an independent frame instead of a slice (no SettingWithCopyWarning).
stats52 = stats[stats['date'] >= '2015-09-20'].copy()

In [9]:
# Size of the filtered frame as (rows, columns).
stats52.shape

(520, 6)

In [14]:
# Derive the ISO week number and the calendar year from each date.
# Series.dt.weekofyear was deprecated in pandas 1.1 and removed in 2.0;
# dt.isocalendar().week is its replacement. isocalendar() returns the nullable
# UInt32 dtype, so cast back to plain int to keep the column type unchanged.
# Note: the week is ISO-based while the year is the calendar year, so an early-January
# date can carry week 53 together with the new year.
stats52['weekofyear'] = stats52['date'].dt.isocalendar().week.astype(int)
stats52['year'] = stats52['date'].dt.year

In [12]:
# Preview the first five rows of the 52-week frame.
# NOTE(review): the saved output of this cell lacks the `year` column; the execution
# counts show it ran before the cell that adds it. Re-run top to bottom to refresh.
stats52.head(n=5)

Unnamed: 0,owner,repo,a,c,d,date,weekofyear
2011,IanLunn,Hover,0,0,0,2015-09-20,38
2012,JakeWharton,ViewPagerIndicator,0,0,0,2015-09-20,38
2013,carrierwaveuploader,carrierwave,0,0,0,2015-09-20,38
2014,eczarny,spectacle,1937,12,1469,2015-09-20,38
2015,inconshreveable,ngrok,0,0,0,2015-09-20,38


### Reconstruct the week of year

In [13]:
# Count how many distinct week numbers occur in the window.
distinct_weeks = stats52['weekofyear'].unique()
len(distinct_weeks)

52

In [15]:
# Week numbers that occur in rows whose calendar year is 2015.
in_2015 = stats52['year'] == 2015
stats52.loc[in_2015, 'weekofyear'].unique()

array([38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52])

In [16]:
# Week numbers that occur in rows whose calendar year is 2016.
in_2016 = stats52['year'] == 2016
stats52.loc[in_2016, 'weekofyear'].unique()

array([53,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36])

In [17]:
# Renumber the weeks 1..52 in chronological order across the year boundary
# (previously: ISO weeks 38-53 -> 1-16 via "- 37", ISO weeks 1-36 -> 17-52 via "+ 16").
# The index is now derived from the date itself: whole weeks elapsed since the
# earliest date in the window, plus one. This avoids the hard-coded offsets and,
# unlike shifting the existing values, gives the same result when the cell is re-run.
window_start = stats52['date'].min()
stats52['weekofyear'] = (stats52['date'] - window_start).dt.days // 7 + 1

In [18]:
# Distinct week indices after renumbering.
stats52['weekofyear'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52])

In [20]:
# Preview the first five rows after the week renumbering.
stats52.head(n=5)

Unnamed: 0,owner,repo,a,c,d,date,weekofyear,year
2011,IanLunn,Hover,0,0,0,2015-09-20,1,2015
2012,JakeWharton,ViewPagerIndicator,0,0,0,2015-09-20,1,2015
2013,carrierwaveuploader,carrierwave,0,0,0,2015-09-20,1,2015
2014,eczarny,spectacle,1937,12,1469,2015-09-20,1,2015
2015,inconshreveable,ngrok,0,0,0,2015-09-20,1,2015


In [37]:
# Give the abbreviated stat columns descriptive names and drop the helper columns.
# Renaming by name (rather than assigning a positional list to .columns) cannot
# mislabel data if the number or order of columns differs, and building a new frame
# avoids in-place mutation. errors='ignore' keeps the cell re-runnable once
# 'date' and 'year' are already gone.
stats52 = (
    stats52
    .rename(columns={'a': 'additions', 'c': 'commits', 'd': 'deletions'})
    .drop(columns=['date', 'year'], errors='ignore')
)

### Pivot additions, deletions, commits

In [73]:
# Reshape weekly additions to wide form: one row per repo, one column per week index.
additions52 = stats52.pivot(
    index='repo',
    columns='weekofyear',
    values='additions',
)

### Write out files

In [78]:
# Save the long-format 52-week table without writing the row index.
stats52_csv = './data/stats52_sample10.csv'
stats52.to_csv(stats52_csv, index=False)