# Looking into `revision.csv`

Read in data from `revision.csv`. 
 * `date` column - convert from unix timestamp to datetime. 
 * `message` column - convert from hex to text (decoded as ISO-8859-1).

In [1]:
import pandas as pd
import time
import numpy as np

def convert_hex(message, encoding="ISO-8859-1"):
    """Decode a hex-encoded string into text.

    Parameters
    ----------
    message : str, float or None
        Hex digits to decode; spaces between byte pairs are accepted by
        ``bytes.fromhex``. A float (such as the NaN pandas uses for a
        missing cell) or ``None`` is treated as "no message".
    encoding : str, optional
        Codec used to turn the decoded bytes into text. Defaults to
        ``"ISO-8859-1"``, which maps every byte value to a character, so
        decoding with the default never fails.

    Returns
    -------
    str or None
        The decoded text, or ``None`` when ``message`` is missing.

    Raises
    ------
    ValueError
        If ``message`` is a string that is not valid hexadecimal.
    """
    # Missing values carry no hex payload; return None explicitly rather than
    # falling off the end of the function.
    if message is None or isinstance(message, float):
        return None
    return bytes.fromhex(message).decode(encoding=encoding)
    
def convert_date(unixTS):
    """Render a Unix timestamp string as ``MM-DD-YYYY HH:MM:SS`` local time.

    The last three characters of ``unixTS`` are discarded (presumably the
    millisecond digits -- confirm against the CSV) and the remainder is
    parsed as whole seconds since the epoch. The result is expressed in the
    local timezone of the machine running the notebook.
    """
    epoch_seconds = int(unixTS[:-3])
    local_fields = time.localtime(epoch_seconds)
    return time.strftime("%m-%d-%Y %H:%M:%S", local_fields)

# Load the revision dump. The converters run on each raw cell while the CSV is
# parsed: `date` becomes a formatted local-time string and `message` is decoded
# from hex to text.
revisions = pd.read_csv(
    "../../revision.csv",
    converters={"date": convert_date, "message": convert_hex},
)

# convert_date always emits "%m-%d-%Y %H:%M:%S", so parse with that exact
# format instead of the deprecated `infer_datetime_format` guess.
revisions["date"] = pd.to_datetime(revisions["date"], format="%m-%d-%Y %H:%M:%S")

revisions

Unnamed: 0,id,date,message
0,02 ce e1 52 38 36 73 a9 73 e2 2d 37 7b 6a 72 1...,2009-07-13 17:24:28,"Make stdin for test scripts empty, so that tes..."
1,04 8b 6e 52 83 41 31 f9 27 76 9a 9b 65 0c ce 5...,2017-05-23 17:18:40,Update CONTRIBUTING.md\n\nFixing broken issues...
2,04 ea d6 c5 8f 91 39 a7 9e 0c 0f d9 b5 bc 74 4...,2005-01-20 11:07:47,sane timestamps by default\n\ngit-svn-id: svn:...
3,05 51 aa a2 44 f5 7d 2d cb ec d8 de 51 79 e9 3...,2015-04-07 14:32:36,"Merge ""Document ports creating configuration f..."
4,05 c6 18 7f aa 4b c1 46 43 94 2f 76 34 a3 0f 8...,2013-11-22 17:31:50,Fixed #21497 -- Forced conversion to bytes for...
...,...,...,...
5064052,a3 19 c1 66 60 09 a9 6d 65 31 aa fd 07 14 3b c...,2014-11-19 20:10:34,Merge pull request #809 from StackStorm/update...
5064053,a7 65 2b 76 8a 5d 2a 28 10 12 2e 37 9c d1 fc 4...,2016-10-09 13:04:55,Issue #28339: Remove ByteString.register(memor...
5064054,aa a8 b4 3d 1f 54 d2 6c bd ad de ef b8 69 84 c...,2014-07-31 10:35:24,Merge pull request #661 from pitrou/tests_prof...
5064055,ac 5d 1f 2d 2c d8 3d 11 60 45 79 70 36 3c ea 0...,2014-04-07 05:45:04,Updated openstack/openstack\nProject: openstac...


### Looking at the first 10 entries.

In [2]:
# Print the first ten revisions in full (the table view truncates long
# messages), with a row of asterisks after each record.
for entry in revisions.head(10).itertuples():
    print(
        entry.Index,
        "id:", entry.id,
        "\ndate:", entry.date,
        "\nmessage:", entry.message,
    )
    print("*******************************************************************************************************")

0 id: 02 ce e1 52 38 36 73 a9 73 e2 2d 37 7b 6a 72 1a 70 68 2a 71 
date: 2009-07-13 17:24:28 
message: Make stdin for test scripts empty, so that tests don't accidentally hang waiting
for stdin.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@75506 91177308-0d34-0410-b5e6-96231b3b80d8

*******************************************************************************************************
1 id: 04 8b 6e 52 83 41 31 f9 27 76 9a 9b 65 0c ce 5b 19 a0 80 ac 
date: 2017-05-23 17:18:40 
message: Update CONTRIBUTING.md

Fixing broken issues link.
*******************************************************************************************************
2 id: 04 ea d6 c5 8f 91 39 a7 9e 0c 0f d9 b5 bc 74 4c 1e db 17 f6 
date: 2005-01-20 11:07:47 
message: sane timestamps by default

git-svn-id: svn://svn.twistedmatrix.com/svn/Twisted/trunk@12890 bbbe8e31-12d6-0310-92fd-ac37d47ddeeb

*******************************************************************************************************
3 id

### Which dates and times have the most commits?
* Top 20 listed below.

In [3]:
# Count commits per exact timestamp (value_counts sorts descending) and keep
# the 20 most frequent.
revisions["date"].value_counts().iloc[:20]

2017-05-23 17:18:40    262161
2017-10-23 16:49:07    262152
2016-04-08 06:44:50       216
2004-08-17 11:34:28       156
2014-06-02 05:05:46       148
2015-12-23 11:26:14       145
2004-01-29 19:01:24       144
2018-05-13 10:36:58       137
2014-06-20 15:00:00       130
2016-12-09 00:03:32       114
2005-05-03 05:13:17       112
2016-08-30 06:22:36       109
2015-11-23 22:40:29       102
2016-12-08 23:45:40        99
2015-06-17 10:50:10        98
2016-06-15 20:20:40        97
2015-12-02 04:28:43        96
2015-06-18 08:05:21        94
2016-10-29 11:18:00        92
2016-10-12 19:05:17        91
Name: date, dtype: int64

### List the count of commits at each timestamp, down to the second.

In [4]:
# Commit count for every distinct timestamp; the trailing semicolon stops the
# notebook from rendering the (very long) result.
revisions.groupby("date")["date"].agg(["count"]);

### List the number of commits each year.
* Note that years 2002 and 2013 do not have complete data.

In [5]:
# Commit count per calendar year, keyed by the year part of `date`.
(
    revisions
    .groupby(revisions["date"].dt.year)["date"]
    .agg(["count"])
)

Unnamed: 0_level_0,count
date,Unnamed: 1_level_1
2002,19
2003,29244
2004,27638
2005,25241
2006,37533
2007,56921
2008,82258
2009,112824
2010,159198
2011,215333


### List number of commits by month and year for the last 5 years.

In [6]:
# Commit count per (year, month) pair, keeping only the last 59 groups.
# NOTE(review): 59 rather than 60 months -- presumably because the final year
# in the data is incomplete; confirm against the full output.
(
    revisions
    .groupby([revisions["date"].dt.year, revisions["date"].dt.month])["date"]
    .agg(["count"])
    .iloc[-59:]
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
date,date,Unnamed: 2_level_1
2014,1,48200
2014,2,45404
2014,3,49234
2014,4,43893
2014,5,40031
2014,6,43870
2014,7,50766
2014,8,48062
2014,9,48201
2014,10,48052
