# Looking into `revision.csv`

Read in data from `revision.csv`. 
 * `date` column - convert from unix timestamp to datetime. 
 * `message` column - convert from hex to ascii.

In [1]:
import pandas as pd
import time
import numpy as np

def convert_hex(message):
    if not isinstance(message, float):
        return bytes.fromhex(message).decode(encoding="ISO-8859-1")
    
def convert_date(unixTS):
    return time.strftime("%m-%d-%Y %H:%M:%S", time.localtime(int(unixTS[:-3])))

revisions = pd.read_csv("../../revision.csv", converters={"date":convert_date,"message":convert_hex})
revisions["date"] = pd.to_datetime(revisions["date"], infer_datetime_format=True)

revisions

Unnamed: 0,id,date,message
0,02 ce e1 52 38 36 73 a9 73 e2 2d 37 7b 6a 72 1...,2009-07-13 17:24:28,"Make stdin for test scripts empty, so that tes..."
1,04 8b 6e 52 83 41 31 f9 27 76 9a 9b 65 0c ce 5...,2017-05-23 17:18:40,Update CONTRIBUTING.md\n\nFixing broken issues...
2,04 ea d6 c5 8f 91 39 a7 9e 0c 0f d9 b5 bc 74 4...,2005-01-20 11:07:47,sane timestamps by default\n\ngit-svn-id: svn:...
3,05 51 aa a2 44 f5 7d 2d cb ec d8 de 51 79 e9 3...,2015-04-07 14:32:36,"Merge ""Document ports creating configuration f..."
4,05 c6 18 7f aa 4b c1 46 43 94 2f 76 34 a3 0f 8...,2013-11-22 17:31:50,Fixed #21497 -- Forced conversion to bytes for...
...,...,...,...
5064052,a3 19 c1 66 60 09 a9 6d 65 31 aa fd 07 14 3b c...,2014-11-19 20:10:34,Merge pull request #809 from StackStorm/update...
5064053,a7 65 2b 76 8a 5d 2a 28 10 12 2e 37 9c d1 fc 4...,2016-10-09 13:04:55,Issue #28339: Remove ByteString.register(memor...
5064054,aa a8 b4 3d 1f 54 d2 6c bd ad de ef b8 69 84 c...,2014-07-31 10:35:24,Merge pull request #661 from pitrou/tests_prof...
5064055,ac 5d 1f 2d 2c d8 3d 11 60 45 79 70 36 3c ea 0...,2014-04-07 05:45:04,Updated openstack/openstack\nProject: openstac...


### Looking at the first 10 entries.

In [2]:
for index, row in revisions.head(10).iterrows():
    print(index, "id:", row['id'], "\ndate:", row['date'], "\nmessage:", row['message'])
    print("*******************************************************************************************************")

0 id: 02 ce e1 52 38 36 73 a9 73 e2 2d 37 7b 6a 72 1a 70 68 2a 71 
date: 2009-07-13 17:24:28 
message: Make stdin for test scripts empty, so that tests don't accidentally hang waiting
for stdin.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@75506 91177308-0d34-0410-b5e6-96231b3b80d8

*******************************************************************************************************
1 id: 04 8b 6e 52 83 41 31 f9 27 76 9a 9b 65 0c ce 5b 19 a0 80 ac 
date: 2017-05-23 17:18:40 
message: Update CONTRIBUTING.md

Fixing broken issues link.
*******************************************************************************************************
2 id: 04 ea d6 c5 8f 91 39 a7 9e 0c 0f d9 b5 bc 74 4c 1e db 17 f6 
date: 2005-01-20 11:07:47 
message: sane timestamps by default

git-svn-id: svn://svn.twistedmatrix.com/svn/Twisted/trunk@12890 bbbe8e31-12d6-0310-92fd-ac37d47ddeeb

*******************************************************************************************************
3 id

### Which days and time has the most commits?
* Top 20 listed below.

In [3]:
revisions.date.value_counts().head(20)

2017-05-23 17:18:40    262161
2017-10-23 16:49:07    262152
2016-04-08 06:44:50       216
2004-08-17 11:34:28       156
2014-06-02 05:05:46       148
2015-12-23 11:26:14       145
2004-01-29 19:01:24       144
2018-05-13 10:36:58       137
2014-06-20 15:00:00       130
2016-12-09 00:03:32       114
2005-05-03 05:13:17       112
2016-08-30 06:22:36       109
2015-11-23 22:40:29       102
2016-12-08 23:45:40        99
2015-06-17 10:50:10        98
2016-06-15 20:20:40        97
2015-12-02 04:28:43        96
2015-06-18 08:05:21        94
2016-10-29 11:18:00        92
2016-10-12 19:05:17        91
Name: date, dtype: int64

### List the count of each commit down to the milisecond of each day.

In [4]:
revisions.groupby("date").date.agg(["count"]);

### List the number of commits each year.
* Note that years 2002 and 2013 do not have complete data.

In [5]:
revisions.groupby(revisions.date.dt.year)["date"].agg(["count"])

Unnamed: 0_level_0,count
date,Unnamed: 1_level_1
2002,19
2003,29244
2004,27638
2005,25241
2006,37533
2007,56921
2008,82258
2009,112824
2010,159198
2011,215333


### List number of commits by month and year for the last 5 years.

In [6]:
revisions.groupby([revisions.date.dt.year, revisions.date.dt.month])["date"].agg(["count"]).tail(59)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
date,date,Unnamed: 2_level_1
2014,1,48200
2014,2,45404
2014,3,49234
2014,4,43893
2014,5,40031
2014,6,43870
2014,7,50766
2014,8,48062
2014,9,48201
2014,10,48052


In [12]:
#revisions.loc[revisions['id'] == '402f3d967c0a905ec5b9ca9c240783d3f2c15724']

Unnamed: 0,id,date,message


### Looking at commits between 10-1-2017 and 10-2-2017

In [40]:
revisions.loc[(revisions.date >= "2017-10-1") & (revisions.date <= "2017-10-2")]

Unnamed: 0,id,date,message
483,cd b1 eb f6 66 9a b9 58 38 11 2a 91 05 d1 27 5...,2017-10-01 18:02:06,pep8 fixes\n
2723,40 f8 52 20 07 25 d3 e9 a8 d2 ca 3c ee 3e a6 0...,2017-10-01 11:39:35,Changed the daemon check approach in other pla...
4977,03 f8 73 a1 d7 dd 63 67 db b0 93 76 77 42 ff d...,2017-10-01 18:01:55,Add an option to spawn vcsim in esx mode\n
6226,36 04 3b 95 30 57 2e 03 77 fa 94 cb e7 43 b3 f...,2017-10-01 02:25:06,Enhancement for distributed sparse linear regr...
8042,72 b7 5c 90 e5 48 65 3c c1 33 66 cd b9 7b 30 c...,2017-10-01 23:58:34,Merge pull request #9259 from QuLogic/whatsnew...
...,...,...,...
5030317,df a4 ba 96 7c 9d 1e 23 4b 65 24 a0 83 7b 5f 9...,2017-10-01 04:35:42,Skip Jupyter PGO test on Windows - not current...
5033272,87 ec 30 4e 12 c8 4c dd 15 cf 2c ea ce a4 cd f...,2017-10-01 23:54:17,little note showing required version\n
5035414,c0 2f ad bc cb d3 ed f9 30 24 4e 0a ab 5c 21 b...,2017-10-01 11:35:08,corrects requirements\n
5049348,e9 80 71 5b fa 95 09 58 64 81 43 aa e7 82 8b a...,2017-10-01 14:43:48,[X86][SSE] Add faux shuffle combining support ...


In [22]:
for index, row in revisions.loc[(revisions.date >= "2017-10-1") & (revisions.date <= "2017-10-2")][400:500].iterrows():
    print(index, "id:", row['id'], "\ndate:", row['date'], "\nmessage:", row['message'])
    print("*******************************************************************************************************")

2095567 id: 51 1e 6b bc 30 4f 3c 55 aa 0c 03 52 a3 9d 02 a0 de fc 5c 94 
date: 2017-10-01 15:08:30 
message: Make get_prog return the actual executable name used

*******************************************************************************************************
2096293 id: 03 e6 56 79 af 71 78 4a ce 3e a5 b0 f0 9d 3b 8c 79 79 4a 54 
date: 2017-10-01 21:28:26 
message: Concluded test and added fallback return value

*******************************************************************************************************
2100518 id: ba 99 e4 4b 42 3f 64 22 7b 94 e0 f2 01 c4 c3 40 a9 89 dc 11 
date: 2017-10-01 07:19:34 
message: Catch AttributeError if regex is not found. (#30990)


*******************************************************************************************************
2107579 id: 39 e3 d6 44 2c a5 2b 88 56 5e b3 e2 0f 39 0f 67 9c d6 26 6f 
date: 2017-10-01 10:30:30 
message: # commit çæ¬å·ä¿®æ¹

*********************************************************************

### Looking for commit messages containing "CVE-2017"

In [34]:
revisions.loc[(revisions.message.str.contains("CVE-2017"))]

Unnamed: 0,id,date,message
24721,bf 6b 9e 94 44 56 10 a3 d8 4c f9 52 10 32 fa b...,2017-08-07 10:03:42,Don't allow logging in with empty password.\n\...
160072,58 e0 8e 80 e3 62 db 79 eb 0f d7 75 dc 81 fa a...,2017-08-02 16:22:35,[1.10.x] Fixed CVE-2017-12794 -- Fixed XSS pos...
162353,1c dd 45 1d 70 3a 9c 1b 0a 39 5d 81 da ca db f...,2017-01-22 23:22:40,streams: Fix autosubscribe security bug (CVE-2...
166300,ed 56 f5 1f 18 5a 1f fd 7e a5 71 30 d2 60 09 8...,2017-05-08 11:37:10,Fixing security issue with lookup returns not ...
204314,d6 98 4d d8 82 ff 81 d1 ad 71 ea bb 0d 0a 4b 3...,2017-02-02 04:29:56,# This is a combination of 1 commit.\n# This i...
...,...,...,...
4569068,ea b8 63 ac cc 05 0d 95 7f 6a ed 85 9c 15 cf 8...,2017-03-21 19:28:11,Update notes for submitted changes\n\n* Add re...
4683287,0b 07 4f 5c 16 6f 09 1f d0 bd 62 cc 43 30 04 7...,2017-01-26 09:55:39,OSSA-2017-001 (CVE-2017-2592)\n\nCatchErrors l...
4830787,14 21 7a 35 d7 7d e1 10 69 73 d4 a5 fd 7e 4e 7...,2017-09-20 10:17:00,Add exploits for CVE-2017-8225 and preauth RCE...
4916550,fd 86 14 c5 c5 46 6a 14 a9 45 db 5b 05 9c 10 c...,2017-12-08 16:34:12,bpo-30657: Fix CVE-2017-1000158 (#4664)\n\nFix...


In [35]:
for index, row in revisions.loc[(revisions.message.str.contains("CVE-2017"))].iterrows():
    print(index, "id:", row['id'], "\ndate:", row['date'], "\nmessage:", row['message'])
    print("*******************************************************************************************************")

24721 id: bf 6b 9e 94 44 56 10 a3 d8 4c f9 52 10 32 fa b9 93 f9 6f d6 
date: 2017-08-07 10:03:42 
message: Don't allow logging in with empty password.

Some authentication methods allowed it, others did not. In the client-side,
libpq does not even try to authenticate with an empty password, which makes
using empty passwords hazardous: an administrator might think that an
account with an empty password cannot be used to log in, because psql
doesn't allow it, and not realize that a different client would in fact
allow it. To clear that confusion and to be be consistent, disallow empty
passwords in all authentication methods.

All the authentication methods that used plaintext authentication over the
wire, except for BSD authentication, already checked that the password
received from the user was not empty. To avoid forgetting it in the future
again, move the check to the recv_password_packet function. That only
forbids using an empty password with plaintext authentication, however.
MD5 a

### Narrowing down commits that fix known security bugs, specifically CVE's

In [57]:
revisions.loc[(revisions.message.str.contains("This is a security fix")) & (revisions.message.str.contains("CVE-"))]

Unnamed: 0,id,date,message
104039,48 44 d8 6c 77 28 c1 a5 a3 bb ce 4a d3 36 a8 d...,2016-10-24 15:22:00,[1.9.x] Fixed CVE-2016-9013 -- Generated a ran...
153121,45 ac d6 d8 36 89 5a 4c 36 57 5f 48 b3 fb 36 a...,2016-10-17 12:14:49,[1.9.x] Fixed CVE-2016-9014 -- Validated Host ...
156203,67 b4 6b a7 01 6d a2 d2 59 c1 ec c7 d6 66 d1 1...,2016-02-13 15:09:46,Fixed CVE-2016-2513 -- Fixed user enumeration ...
160072,58 e0 8e 80 e3 62 db 79 eb 0f d7 75 dc 81 fa a...,2017-08-02 16:22:35,[1.10.x] Fixed CVE-2017-12794 -- Fixed XSS pos...
279920,2a 9f 6e f7 1b 8e 23 fd 26 7e e2 be 1b e2 6d d...,2017-03-14 12:33:15,[1.10.x] Fixed CVE-2017-7234 -- Fixed open red...
557294,d1 bc 98 0d b1 c0 ff fd 6d 60 67 7e 62 f7 0b e...,2016-03-11 21:36:08,[1.9.x] Fixed CVE-2016-7401 -- Fixed CSRF prot...
642460,61 18 ab 7d 06 76 f0 d6 22 27 8e 5b e2 15 f1 4...,2016-03-11 21:36:08,[1.8.x] Fixed CVE-2016-7401 -- Fixed CSRF prot...
707010,4e ec 95 4e 2a d3 30 d7 cd 44 29 45 0f 2d 49 b...,2014-12-03 16:14:00,[1.5.x] Fixed is_safe_url() to handle leading ...
763801,57 b9 5f ed ad 5e 0b 83 fc 9c 81 46 6b 7d 17 5...,2018-01-23 13:20:18,[1.11.x] Fixed CVE-2018-6188 -- Fixed informat...
854637,c4 01 ae 9a 7d fb 1a 94 a8 a6 19 27 ed 54 1d 6...,2016-10-17 12:14:49,[1.8.x] Fixed CVE-2016-9014 -- Validated Host ...


In [39]:
for index, row in revisions.loc[(revisions.message.str.contains("This is a security fix")) & (revisions.message.str.contains("CVE-"))].iterrows():
    print(index, "id:", row['id'], "\ndate:", row['date'], "\nmessage:", row['message'])
    print("*******************************************************************************************************")

104039 id: 48 44 d8 6c 77 28 c1 a5 a3 bb ce 4a d3 36 a8 d3 23 04 07 2b 
date: 2016-10-24 15:22:00 
message: [1.9.x] Fixed CVE-2016-9013 -- Generated a random database user password when running tests on Oracle.

This is a security fix.

*******************************************************************************************************
153121 id: 45 ac d6 d8 36 89 5a 4c 36 57 5f 48 b3 fb 36 a3 da e9 8d 19 
date: 2016-10-17 12:14:49 
message: [1.9.x] Fixed CVE-2016-9014 -- Validated Host header when DEBUG=True.

This is a security fix.

*******************************************************************************************************
156203 id: 67 b4 6b a7 01 6d a2 d2 59 c1 ec c7 d6 66 d1 1f 5e 1c fa ab 
date: 2016-02-13 15:09:46 
message: Fixed CVE-2016-2513 -- Fixed user enumeration timing attack during login.

This is a security fix.

*******************************************************************************************************
160072 id: 58 e0 8e 80 e3 62 db 79 eb 0

In [49]:
revisions.loc[(revisions.message.str.contains("Fixed CVE-"))].count()

id         39
date       39
message    39
dtype: int64

In [45]:
for index, row in revisions.loc[(revisions.message.str.contains("Fixed CVE-"))].iterrows():
    print(index, "id:", row['id'], "\ndate:", row['date'], "\nmessage:", row['message'])
    print("*******************************************************************************************************")

104039 id: 48 44 d8 6c 77 28 c1 a5 a3 bb ce 4a d3 36 a8 d3 23 04 07 2b 
date: 2016-10-24 15:22:00 
message: [1.9.x] Fixed CVE-2016-9013 -- Generated a random database user password when running tests on Oracle.

This is a security fix.

*******************************************************************************************************
153121 id: 45 ac d6 d8 36 89 5a 4c 36 57 5f 48 b3 fb 36 a3 da e9 8d 19 
date: 2016-10-17 12:14:49 
message: [1.9.x] Fixed CVE-2016-9014 -- Validated Host header when DEBUG=True.

This is a security fix.

*******************************************************************************************************
156203 id: 67 b4 6b a7 01 6d a2 d2 59 c1 ec c7 d6 66 d1 1f 5e 1c fa ab 
date: 2016-02-13 15:09:46 
message: Fixed CVE-2016-2513 -- Fixed user enumeration timing attack during login.

This is a security fix.

*******************************************************************************************************
160072 id: 58 e0 8e 80 e3 62 db 79 eb 0

In [55]:
revisions.loc[(revisions.message.str.contains("Fixed CVE-")) | (revisions.message.str.contains("fixed CVE-")) | (revisions.message.str.contains("Fix CVE-")) | (revisions.message.str.contains("fix CVE-"))]

Unnamed: 0,id,date,message
104039,48 44 d8 6c 77 28 c1 a5 a3 bb ce 4a d3 36 a8 d...,2016-10-24 15:22:00,[1.9.x] Fixed CVE-2016-9013 -- Generated a ran...
153121,45 ac d6 d8 36 89 5a 4c 36 57 5f 48 b3 fb 36 a...,2016-10-17 12:14:49,[1.9.x] Fixed CVE-2016-9014 -- Validated Host ...
156203,67 b4 6b a7 01 6d a2 d2 59 c1 ec c7 d6 66 d1 1...,2016-02-13 15:09:46,Fixed CVE-2016-2513 -- Fixed user enumeration ...
160072,58 e0 8e 80 e3 62 db 79 eb 0f d7 75 dc 81 fa a...,2017-08-02 16:22:35,[1.10.x] Fixed CVE-2017-12794 -- Fixed XSS pos...
279920,2a 9f 6e f7 1b 8e 23 fd 26 7e e2 be 1b e2 6d d...,2017-03-14 12:33:15,[1.10.x] Fixed CVE-2017-7234 -- Fixed open red...
...,...,...,...
4930316,89 42 b9 2c 7c b5 fa 14 4b d7 9b 76 07 b4 59 d...,2014-05-16 08:00:45,Fix CVE-2014-0221\n\nUnnecessary recursion whe...
4940834,d2 1e b7 a9 a4 7e 2f 4c b6 8c f5 f5 2f 6b 50 b...,2013-08-16 18:54:47,Issue #18709: Fix CVE-2013-4238. The SSL modul...
4947826,c4 e5 ff 7f db 5f ce 44 76 75 e9 02 91 fd 33 f...,2018-07-24 16:18:17,[2.1.x] Fixed CVE-2018-14574 -- Fixed open red...
4961740,d0 6e dd d1 5c c9 40 23 a9 a3 fe df 7b 1c 28 a...,2014-06-20 15:00:00,Fix CVE-2014-3470\n\nCheck session_cert is not...


### 83 commit messages containing: "Fixed CVE-", "fixed CVE-", "Fix CVE-", "fix CVE-"

In [58]:
for index, row in revisions.loc[(revisions.message.str.contains("Fixed CVE-")) | (revisions.message.str.contains("fixed CVE-")) | (revisions.message.str.contains("Fix CVE-")) | (revisions.message.str.contains("fix CVE-"))].iterrows():
    print(index, "id:", row['id'], "\ndate:", row['date'], "\nmessage:", row['message'])
    print("*******************************************************************************************************")

104039 id: 48 44 d8 6c 77 28 c1 a5 a3 bb ce 4a d3 36 a8 d3 23 04 07 2b 
date: 2016-10-24 15:22:00 
message: [1.9.x] Fixed CVE-2016-9013 -- Generated a random database user password when running tests on Oracle.

This is a security fix.

*******************************************************************************************************
153121 id: 45 ac d6 d8 36 89 5a 4c 36 57 5f 48 b3 fb 36 a3 da e9 8d 19 
date: 2016-10-17 12:14:49 
message: [1.9.x] Fixed CVE-2016-9014 -- Validated Host header when DEBUG=True.

This is a security fix.

*******************************************************************************************************
156203 id: 67 b4 6b a7 01 6d a2 d2 59 c1 ec c7 d6 66 d1 1f 5e 1c fa ab 
date: 2016-02-13 15:09:46 
message: Fixed CVE-2016-2513 -- Fixed user enumeration timing attack during login.

This is a security fix.

*******************************************************************************************************
160072 id: 58 e0 8e 80 e3 62 db 79 eb 0