In [1]:
from datetime import datetime, timedelta
from elasticsearch import Elasticsearch
from pprint import pprint
from dateutil.relativedelta import relativedelta
from calendar import monthrange, month_name
from collections import defaultdict, OrderedDict

es = Elasticsearch("localhost:9200")

In [2]:
def get_all_commit_records(index=None, custom_source=False):
    temp_res = es.search(index=index, body={"query":{"match_all":{}}})
    size = temp_res["hits"]["total"]
    query = {
                "size":size, 
                "query":
                        {
                            "match_all":{}
                        }
    }
    if custom_source:
        query["_source"] = ["author_name", "committer_name", "commit_date", "is_git_commit", "lines_added", 
                            "lines_changed", "lines_removed", "utc_commit", "grimoire_creation_date"]
        
    res = es.search(index=index, body=query)
    return [res["hits"]["hits"][i]["_source"] for i in range(res["hits"]["total"])]

In [3]:
result = get_all_commit_records("aima_python_git", True)

In [4]:
result[0]

{'author_name': 'spottedMetal',
 'commit_date': '2007-07-13T21:12:24',
 'committer_name': 'spottedMetal',
 'grimoire_creation_date': '2007-07-13T21:12:24+00:00',
 'is_git_commit': 1,
 'lines_added': 181,
 'lines_changed': 247,
 'lines_removed': 66,
 'utc_commit': '2007-07-13T21:12:24'}

In [5]:
def parse_date(date):
    return datetime.strptime(date, "%Y-%m-%dT%H:%M:%S")

In [6]:
def get_end_date_of_month(date):
    return date + relativedelta(days = +(monthrange(date.year, date.month)[1] - date.day))

In [7]:
def get_start_date_of_month(date):
    return date - relativedelta(days = +date.day-1)

In [8]:
def get_bucket_name(date):
    return month_name[date.month] + str(date.year)

In [9]:
def get_extreme_commits_dates(commit_list):
    fc_date = min(parse_date(item['commit_date']) for item in result)
    lc_date = max(parse_date(item['commit_date']) for item in result)
    return fc_date, lc_date

In [10]:
def make_buckets(commit_list):
    buckets = OrderedDict()
    
    first_date, last_date = get_extreme_commits_dates(commit_list)
    
    month_start_date = get_start_date_of_month(first_date)
    month_end_date = get_end_date_of_month(first_date)
    bucket_name = get_bucket_name(first_date)
    
    while month_end_date <= last_date:
        commit = {}
        commit["start"] = month_start_date
        commit["end"] = month_end_date
        commit['new_committers'] = defaultdict(int)
        commit['old_committers'] = defaultdict(int)
        commit['commits'] = []
        buckets[bucket_name] = commit
        
        month_start_date = month_end_date + relativedelta(days=+1)
        month_end_date = get_end_date_of_month(month_start_date)
        bucket_name = get_bucket_name(month_start_date)
    
    commit = {}
    commit["start"] = month_start_date
    commit["end"] = month_end_date
    commit['new_committers'] = defaultdict(int)
    commit['old_committers'] = defaultdict(int)
    commit['commits'] = []
    buckets[bucket_name] = commit
    
    return buckets

In [14]:
def distribute_commits_in_buckets(commit_list):
    months = make_buckets(commit_list)
    
    for commit in commit_list:
        month = get_bucket_name(parse_date(commit['commit_date']))
        months[month]["commits"].append(commit)
    
    return months

In [19]:
for item in distribute_commits_in_buckets(result).items():
    pprint(item)

('June2007',
 {'commits': [{'author_name': 'peter.norvig',
               'commit_date': '2007-06-13T16:34:47',
               'committer_name': 'peter.norvig',
               'grimoire_creation_date': '2007-06-13T16:34:47+00:00',
               'is_git_commit': 1,
               'lines_added': 5654,
               'lines_changed': 5654,
               'lines_removed': 0,
               'utc_commit': '2007-06-13T16:34:47'}],
  'end': datetime.datetime(2007, 6, 30, 16, 34, 47),
  'new_committers': defaultdict(<class 'int'>, {}),
  'old_committers': defaultdict(<class 'int'>, {}),
  'start': datetime.datetime(2007, 6, 1, 16, 34, 47)})
('July2007',
 {'commits': [{'author_name': 'spottedMetal',
               'commit_date': '2007-07-13T21:12:24',
               'committer_name': 'spottedMetal',
               'grimoire_creation_date': '2007-07-13T21:12:24+00:00',
               'is_git_commit': 1,
               'lines_added': 181,
               'lines_changed': 247,
               'lines

  'start': datetime.datetime(2009, 12, 1, 16, 34, 47)})
('January2010',
 {'commits': [],
  'end': datetime.datetime(2010, 1, 31, 16, 34, 47),
  'new_committers': defaultdict(<class 'int'>, {}),
  'old_committers': defaultdict(<class 'int'>, {}),
  'start': datetime.datetime(2010, 1, 1, 16, 34, 47)})
('February2010',
 {'commits': [],
  'end': datetime.datetime(2010, 2, 28, 16, 34, 47),
  'new_committers': defaultdict(<class 'int'>, {}),
  'old_committers': defaultdict(<class 'int'>, {}),
  'start': datetime.datetime(2010, 2, 1, 16, 34, 47)})
('March2010',
 {'commits': [],
  'end': datetime.datetime(2010, 3, 31, 16, 34, 47),
  'new_committers': defaultdict(<class 'int'>, {}),
  'old_committers': defaultdict(<class 'int'>, {}),
  'start': datetime.datetime(2010, 3, 1, 16, 34, 47)})
('April2010',
 {'commits': [],
  'end': datetime.datetime(2010, 4, 30, 16, 34, 47),
  'new_committers': defaultdict(<class 'int'>, {}),
  'old_committers': defaultdict(<class 'int'>, {}),
  'start': datetime.da

               'lines_changed': 4,
               'lines_removed': 2,
               'utc_commit': '2011-09-15T01:27:10'},
              {'author_name': 'withal',
               'commit_date': '2011-09-27T05:07:27',
               'committer_name': 'withal',
               'grimoire_creation_date': '2011-09-27T05:07:27+00:00',
               'is_git_commit': 1,
               'lines_added': 13,
               'lines_changed': 19,
               'lines_removed': 6,
               'utc_commit': '2011-09-27T05:07:27'},
              {'author_name': 'withal',
               'commit_date': '2011-09-01T20:14:10',
               'committer_name': 'withal',
               'grimoire_creation_date': '2011-09-01T20:14:10+00:00',
               'is_git_commit': 1,
               'lines_added': 6,
               'lines_changed': 6,
               'lines_removed': 0,
               'utc_commit': '2011-09-01T20:14:10'},
              {'author_name': 'withal',
               'commit_date': '2011-09-02

               'lines_changed': 1,
               'lines_removed': 0,
               'utc_commit': '2011-10-12T18:05:53'},
              {'author_name': 'withal',
               'commit_date': '2011-10-13T19:30:49',
               'committer_name': 'withal',
               'grimoire_creation_date': '2011-10-13T19:30:49+00:00',
               'is_git_commit': 1,
               'lines_added': 16,
               'lines_changed': 36,
               'lines_removed': 20,
               'utc_commit': '2011-10-13T19:30:49'},
              {'author_name': 'withal',
               'commit_date': '2011-10-23T07:15:45',
               'committer_name': 'withal',
               'grimoire_creation_date': '2011-10-23T07:15:45+00:00',
               'is_git_commit': 1,
               'lines_added': 1,
               'lines_changed': 2,
               'lines_removed': 1,
               'utc_commit': '2011-10-23T07:15:45'},
              {'author_name': 'withal',
               'commit_date': '2011-10-2

              {'author_name': 'withal',
               'commit_date': '2011-10-29T03:34:21',
               'committer_name': 'withal',
               'grimoire_creation_date': '2011-10-29T03:34:21+00:00',
               'is_git_commit': 1,
               'lines_added': 101,
               'lines_changed': 144,
               'lines_removed': 43,
               'utc_commit': '2011-10-29T03:34:21'},
              {'author_name': 'withal',
               'commit_date': '2011-10-29T20:05:31',
               'committer_name': 'withal',
               'grimoire_creation_date': '2011-10-29T20:05:31+00:00',
               'is_git_commit': 1,
               'lines_added': 25,
               'lines_changed': 48,
               'lines_removed': 23,
               'utc_commit': '2011-10-29T20:05:31'},
              {'author_name': 'withal',
               'commit_date': '2011-10-30T06:48:21',
               'committer_name': 'withal',
               'grimoire_creation_date': '2011-10-30T06:48:21+

              {'author_name': 'withal',
               'commit_date': '2011-11-14T00:08:51',
               'committer_name': 'withal',
               'grimoire_creation_date': '2011-11-14T00:08:51+00:00',
               'is_git_commit': 1,
               'lines_added': 17,
               'lines_changed': 35,
               'lines_removed': 18,
               'utc_commit': '2011-11-14T00:08:51'}],
  'end': datetime.datetime(2011, 11, 30, 16, 34, 47),
  'new_committers': defaultdict(<class 'int'>, {}),
  'old_committers': defaultdict(<class 'int'>, {}),
  'start': datetime.datetime(2011, 11, 1, 16, 34, 47)})
('December2011',
 {'commits': [],
  'end': datetime.datetime(2011, 12, 31, 16, 34, 47),
  'new_committers': defaultdict(<class 'int'>, {}),
  'old_committers': defaultdict(<class 'int'>, {}),
  'start': datetime.datetime(2011, 12, 1, 16, 34, 47)})
('January2012',
 {'commits': [],
  'end': datetime.datetime(2012, 1, 31, 16, 34, 47),
  'new_committers': defaultdict(<class 'int'>, {}),

 {'commits': [{'author_name': 'norvig',
               'commit_date': '2016-03-01T11:42:42',
               'committer_name': 'norvig',
               'grimoire_creation_date': '2016-03-01T11:42:42-08:00',
               'is_git_commit': 1,
               'lines_added': 25,
               'lines_changed': 28,
               'lines_removed': 3,
               'utc_commit': '2016-03-01T19:42:42'},
              {'author_name': 'greyshadows',
               'commit_date': '2016-03-04T05:18:39',
               'committer_name': 'greyshadows',
               'grimoire_creation_date': '2016-03-04T05:18:39+05:30',
               'is_git_commit': 1,
               'lines_added': 1,
               'lines_changed': 2,
               'lines_removed': 1,
               'utc_commit': '2016-03-03T23:48:39'},
              {'author_name': 'norvig',
               'commit_date': '2016-03-04T14:38:56',
               'committer_name': 'norvig',
               'grimoire_creation_date': '2016-03-04T14:38

               'commit_date': '2016-03-19T21:04:19',
               'committer_name': 'SnShine',
               'grimoire_creation_date': '2016-03-19T21:04:19+05:30',
               'is_git_commit': 1,
               'lines_added': 18,
               'lines_changed': 36,
               'lines_removed': 18,
               'utc_commit': '2016-03-19T15:34:19'},
              {'author_name': 'Chipe1',
               'commit_date': '2016-03-20T22:43:16',
               'committer_name': 'Chipe1',
               'grimoire_creation_date': '2016-03-20T22:43:16+05:30',
               'is_git_commit': 1,
               'lines_added': 664,
               'lines_changed': 666,
               'lines_removed': 2,
               'utc_commit': '2016-03-20T17:13:16'},
              {'author_name': 'Peter Norvig',
               'commit_date': '2016-03-20T21:32:03',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2016-03-20T21:32:03-07:00',
               'is_g

               'lines_added': 0,
               'lines_changed': 0,
               'lines_removed': 0,
               'utc_commit': '2016-03-13T11:18:53'},
              {'author_name': 'norvig',
               'commit_date': '2016-03-13T12:30:45',
               'committer_name': 'norvig',
               'grimoire_creation_date': '2016-03-13T12:30:45-07:00',
               'is_git_commit': 1,
               'lines_added': 1,
               'lines_changed': 46,
               'lines_removed': 45,
               'utc_commit': '2016-03-13T19:30:45'},
              {'author_name': 'norvig',
               'commit_date': '2016-03-13T12:35:45',
               'committer_name': 'norvig',
               'grimoire_creation_date': '2016-03-13T12:35:45-07:00',
               'is_git_commit': 1,
               'lines_added': 9,
               'lines_changed': 9,
               'lines_removed': 0,
               'utc_commit': '2016-03-13T19:35:45'},
              {'author_name': 'Chirag Vartak',
 

               'utc_commit': '2016-03-13T19:27:24'},
              {'author_name': 'Chirag Vartak',
               'commit_date': '2016-03-14T01:26:23',
               'committer_name': 'Chirag Vartak',
               'grimoire_creation_date': '2016-03-14T01:26:23+05:30',
               'is_git_commit': 1,
               'lines_added': 836,
               'lines_changed': 839,
               'lines_removed': 3,
               'utc_commit': '2016-03-13T19:56:23'},
              {'author_name': 'norvig',
               'commit_date': '2016-03-13T18:11:03',
               'committer_name': 'norvig',
               'grimoire_creation_date': '2016-03-13T18:11:03-07:00',
               'is_git_commit': 1,
               'lines_added': 0,
               'lines_changed': 0,
               'lines_removed': 0,
               'utc_commit': '2016-03-14T01:11:03'},
              {'author_name': 'Chirag Vartak',
               'commit_date': '2016-03-14T17:33:02',
               'committer_name': 'C

               'lines_changed': 1,
               'lines_removed': 0,
               'utc_commit': '2016-03-14T01:12:41'},
              {'author_name': 'norvig',
               'commit_date': '2016-03-15T13:51:42',
               'committer_name': 'norvig',
               'grimoire_creation_date': '2016-03-15T13:51:42-07:00',
               'is_git_commit': 1,
               'lines_added': 0,
               'lines_changed': 0,
               'lines_removed': 0,
               'utc_commit': '2016-03-15T20:51:42'},
              {'author_name': 'norvig',
               'commit_date': '2016-03-15T16:44:38',
               'committer_name': 'norvig',
               'grimoire_creation_date': '2016-03-15T16:44:38-07:00',
               'is_git_commit': 1,
               'lines_added': 1,
               'lines_changed': 2,
               'lines_removed': 1,
               'utc_commit': '2016-03-15T23:44:38'},
              {'author_name': 'Chipe1',
               'commit_date': '2016-03-16T2

               'lines_added': 1,
               'lines_changed': 3,
               'lines_removed': 2,
               'utc_commit': '2016-03-13T12:02:14'},
              {'author_name': 'norvig',
               'commit_date': '2016-03-13T12:30:15',
               'committer_name': 'norvig',
               'grimoire_creation_date': '2016-03-13T12:30:15-07:00',
               'is_git_commit': 1,
               'lines_added': 23,
               'lines_changed': 23,
               'lines_removed': 0,
               'utc_commit': '2016-03-13T19:30:15'},
              {'author_name': 'Chirag Vartak',
               'commit_date': '2016-03-14T04:57:34',
               'committer_name': 'Chirag Vartak',
               'grimoire_creation_date': '2016-03-14T04:57:34+05:30',
               'is_git_commit': 1,
               'lines_added': 1,
               'lines_changed': 2,
               'lines_removed': 1,
               'utc_commit': '2016-03-13T23:27:34'},
              {'author_name': 'nor

               'committer_name': 'SnShine',
               'grimoire_creation_date': '2016-04-01T10:45:41+05:30',
               'is_git_commit': 1,
               'lines_added': 16,
               'lines_changed': 32,
               'lines_removed': 16,
               'utc_commit': '2016-04-01T05:15:41'},
              {'author_name': 'Tarun Kumar Vangani',
               'commit_date': '2016-04-01T21:30:46',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2016-04-02T10:00:46+05:30',
               'is_git_commit': 1,
               'lines_added': 155,
               'lines_changed': 155,
               'lines_removed': 0,
               'utc_commit': '2016-04-02T04:30:46'},
              {'author_name': 'Surya Teja Cheedella',
               'commit_date': '2016-04-04T12:00:10',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2016-04-05T00:30:10+05:30',
               'is_git_commit': 1,
           

               'utc_commit': '2016-06-12T23:22:43'},
              {'author_name': 'SnShine',
               'commit_date': '2016-06-14T20:31:14',
               'committer_name': 'SnShine',
               'grimoire_creation_date': '2016-06-14T20:31:14+05:30',
               'is_git_commit': 1,
               'lines_added': 554,
               'lines_changed': 583,
               'lines_removed': 29,
               'utc_commit': '2016-06-14T15:01:14'},
              {'author_name': 'Tarun Kumar Vangani',
               'commit_date': '2016-06-30T22:13:00',
               'committer_name': 'Tarun Kumar Vangani',
               'grimoire_creation_date': '2016-06-30T22:13:00+05:30',
               'is_git_commit': 1,
               'lines_added': 141,
               'lines_changed': 145,
               'lines_removed': 4,
               'utc_commit': '2016-06-30T16:43:00'},
              {'author_name': 'Tarun Kumar Vangani',
               'commit_date': '2016-06-30T22:36:41',
          

              {'author_name': 'Tarun Kumar Vangani',
               'commit_date': '2016-08-06T20:31:21',
               'committer_name': 'Tarun Kumar Vangani',
               'grimoire_creation_date': '2016-08-06T20:31:21+05:30',
               'is_git_commit': 1,
               'lines_added': 9,
               'lines_changed': 12,
               'lines_removed': 3,
               'utc_commit': '2016-08-06T15:01:21'},
              {'author_name': 'Tarun Kumar Vangani',
               'commit_date': '2016-08-06T20:37:48',
               'committer_name': 'Tarun Kumar Vangani',
               'grimoire_creation_date': '2016-08-06T20:37:48+05:30',
               'is_git_commit': 1,
               'lines_added': 11,
               'lines_changed': 11,
               'lines_removed': 0,
               'utc_commit': '2016-08-06T15:07:48'},
              {'author_name': 'Rahul Patel',
               'commit_date': '2016-08-25T08:47:27',
               'committer_name': "Ciaran O'Reilly",
 

               'grimoire_creation_date': '2017-03-07T21:48:07+02:00',
               'is_git_commit': 1,
               'lines_added': 333,
               'lines_changed': 760,
               'lines_removed': 427,
               'utc_commit': '2017-03-07T19:48:07'},
              {'author_name': 'VladKha',
               'commit_date': '2017-03-18T01:10:00',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2017-03-18T10:10:00+02:00',
               'is_git_commit': 1,
               'lines_added': 93,
               'lines_changed': 214,
               'lines_removed': 121,
               'utc_commit': '2017-03-18T08:10:00'},
              {'author_name': 'Antonis Maronikolakis',
               'commit_date': '2017-03-18T01:21:40',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2017-03-18T10:21:40+02:00',
               'is_git_commit': 1,
               'lines_added': 12,
               'lines_change

               'utc_commit': '2017-05-24T05:14:31'},
              {'author_name': 'C.G.Vedant',
               'commit_date': '2017-05-23T22:16:16',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2017-05-24T10:46:16+05:30',
               'is_git_commit': 1,
               'lines_added': 58,
               'lines_changed': 58,
               'lines_removed': 0,
               'utc_commit': '2017-05-24T05:16:16'},
              {'author_name': 'Antonis Maronikolakis',
               'commit_date': '2017-05-28T11:13:22',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2017-05-28T21:13:22+03:00',
               'is_git_commit': 1,
               'lines_added': 527,
               'lines_changed': 991,
               'lines_removed': 464,
               'utc_commit': '2017-05-28T18:13:22'},
              {'author_name': 'Antonis Maronikolakis',
               'commit_date': '2017-05-30T21:20:28',
      

 {'commits': [{'author_name': 'Anthony Marakis',
               'commit_date': '2017-08-09T00:14:01',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2017-08-09T10:14:01+03:00',
               'is_git_commit': 1,
               'lines_added': 159,
               'lines_changed': 165,
               'lines_removed': 6,
               'utc_commit': '2017-08-09T07:14:01'},
              {'author_name': 'Anthony Marakis',
               'commit_date': '2017-08-16T22:36:47',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2017-08-17T08:36:47+03:00',
               'is_git_commit': 1,
               'lines_added': 279,
               'lines_changed': 323,
               'lines_removed': 44,
               'utc_commit': '2017-08-17T05:36:47'},
              {'author_name': 'Luis Martí',
               'commit_date': '2017-08-20T02:47:23',
               'committer_name': 'Peter Norvig',
               'grimo

              {'author_name': 'surya saini',
               'commit_date': '2018-01-10T18:54:53',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2018-01-11T08:24:53+05:30',
               'is_git_commit': 1,
               'lines_added': 288,
               'lines_changed': 344,
               'lines_removed': 56,
               'utc_commit': '2018-01-11T02:54:53'},
              {'author_name': 'surya saini',
               'commit_date': '2018-01-25T22:13:54',
               'committer_name': 'Peter Norvig',
               'grimoire_creation_date': '2018-01-26T06:13:54+00:00',
               'is_git_commit': 1,
               'lines_added': 230,
               'lines_changed': 460,
               'lines_removed': 230,
               'utc_commit': '2018-01-26T06:13:54'},
              {'author_name': 'Rishav1',
               'commit_date': '2018-01-28T19:03:43',
               'committer_name': 'Peter Norvig',
               'grimoire_creat

In [22]:
def MAIN(commit_list):
    months = distribute_commits_in_buckets(commit_list)
    all_committers = []
    
    for name, month in months.items():
        for commit in month['commits']:
            committer = commit['committer_name']
            if committer in all_committers:
                month['old_committers'][committer] += 1
            else:
                month['new_committers'][committer] += 1
        all_committers = list(set(all_committers + list(month['old_committers'].keys()) + 
                                  list(month['new_committers'].keys())))
                
    return months

In [23]:
Output = MAIN(result)

In [24]:
Output

OrderedDict([('June2007',
              {'commits': [{'author_name': 'peter.norvig',
                 'commit_date': '2007-06-13T16:34:47',
                 'committer_name': 'peter.norvig',
                 'grimoire_creation_date': '2007-06-13T16:34:47+00:00',
                 'is_git_commit': 1,
                 'lines_added': 5654,
                 'lines_changed': 5654,
                 'lines_removed': 0,
                 'utc_commit': '2007-06-13T16:34:47'}],
               'end': datetime.datetime(2007, 6, 30, 16, 34, 47),
               'new_committers': defaultdict(int, {'peter.norvig': 1}),
               'old_committers': defaultdict(int, {}),
               'start': datetime.datetime(2007, 6, 1, 16, 34, 47)}),
             ('July2007',
              {'commits': [{'author_name': 'spottedMetal',
                 'commit_date': '2007-07-13T21:12:24',
                 'committer_name': 'spottedMetal',
                 'grimoire_creation_date': '2007-07-13T21:12:24+00:00',
    