In [1]:
from datetime import datetime, timedelta
from elasticsearch import Elasticsearch
from pprint import pprint
from dateutil.relativedelta import relativedelta
from calendar import monthrange, month_name

es = Elasticsearch("localhost:9200")

In [2]:
def get_all_commit_records(index=None, custom_source=False):
    temp_res = es.search(index=index, body={"query":{"match_all":{}}})
    size = temp_res["hits"]["total"]
    query = {
                "size":size, 
                "query":
                        {
                            "match_all":{}
                        }
    }
    if custom_source:
        query["_source"] = ["author_name", "committer_name", "commit_date", "is_git_commit", "lines_added", 
                            "lines_changed", "lines_removed", "utc_commit", "grimoire_creation_date"]
        
    res = es.search(index=index, body=query)
    return [res["hits"]["hits"][i]["_source"] for i in range(res["hits"]["total"])]

In [3]:
result = get_all_commit_records("aima_python_git", True)

In [4]:
def parse_date(date):
    return datetime.strptime(date, "%Y-%m-%dT%H:%M:%S")

In [5]:
def get_end_date_of_month(date):
    return date + relativedelta(days = +(monthrange(date.year, date.month)[1] - date.day))

In [6]:
def get_start_date_of_month(date):
    return date - relativedelta(days = +date.day-1)

In [7]:
original = parse_date(result[0]['commit_date'])
print(original)
print(original + relativedelta(days = +monthrange(original.year, original.month)[1] - original.day))
print(get_end_date_of_month(original))
print(get_start_date_of_month(original))

2007-07-13 21:12:24
2007-07-31 21:12:24
2007-07-31 21:12:24
2007-07-01 21:12:24


In [23]:
def make_buckets(first_commit, last_commit):
    buckets = []
    fc_date = parse_date(first_commit['commit_date'])
    lc_date = parse_date(last_commit['commit_date'])
    
    month_start_date = get_start_date_of_month(fc_date)
    month_end_date = get_end_date_of_month(fc_date)
    bucket_name = month_name[fc_date.month] + str(fc_date.year)
    
    while month_end_date <= lc_date:
        commit = {}
        commit["month"] = bucket_name
        commit["start"] = month_start_date
        commit["end"] = month_end_date
        commit['committers'] = {}
        buckets.append(commit)
        month_start_date = month_end_date + relativedelta(days=+1)
        month_end_date = get_end_date_of_month(month_start_date)
        bucket_name = month_name[month_start_date.month] + str(month_start_date.year)
        
    commit = {}
    commit["month"] = bucket_name
    commit["start"] = month_start_date
    commit["end"] = month_end_date
    commit['committers'] = {}
    buckets.append(commit)
    
    return buckets

In [24]:
make_buckets(result[0], result[-1])

[{'committers': {},
  'end': datetime.datetime(2007, 7, 31, 21, 12, 24),
  'month': 'July2007',
  'start': datetime.datetime(2007, 7, 1, 21, 12, 24)},
 {'committers': {},
  'end': datetime.datetime(2007, 8, 31, 21, 12, 24),
  'month': 'August2007',
  'start': datetime.datetime(2007, 8, 1, 21, 12, 24)},
 {'committers': {},
  'end': datetime.datetime(2007, 9, 30, 21, 12, 24),
  'month': 'September2007',
  'start': datetime.datetime(2007, 9, 1, 21, 12, 24)},
 {'committers': {},
  'end': datetime.datetime(2007, 10, 31, 21, 12, 24),
  'month': 'October2007',
  'start': datetime.datetime(2007, 10, 1, 21, 12, 24)},
 {'committers': {},
  'end': datetime.datetime(2007, 11, 30, 21, 12, 24),
  'month': 'November2007',
  'start': datetime.datetime(2007, 11, 1, 21, 12, 24)},
 {'committers': {},
  'end': datetime.datetime(2007, 12, 31, 21, 12, 24),
  'month': 'December2007',
  'start': datetime.datetime(2007, 12, 1, 21, 12, 24)},
 {'committers': {},
  'end': datetime.datetime(2008, 1, 31, 21, 12, 24