In [None]:
import pandas as pd
import numpy as np
from datetime import datetime as DateTime
import calendar
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [None]:
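# Run this command inside your repository to export its commit log to commits.csv.
# The cells below read the columns 'Commit', 'name', 'datetime' and 'files changed' by name,
# so commits.csv needs a header row providing them; git log does not write one, so add it if missing.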
# git log --oneline --pretty=^"%h,%an,%ae,%aI,\"%s\"," --shortstat |grep -v \| |  tr "\n" " " | tr "^" "\n" | sed -E 's/insertion.*\+\)//g' | sed -E 's/files? changed//g' | sed -E 's/deletion.*\-\)//g' > commits.csv

In [None]:
# Load the exported commit log. The 'files changed' column is converted to a
# numeric dtype; any value that cannot be parsed becomes NaN (errors='coerce').
df = pd.read_csv('commits.csv')
df = df.assign(**{
    'files changed': pd.to_numeric(df['files changed'], errors='coerce'),
})

In [None]:
# Alias table: maps a value of the 'name' column to the display name to use
# instead. It is empty here, so every value is carried over unchanged.
names = {}


def _display_name(author):
    """Return the alias registered in `names` for `author`, or `author` itself if none."""
    return names.get(author, author)


# 'Name' is the column the later cells group and filter on.
df['Name'] = df['name'].apply(_display_name)

In [None]:
# Parse the commit timestamps, make them the index, and express them in
# Singapore time.
#
# utc=True is required for the documented export format: git's %aI emits
# ISO 8601 strings that carry a UTC offset, so the parsed values are already
# tz-aware (and rows may have differing offsets). Calling tz_localize('UTC')
# on tz-aware data raises TypeError; with utc=True everything is normalised to
# UTC up front, and offset-less values are still interpreted as UTC.
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
df = df.set_index('datetime')
df = df.tz_convert('Asia/Singapore')

In [None]:
# Scatter of files changed per commit against the commit's calendar date.
# plt.plot_date is deprecated in recent Matplotlib releases; plotting with the
# 'o' marker format and then calling xaxis_date() is the equivalent spelling.
dates = mdates.date2num(df.index.date)

fig, ax = plt.subplots()
ax.plot(dates, df['files changed'], 'o')
ax.xaxis_date()  # interpret the numeric x values as dates when formatting ticks
ax.set_xlabel('date')
ax.set_ylabel('files changed')
plt.show()

In [None]:
# Keep only authors with more than 5 commits; `fdf` is the filtered frame that
# the later per-hour and per-weekday cells also use.
counts = df.groupby('Name')['Commit'].count()
contributors = counts[counts > 5].index.values
fdf = df[df['Name'].isin(contributors)]

# Year-start timestamps spanning the filtered data. They are not applied to
# the axis below. 'YS' is the current spelling of the deprecated 'AS' alias.
xticks = pd.date_range(start=fdf.index.min(), end=fdf.index.max(), freq='YS')

# Commit counts per (year, month), one column per contributor.
# `fdf` is already restricted to `contributors`, so it is grouped directly.
commitsByNameOverTime = fdf.groupby(
        [fdf.index.year, fdf.index.month, fdf['Name']]
    )['Name'].count().unstack('Name')

commitsByNameOverTime.index.names = ['year','month']
ax = commitsByNameOverTime.plot(kind="bar", stacked=True)

ax.set_xlabel('time')
ax.set_ylabel('commits')

# Anchor the legend just beyond the right edge of the axes.
plt.legend(loc=(1.04,0))
plt.show()

In [None]:
# Commit counts per hour of day (taken from the index), one column per
# contributor; the row index is labelled 'hour'.
commitsByNameByHour = (
    fdf.groupby([fdf.index.hour, fdf['Name']])['Name']
    .count()
    .unstack('Name')
    .rename_axis('hour')
)

# Stacked bars: one bar per hour present in the data, one segment per contributor.
ax = commitsByNameByHour.plot.bar(stacked=True)
ax.set(xlabel='hour', ylabel='commits')

# Anchor the legend just beyond the right edge of the axes.
ax.legend(loc=(1.04, 0))
plt.show()

In [None]:
# Commit counts per day of week, one column per contributor.
#
# reindex(range(7)) guarantees one row (and therefore one bar) for every
# weekday, 0 = Monday .. 6 = Sunday, so the seven day-name labels set below
# always match the bars even when no commit fell on some day.
# The table gets its own name so it does not overwrite the per-hour table.
commitsByNameByWeekday = fdf.groupby(
        [fdf.index.weekday, fdf['Name']]
    )['Name'].count().unstack('Name').reindex(range(7))

commitsByNameByWeekday.index.names = ['weekday']
ax = commitsByNameByWeekday.plot(kind="bar", stacked=True)

ax.set_xlabel('day of week')
# Labels are derived from the index so each bar is named after its own weekday number.
ax.set_xticklabels([calendar.day_name[i] for i in commitsByNameByWeekday.index])
ax.set_ylabel('commits')

# Anchor the legend just beyond the right edge of the axes.
plt.legend(loc=(1.04,0))
plt.show()