# Welcome to the Gleeo time analysis notebook

To get started, copy the file **"Time Tracker data export.csv"** next to this notebook and run the cells.

In [None]:
%autosave 0
%matplotlib inline
import pandas as pd
import numpy as np
# Maximum number of rows pandas prints before it truncates a frame.
# The option name is spelled out in full: abbreviated names are resolved by
# pattern matching and stop working once they match more than one option.
pd.set_option('display.max_rows', 10)

# Maximum number of columns pandas prints before it truncates a frame
# (this is a column count, not a column width).
pd.set_option('display.max_columns', 10)

# Matplotlib defaults: bigger figures and fonts for every plot in this notebook.
# rcParams is imported from matplotlib itself; the top-level `pylab` module is
# only a discouraged compatibility shim that re-exports the same object.
from matplotlib import rcParams
rcParams['figure.figsize'] = (15, 7)
rcParams['font.size'] = 15

In [None]:
# Load the exported entries and convert both interval boundaries to timestamps
# so they can be compared against date ranges later on.
time_entries = pd.read_csv("Time Tracker data export.csv").assign(
    Start=lambda frame: pd.to_datetime(frame['Start']),
    End=lambda frame: pd.to_datetime(frame['End']),
)

In [None]:
# List the column names of the loaded export ...
print("columns: {}".format(time_entries.columns))
# ... then let the notebook render the frame itself (bare last expression).
time_entries

# Total time spent on projects

All times are displayed in **hours**.

In [None]:
# Sum the numeric columns per project. numeric_only=True keeps the 'Start'/'End'
# timestamps and any text columns out of the aggregation: recent pandas versions
# no longer drop such columns silently and fail when asked to sum datetimes.
project_total = time_entries.groupby('Project').sum(numeric_only=True)
# Total duration per project (assumes 'Decimal Duration' was parsed as a number).
project_total["Decimal Duration"]

In [None]:
# Pie chart of the per-project totals; autopct prints each share with one decimal.
ax = project_total.plot.pie(y='Decimal Duration', autopct='%.1f')
ax.set(title='Total time spent on projects', ylabel='')
# Anchor the legend's upper-left corner just right of the axes so it does not
# cover the pie.
ax.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.0)

# Time spent on projects every month

In [None]:
projects = time_entries['Project'].unique()
tasks = time_entries['Task'].unique()
# Use the real extremes of the data instead of the first/last row, so the result
# does not depend on how the export is sorted. Snap outwards to whole days:
# rounding to the *nearest* day could move the start past entries of the first
# day, or the end before entries of the last day.
start = time_entries['Start'].min().floor('D')
end = time_entries['End'].max().ceil('D')
# Every calendar year touched by the tracking period (inclusive).
years = range(start.year, end.year + 1)
print("Tracking period from {} to {}".format(start, end))

In [None]:
def get_time_spent_per(time_unit, entries=None, period_start=None, period_end=None):
    """Sum the 'Decimal Duration' booked on each project per period.

    The span from ``period_start`` to ``period_end`` is cut at every
    ``time_unit`` boundary produced by ``pd.date_range``. Each entry is counted
    in full in the period that contains its 'Start', using the half-open
    interval ``[boundary, next boundary)``. Entries that begin exactly on a
    boundary or run across one are therefore kept rather than dropped.

    Only the time between the first and the last generated boundary is
    covered; entries starting outside of it are not counted.

    Parameters
    ----------
    time_unit : str
        pandas frequency alias for the period length, e.g. ``'D'`` or ``'W'``.
    entries : pandas.DataFrame, optional
        Frame with 'Project', 'Start', 'End' and 'Decimal Duration' columns.
        Defaults to the notebook-level ``time_entries``.
    period_start, period_end : timestamp-like, optional
        Bounds handed to ``pd.date_range``. Default to the notebook-level
        ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        One row per period (indexed by the period's start, index name 'Date'),
        one column per project, with 0 where nothing was booked.
    """
    # Fall back to the notebook-level data when no explicit inputs are given.
    entries = time_entries if entries is None else entries
    period_start = start if period_start is None else period_start
    period_end = end if period_end is None else period_end

    plt_time_index = pd.date_range(period_start, period_end, freq=time_unit)
    range_df = pd.DataFrame(plt_time_index, index=plt_time_index, columns=['Date'])
    range_df['End'] = range_df['Date'].shift(-1)
    range_df = range_df.iloc[:-1]  # the last boundary only closes the previous period

    # One Series (project -> hours) per period; periods without entries give an
    # empty Series, which becomes a row of NaN and is zero-filled below.
    series = [
        entries.loc[(entries['Start'] >= row['Date']) & (entries['Start'] < row['End'])]
        .groupby('Project')['Decimal Duration'].sum()
        for _, row in range_df.iterrows()
    ]
    organized_durations = pd.DataFrame(series, index=range_df['Date']).fillna(0)
    return organized_durations

In [None]:
# Hours per project for each period between consecutive business month ends.
df_month = get_time_spent_per("BM")
# Adding up the per-project means gives the mean total hours per period.
average_hours_per_month = df_month.mean().sum()
print("Average hours/month: {}".format(average_hours_per_month))
# Stacked areas per project, with the average drawn as a dashed reference line.
ax = df_month.plot.area(title="Time spent on projects every month")
ax.axhline(y=average_hours_per_month, linestyle="--", linewidth=3)

# Time spent on projects every week


In [None]:
# Hours per project for each period between consecutive weekly boundaries.
df_week = get_time_spent_per("W")
# Adding up the per-project means gives the mean total hours per period.
average_hours_per_week = df_week.mean().sum()
print("Average hours/week: {}".format(average_hours_per_week))
# Stacked areas per project, with the average drawn as a dashed reference line.
ax = df_week.plot.area(title="Time spent on projects every week")
ax.axhline(y=average_hours_per_week, linestyle="--", linewidth=3)

# Overhours

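To calculate your accumulated overhours (overtime), list your personal vacations in the file `personal/holidays.yaml`. Each entry needs a `name`, a `from` date and a `to` date.
Public holidays come from the `holidays` library: adjust the country and state in the cell below so that they match where you work.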

In [None]:
import holidays
import yaml

# Public holidays of the configured state for every year in the tracking period.
state_vacation = holidays.Germany(prov='BW', years=years)
state_vacation_dates = list(state_vacation.keys())

# Personal vacations: a YAML list whose entries carry `name`, `from` and `to`.
# safe_load builds plain Python objects only (yaml.load without a Loader is
# unsafe and rejected by current PyYAML), and the `with` block closes the file.
with open('personal/holidays.yaml', 'r') as holidays_file:
    personal_vacations = yaml.safe_load(holidays_file) or []  # empty file -> no vacations

# Start from a copy of the public holidays and append every vacation day taken.
vacation_dates = list(state_vacation_dates)
vacation_count = 0
for vacation in personal_vacations:
    # Custom business days only: weekends and public holidays inside the span
    # do not use up vacation days.
    date_range = pd.bdate_range(start=vacation['from'], end=vacation['to'], freq='C',
                                name=vacation['name'], holidays=state_vacation_dates)
    print('{} used up {} vacation days'.format(vacation['name'], len(date_range)))
    vacation_count += len(date_range)
    vacation_dates += date_range.tolist()
print('Total number of holiday days taken: {}'.format(vacation_count))

In [None]:
# Contract parameters: adjust these to your own working agreement.
WEEKLY_CONTRACT_HOURS = 39.5
WORKDAYS_PER_WEEK = 5

# Every working day in the tracking period (no weekends, public holidays or
# personal vacation days), each carrying the same expected number of hours.
business_dates = pd.bdate_range(start, end, freq='C', holidays=vacation_dates)
business_df = pd.DataFrame(WEEKLY_CONTRACT_HOURS / WORKDAYS_PER_WEEK,
                           columns=['Expected hours'], index=business_dates)

# Hours actually booked per day, leaving out the 'break' project.
# errors='ignore' keeps this working for exports that have no such project.
worked_days = get_time_spent_per('D').drop('break', axis=1, errors='ignore')
worked_df = pd.DataFrame(worked_days.sum(axis=1), columns=['Worked hours'])

# Right join: keep every tracked day; days that are not working days get
# 0 expected hours, so anything worked on them counts fully as overhours.
overhours_df = pd.merge(business_df, worked_df, how='right', left_index=True, right_index=True).fillna(0)
overhours_df['Overhours'] = overhours_df['Worked hours'] - overhours_df['Expected hours']
overhours_df['Accumulated Overhours'] = overhours_df['Overhours'].cumsum()
overhours_df.plot(title="Accumulated Overhours", y=['Accumulated Overhours'])
