In [9]:
## Import necessary modules
import datetime, calendar
from datetime import timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import dates
from matplotlib.dates import date2num, num2date, AutoDateFormatter, AutoDateLocator, WeekdayLocator, MonthLocator, DayLocator, DateLocator, DateFormatter
from matplotlib.dates import MO, TU, WE, TH, FR, SA, SU
from matplotlib.ticker import AutoMinorLocator, AutoLocator, FormatStrFormatter, ScalarFormatter
from matplotlib.ticker import FuncFormatter
%matplotlib tk

# Load the source dataframe from a pickle file.
# NOTE(review): the path is absolute and machine-specific, so the notebook only runs on this host.
# Unpickling can execute arbitrary code - only load pickle files from a trusted source.
data = pd.read_pickle('/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/doe_pickle_1458.pkl') # this one for average times

In [14]:
# The timestamp is the index so far; give that index the name 'stamp',
# then display the column labels that are available in the loaded frame.
data.index.name='stamp'

data.columns

Index([  'month',    'year',         0,         1,         2,         3,
               4,         5,         6,         7,         8,         9,
              10,        11,        12,        13,        14,        15,
              16,        17,        18,        19,        20,        21,
              22,        23,        24,   'summa',   'xlday',     'day',
          'date',    'week', 'weekday'],
      dtype='object')

In [12]:
# Build a dataframe that holds only the date values: one row per 'xl' group, taking the first
# value of each date column. Its only use is to fill subsequent dataframes from it.
# NOTE(review): the saved output of this cell is "KeyError: 'xl'", and the column listing printed
# earlier in this notebook shows 'xlday', 'date', 'year', 'month', 'day', 'week', 'weekday' but none
# of 'xl', 'dt', 'yy', 'mm', 'dd', 'ww', 'wd'. Presumably the loaded pickle has a different schema
# than this code expects (or its columns must be renamed first) - confirm before running the cells below.
ndcolorder=['dt','yy','mm','dd','wd','ww']
nurdatum=data.groupby('xl').agg({'mm':'first','dt':'first','ww':'first','dd':'first','wd':'first','yy':'first'})
nurdatum=nurdatum[ndcolorder]

KeyError: 'xl'

In [None]:
# Aggregation rules used when collapsing the rows of each ('xl', 'bz') group into one row:
# the date-part columns keep the first value found, all other listed columns are summed.
# Columns that are not listed (the original note mentions the hours) do not appear in the result.
colfunx_collapse_dt = dict(
    [(col, 'first') for col in ('yy', 'mm', 'ww', 'wd', 'dd', 'dt')]
    + [(col, 'sum') for col in ('an', 'vb', 'vl', 'ht', 'tt', 'acw', 'decht', 'dectt', 'decacw')]
)

dgrp = data.groupby(['xl', 'bz'])
dg = dgrp.agg(colfunx_collapse_dt).copy()

In [None]:
# Move the last index level ('bz') into the columns, so every column gets one sub-column per 'bz'
# value (the 'k' and 'n' sub-index, which is what we need); missing combinations are filled with 0.
d=dg.unstack().fillna(0).copy()

In [None]:
# Overwrite both sub-columns ('k' and 'n') of every date-related column with the
# per-group values taken from the "nurdatum" dataframe.
for datecol in ('dt', 'wd', 'dd', 'yy', 'mm', 'ww'):
    for zone in ('k', 'n'):
        d.loc[:, (datecol, zone)] = nurdatum[datecol]
#d

In [None]:
# Put the 'bz' column level (k / n) back into the row index, behind the xl-date level,
# and bring the columns into a fixed order.
c_order2=['dt','yy','mm','ww','wd','dd', 'an', 'vb', 'vl','ht', 'tt', 'acw', 'decht', 'dectt', 'decacw']
grouped_by_day = d.stack('bz')[c_order2].copy()

# One frame per value of the second index level: 'k' rows and 'n' rows.
kernzeit_byday, nebnzeit_byday = (
    grouped_by_day.xs(zone, level=1).copy() for zone in ('k', 'n')
)

In [None]:
############## grouping, average ####################

In [None]:
# function for adding the columns with averages later
def add_averages(frame):
    """Add per-call average columns for handling, talk and after-call time to ``frame``.

    For each of the source columns 'ht', 'tt' and 'acw' the value is divided by 'vb'
    and stored three times:

    * ``av_<x>_xlfloat`` - the raw quotient (presumably Excel-style fractions of a
      day, as the column name suggests - TODO confirm),
    * ``av_<x>_tdelta``  - the quotient converted with ``pd.to_timedelta(unit='d')``;
      missing values (e.g. 0/0 for rows without calls) become a zero timedelta,
    * ``av_<x>_float``   - the timedelta expressed in decimal minutes.

    ``<x>`` is 'ht', 'tt' or 'ac'. The columns are appended in the order
    xlfloat (x3), tdelta (x3), float (x3).

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns 'ht', 'tt', 'acw' and 'vb'. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the nine columns added.
    """
    metrics = (('ht', 'ht'), ('tt', 'tt'), ('acw', 'ac'))

    for source, short in metrics:
        frame['av_{}_xlfloat'.format(short)] = frame[source] / frame['vb']

    # Fill missing values with a real zero Timedelta: filling timedelta columns with the
    # integer 0 raises on some pandas versions and silently turns the column into
    # object dtype on others, which breaks the conversion to seconds below.
    zero = pd.Timedelta(0)
    for source, short in metrics:
        as_tdelta = pd.to_timedelta(frame['av_{}_xlfloat'.format(short)], unit='d')
        frame['av_{}_tdelta'.format(short)] = as_tdelta.fillna(zero)

    for source, short in metrics:
        seconds = frame['av_{}_tdelta'.format(short)].dt.total_seconds()
        frame['av_{}_float'.format(short)] = seconds.div(60)

    return frame

In [None]:
# Weekly totals for 2017, built separately from the 'n' frame and the 'k' frame.
# Per week ('ww'): 'yy' keeps its first value, the remaining listed columns are summed.
funx_week = dict(yy='first', an='sum', vb='sum', vl='sum', ht='sum', tt='sum', acw='sum')

# --- 'n' frame ---
# keep the 2017 rows and skip the first of them, because its ww is 52 of the old year
# (only valid for 2017!)
nebn2107 = nebnzeit_byday[nebnzeit_byday['yy'].eq(2017)].iloc[1:].copy()
weeks_2017_nebn = nebn2107.groupby('ww').agg(funx_week).copy()
# add_averages adds its columns to the frame it receives and returns that same frame
w17_nebn_av = add_averages(weeks_2017_nebn)

# --- 'k' frame --- (same steps, same first-row skip)
kern2107 = kernzeit_byday[kernzeit_byday['yy'].eq(2017)].iloc[1:].copy()
weeks_2017_kern = kern2107.groupby('ww').agg(funx_week).copy()
w17_kern_av = add_averages(weeks_2017_kern)

In [None]:
#w17_nebn_av.tail(5)

In [None]:
############## plotting ####################

In [None]:
def decminutes_to_mmss(decimal):
    """Format a duration given in decimal minutes as an 'm:ss' string.

    The minutes part is not capped at 59, so 75.25 becomes '75:15'. Fractions of a
    second are discarded. Negative durations are formatted from their absolute
    value and prefixed with '-'.

    Parameters
    ----------
    decimal : int or float
        Duration in minutes, e.g. 3.5 for three and a half minutes.

    Returns
    -------
    str
        Minutes (no padding) and seconds (two digits), e.g. '3:30'.

    Raises
    ------
    ValueError
        If ``decimal`` is NaN (raised by ``datetime.timedelta``).
    """
    tdelta = timedelta(minutes=abs(decimal))
    # timedelta.seconds alone only covers the part below one day and, for negative
    # values, belongs to a negative 'days' field - so build the whole-second total
    # from the absolute duration instead.
    sekunden = tdelta.days * 86400 + tdelta.seconds
    minuten, restsekunden = divmod(sekunden, 60)
    # no sign for values that are displayed as 0:00
    vorzeichen = '-' if decimal < 0 and sekunden else ''
    return '{}{}:{:02d}'.format(vorzeichen, minuten, restsekunden)

In [None]:
def maptix2labels(ticks):
    """Turn tick positions (decimal minutes) into a list of label strings.

    Each position is made non-negative with ``abs`` and then formatted by
    ``decminutes_to_mmss``; the labels are returned in the order of ``ticks``.
    """
    return [decminutes_to_mmss(abs(position)) for position in ticks]

In [None]:
# colors used by the plotting cell below
bgkern='#FFF7F2'    # background of the left panel (ax1)
bgnebn='#F8FFF2'    # background of the right panel (ax2)
aht="#21a9ff"       # 'av_ht_float' line and the dashed line at its mean
att="#ceecff"       # 'av_tt_float' line
aac="#c4c4c4"       # 'av_ac_float' line
zielzeit="#FF006E"  # dotted horizontal target line
bars="#A06A00"      # 'vb' bars, their tick labels and the 'Calls' axis label

In [None]:
# Figure with two panels side by side: weekly averages of the 'k' frame on the left (ax1)
# and of the 'n' frame on the right (ax2). Each panel shows three average-time lines on the
# left y-axis (decimal minutes, labelled as m:ss) and the number of calls ('vb') as
# transparent bars on a twin y-axis.

def _format_minutes_axis(panel, bottom, top):
    """Set the y-range of ``panel`` and label its major/minor y-ticks as m:ss."""
    panel.set_ylim(bottom, top)
    # Every axis gets its own locator and formatter objects: matplotlib tick helpers are
    # bound to a single Axis and must not be shared between axes.
    panel.yaxis.set_minor_locator(AutoMinorLocator(4))
    # Formatters derive the label from the tick value at draw time, so the labels stay
    # correct when the interactive window is zoomed or resized. abs() keeps ticks below
    # zero labelled without a sign.
    panel.yaxis.set_major_formatter(FuncFormatter(lambda value, pos: decminutes_to_mmss(abs(value))))
    panel.yaxis.set_minor_formatter(FuncFormatter(lambda value, pos: decminutes_to_mmss(abs(value))))
    panel.tick_params(axis='y', which='minor', labelsize=6)


f, (ax1, ax2) = plt.subplots(1, 2, sharey=False, figsize=(12,5))

# common y-range of both panels: half a minute of headroom above the largest weekly
# average handling time of EITHER frame, so neither panel is clipped
commonmax=np.nanmax([w17_kern_av['av_ht_float'].max(), w17_nebn_av['av_ht_float'].max()])+0.5
commonmin=-0.25

# mean of the weekly averages, ignoring weeks whose value is 0
htmean_k=w17_kern_av['av_ht_float'].replace(0,np.nan).mean()
htmean_n=w17_nebn_av['av_ht_float'].replace(0,np.nan).mean() # decent mean value without the zeroes jan-mar

### plots

# twin axes carry the call-count bars
ax3 = ax1.twinx()
ax3.tick_params('y', labelsize=6, labelcolor=bars)

ax4 = ax2.twinx()
ax4.tick_params('y', labelsize=6, labelcolor=bars)

kcalls=ax3.bar(w17_kern_av.index, w17_kern_av['vb'], width=0.8, alpha=0.1, color=bars, label='calls')
ncalls=ax4.bar(w17_nebn_av.index, w17_nebn_av['vb'], width=0.8, alpha=0.1, color=bars, label='calls')

kaht,=ax1.plot(w17_kern_av.index,w17_kern_av['av_ht_float'],color=aht,label="aht")
katt,=ax1.plot(w17_kern_av.index,w17_kern_av['av_tt_float'],color=att,label="att")
kacw,=ax1.plot(w17_kern_av.index,w17_kern_av['av_ac_float'],color=aac,label="acw")
naht,=ax2.plot(w17_nebn_av.index,w17_nebn_av['av_ht_float'],color=aht,label="aht")
natt,=ax2.plot(w17_nebn_av.index,w17_nebn_av['av_tt_float'],color=att,label="att")
nacw,=ax2.plot(w17_nebn_av.index,w17_nebn_av['av_ac_float'],color=aac,label="acw")

# horizontal reference lines: fixed target time (dotted) and actual mean (dashed)
kziel=ax1.axhline(y=3.5,color=zielzeit,ls=':',alpha=0.75, label='3:30 min')
kreal=ax1.axhline(y=htmean_k,color=aht,ls='--',alpha=0.9, label=str(decminutes_to_mmss(htmean_k)))
nziel=ax2.axhline(y=1.5,color=zielzeit,ls=':',alpha=0.75, label='1:30 min')
nreal=ax2.axhline(y=htmean_n,color=aht,ls='--',alpha=0.9, label=str(decminutes_to_mmss(htmean_n)))

### y-range and m:ss tick labels, identical for both panels
_format_minutes_axis(ax1, commonmin, commonmax)
_format_minutes_axis(ax2, *ax1.get_ylim())

### color adjustments, titles, legend
ax1.set_facecolor(bgkern)
ax2.set_facecolor(bgnebn)

ax1.set_title('Kernzeit', size=9)
ax2.set_title('Nebenzeit', size=9)

ax1.set_xlabel('Woche', size=7)
ax2.set_xlabel('Woche', size=7)
ax1.tick_params('x', labelsize=8)
ax2.tick_params('x', labelsize=8)

ax1.set_ylabel('Minuten', rotation=90)
ax4.set_ylabel('Calls',rotation=90,color=bars)

f.suptitle('Bearbeitungszeiten 2017 nach KW und Durchschnitt')

# one figure-level legend, built from the artists of the left panel
f.legend((kaht,katt,kacw,kziel,kreal,kcalls),('handling','talk','afterwork','zielzeit','Ø-Zeit real','calls'),fontsize=7,ncol=2,loc='upper right',borderaxespad=2)

path='/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/ce_teamleitung/plots/durchschn_Zeiten_getrennt'
# savefig has no 'ext' keyword (current matplotlib raises TypeError for it); the output
# format is taken from the file name extension, so append it explicitly.
f.savefig(path+'.png')

In [None]:
# Reminder cell: prints an open TODO as cell output.
print('todo: function from plot with appropriate arguments, 1459 as well')

In [None]:
#### from here on: only tests for the agent plots
# display the weekly average handling time column ('av_ht_float') of w17_kern_av
w17_kern_av['av_ht_float']