In [439]:
## Import necessary modules
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import date2num, num2date, AutoDateFormatter, AutoDateLocator, WeekdayLocator, MonthLocator, DayLocator, DateLocator, DateFormatter
from matplotlib.dates import MO, TU, WE, TH, FR, SA, SU
from matplotlib.ticker import AutoMinorLocator
import numpy as np
import datetime, calendar
from datetime import timedelta
import matplotlib.patches as mpatches
%matplotlib tk

# Load the previously pickled input frame (this particular pickle is the one used for average times).
# NOTE(review): the path is absolute and machine-specific, and unpickling can execute code
# contained in the file - only load pickles that come from a trusted source.
data = pd.read_pickle('/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/entwuerfe/xls_testruns/doe_pickle_1458.pkl') # this one for average times

In [440]:
# The timestamp is the row index so far; give that index an explicit name.
data.index.name='stamp'

In [441]:
#data.drop(['tt','ht','acw'],axis=1, inplace=True)
#data.rename(columns={'dectt': 'tt', 'decht':'ht', 'decacw':'acw'}, inplace=True)

In [442]:
# Date-only lookup frame: one row per 'xl' group, holding the first value found
# for each date column. Its only use is to refill the date columns of later frames.
ndcolorder=['dt','yy','mm','dd','wd','ww']
nurdatum=data.groupby('xl').agg({datecol:'first' for datecol in ndcolorder})[ndcolorder]

In [443]:
# Aggregation rules per ('xl', 'bz') group: date columns keep the first value
# found, all remaining listed columns are summed. Columns that are not listed
# here do not appear in the aggregated frame.
colfunx_collapse_dt={}
colfunx_collapse_dt.update((name,'first') for name in ['yy','mm','ww','wd','dd','dt'])
colfunx_collapse_dt.update((name,'sum') for name in ['an','vb','vl','ht','tt','acw','decht','dectt','decacw'])

dgrp=data.groupby(['xl','bz'])
dg=dgrp.agg(colfunx_collapse_dt).copy()

In [444]:
# Pivot the 'bz' index level into the columns: every column gets a 'k' and an 'n'
# sub-index, which is what we need.
# Combinations without data are filled with a zero of the column's own kind -
# Timedelta(0) for timedelta columns, 0 for everything else. A plain fillna(0)
# does not keep timedelta columns that contain NaT as timedelta on current
# pandas releases (they end up as object dtype).
d=dg.unstack()
d=d.fillna({col:(pd.Timedelta(0) if dtype.kind=='m' else 0) for col,dtype in d.dtypes.items()}).copy()
#d

In [445]:
# Overwrite the date-related columns of both sub-indices ('k' and 'n') with the
# values from the "nurdatum" lookup frame.
for datecol in ('dt','wd','dd','yy','mm','ww'):
    for subindex in ('k','n'):
        d.loc[:,(datecol,subindex)]=nurdatum[datecol]
#d

In [446]:
# Move the 'bz' (k / n) level from the columns back into the row index, behind
# the xl-date level, and bring the columns into a fixed order.
c_order2=['dt','yy','mm','ww','wd','dd', 'an', 'vb', 'vl','ht', 'tt', 'acw', 'decht', 'dectt', 'decacw']
grouped_by_day=d.stack('bz').copy()[c_order2]

# One independent frame per sub-index: the 'k' rows and the 'n' rows.
kernzeit_byday,nebnzeit_byday=(grouped_by_day.xs(zone,level=1).copy() for zone in ('k','n'))

In [447]:
############## grouping, average ####################

In [448]:
def decminute(delta):
    """Return the length of ``delta`` as a decimal number of minutes.

    ``delta`` has to provide ``total_seconds()`` (e.g. ``datetime.timedelta``).
    """
    return delta.total_seconds()/60

In [449]:
# Daily averages for both frames: each summed column is divided by the day's
# 'vb' value. The float averages (ht / tt / acw) are additionally converted to
# timedeltas, reading the float as a number of days (unit='d').
#
# Both frames run through the same loop so that they cannot drift apart:
# previously the timedelta conversion was missing for nebnzeit_byday, which left
# plain floats in its 'av_*_tdelta' columns.
average_sources=[('av_ht_float','ht'), ('av_tt_float','tt'), ('av_ac_float','acw'),
                 ('av_ht_dec','decht'), ('av_tt_dec','dectt'), ('av_ac_dec','decacw')]

for byday in (kernzeit_byday,nebnzeit_byday):
    for average_col,source_col in average_sources:
        byday[average_col]=byday[source_col]/byday['vb']
    byday[['av_ht_tdelta','av_tt_tdelta','av_ac_tdelta']]=byday[['av_ht_float','av_tt_float','av_ac_float']].apply(pd.to_timedelta,unit='d')

# Replace missing values with a zero of the column's own kind: Timedelta(0) for
# timedelta columns, 0 otherwise. A plain fillna(0) does not keep timedelta
# columns that contain NaT as timedelta on current pandas releases.
kernzeit_byday=kernzeit_byday.fillna({col:(pd.Timedelta(0) if dtype.kind=='m' else 0) for col,dtype in kernzeit_byday.dtypes.items()})
nebnzeit_byday=nebnzeit_byday.fillna({col:(pd.Timedelta(0) if dtype.kind=='m' else 0) for col,dtype in nebnzeit_byday.dtypes.items()})

In [450]:
def add_averages(frame):
    """Add average columns to ``frame`` and return a copy without missing values.

    Every average is the corresponding column divided by the 'vb' column:

    * ``av_ht_float`` / ``av_tt_float`` / ``av_ac_float`` from 'ht', 'tt', 'acw'
    * ``av_ht_dec`` / ``av_tt_dec`` / ``av_ac_dec`` from 'decht', 'dectt', 'decacw'
    * ``av_ht_tdelta`` / ``av_tt_tdelta`` / ``av_ac_tdelta``: the float averages
      converted with ``pd.to_timedelta(..., unit='d')``

    Side effect: the new columns are written into the passed ``frame`` itself
    (there they may still contain missing values).

    Returns a new DataFrame in which missing values are replaced by zero -
    ``Timedelta(0)`` in timedelta columns, ``0`` in all other columns.
    """
    average_sources=(('av_ht_float','ht'), ('av_tt_float','tt'), ('av_ac_float','acw'),
                     ('av_ht_dec','decht'), ('av_tt_dec','dectt'), ('av_ac_dec','decacw'))
    for average_col,source_col in average_sources:
        frame[average_col]=frame[source_col]/frame['vb']
    frame[['av_ht_tdelta','av_tt_tdelta','av_ac_tdelta']]=frame[['av_ht_float','av_tt_float','av_ac_float']].apply(pd.to_timedelta,unit='d')

    # Fill each column with a zero of its own kind. A plain fillna(0) does not
    # keep timedelta columns that contain NaT as timedelta on current pandas
    # releases (they end up as object dtype).
    zero_by_column={column:(pd.Timedelta(0) if dtype.kind=='m' else 0) for column,dtype in frame.dtypes.items()}
    return frame.fillna(zero_by_column)


In [451]:
# Weekly aggregation for 2017: the year is carried over ('first'), every other
# listed column is summed per calendar week ('ww').
funx_week={'yy':'first'}
funx_week.update((col,'sum') for col in ['an','vb','vl','ht','tt','acw','decht','dectt','decacw'])

# Keep only the 2017 rows and drop the first of them by position, because that
# day still carries ww=52 of the old year.
# NOTE(review): .iloc[1:] relies on that day being the first 2017 row - confirm whenever the input data changes.
nebn2107=nebnzeit_byday[nebnzeit_byday['yy'] == 2017].iloc[1:].copy()
kern2107=kernzeit_byday[kernzeit_byday['yy'] == 2017].iloc[1:].copy()

weeks_2017_nebn=nebn2107.groupby('ww').agg(funx_week).copy()
weeks_2017_kern=kern2107.groupby('ww').agg(funx_week).copy()

# Weekly sums extended by the average columns.
weeks_2017_nebn_av,weeks_2017_kern_av=add_averages(weeks_2017_nebn),add_averages(weeks_2017_kern)

In [452]:
# Show the last five calendar weeks of the weekly frame built from nebnzeit_byday.
weeks_2017_nebn_av.tail(5)

Unnamed: 0_level_0,ht,tt,vb,decht,acw,decacw,dectt,an,vl,yy,av_ht_float,av_tt_float,av_ac_float,av_ht_dec,av_tt_dec,av_ac_dec,av_ht_tdelta,av_tt_tdelta,av_ac_tdelta
ww,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
36,0.030556,0.016528,40.0,00:00:44,0.014028,00:00:20.200000,00:00:23.800001,46.0,6.0,2017,0.000764,0.000413,0.000351,00:00:01.100000,00:00:00.595000,00:00:00.505000,00:01:06.009600,00:00:35.683200,00:00:30.326400
37,0.05228,0.039688,46.0,00:01:15.283332,0.012593,00:00:18.133333,00:00:57.150000,55.0,9.0,2017,0.001137,0.000863,0.000274,00:00:01.636594,00:00:01.242391,00:00:00.394202,00:01:38.236800,00:01:14.563200,00:00:23.673600
38,0.031493,0.021944,26.0,00:00:45.349999,0.009549,00:00:13.749999,00:00:31.599999,30.0,4.0,2017,0.001211,0.000844,0.000367,00:00:01.744230,00:00:01.215384,00:00:00.528846,00:01:44.630400,00:01:12.921600,00:00:31.708799
39,0.041979,0.029711,34.0,00:01:00.449999,0.012269,00:00:17.666667,00:00:42.783334,41.0,7.0,2017,0.001235,0.000874,0.000361,00:00:01.777941,00:00:01.258333,00:00:00.519607,00:01:46.704000,00:01:15.513600,00:00:31.190400
40,0.049132,0.035451,45.0,00:01:10.750003,0.013681,00:00:19.699999,00:00:51.050000,55.0,10.0,2017,0.001092,0.000788,0.000304,00:00:01.572222,00:00:01.134444,00:00:00.437777,00:01:34.348800,00:01:08.083200,00:00:26.265600


In [453]:
############## plotting ####################

In [486]:
#fig=plt.figure#(figsize=(fs))
#fig.suptitle(tit)

# Plot the weekly 'av_ht_dec' values of both frames over the calendar week index.
ax=plt.subplot(111)
mini=pd.to_timedelta(0)

# Candidate for the upper y-limit: largest 'av_ht_dec' of the kern frame plus 10 % headroom.
kern_ht_peak=weeks_2017_kern_av['av_ht_dec'].max()
max_handling=kern_ht_peak+kern_ht_peak/10
max_handling_label=weeks_2017_kern_av['av_ht_tdelta'].max()

for weekly in (weeks_2017_nebn_av,weeks_2017_kern_av):
    ax.plot(weekly.index,weekly['av_ht_dec'])

minmaxindex=pd.to_timedelta([0,max_handling])

# Debug output: the types of the two limit candidates.
print (type(mini))
print (type(max_handling))

# NOTE(review): the y-limits are still disabled, and the figure has neither axis labels nor a legend.
#ax.set_ylim([mini,max_handling])

<class 'pandas._libs.tslib.Timedelta'>
<class 'pandas._libs.tslib.Timedelta'>


In [460]:
# Inspect the weekly 'av_ht_dec' column of weeks_2017_kern_av.
weeks_2017_kern_av['av_ht_dec']

ww
1    00:00:04.057136
2    00:00:03.815267
3    00:00:03.692989
4    00:00:03.779990
5    00:00:03.859131
6    00:00:03.944225
7    00:00:03.412066
8    00:00:03.670955
9    00:00:04.029772
10   00:00:03.983621
11   00:00:03.535897
12   00:00:03.820037
13   00:00:04.008662
14   00:00:03.632112
15   00:00:03.804698
16   00:00:04.609425
17   00:00:04.421985
18   00:00:03.742600
19   00:00:04.027892
20   00:00:04.048102
21   00:00:03.849907
22   00:00:04.116714
23   00:00:03.963435
24   00:00:03.741349
25   00:00:03.765119
26   00:00:03.860940
27   00:00:03.987763
28   00:00:04.072837
29   00:00:03.622623
30   00:00:04.081643
31   00:00:04.267220
32   00:00:04.134514
33   00:00:03.859546
34   00:00:04.064489
35   00:00:04.254410
36   00:00:04.233017
37   00:00:04.277372
38   00:00:04.106802
39   00:00:03.951325
40   00:00:04.139618
Name: av_ht_dec, dtype: timedelta64[ns]

In [472]:
# Check that the int 0 and the Timedelta max_handling can be combined into one TimedeltaIndex.
pd.to_timedelta([0,max_handling])

TimedeltaIndex(['00:00:00', '00:00:05.070367'], dtype='timedelta64[ns]', freq=None)

In [478]:
# Convert the pandas Timedelta into a standard-library datetime.timedelta
# (microsecond resolution, so the nanosecond part is not kept).
xx=max_handling.to_pytimedelta()
xx

datetime.timedelta(0, 5, 70368)

In [480]:
# Try to build a range from 0 up to max_handling.
# NOTE(review): without an explicit freq the recorded result is a daily-frequency
# index that contains only '0 days'.
pd.timedelta_range(0,max_handling)

TimedeltaIndex(['0 days'], dtype='timedelta64[ns]', freq='D')

In [489]:
# Break max_handling down into its days / hours / minutes / ... / nanoseconds components.
max_handling.components

Components(days=0, hours=0, minutes=0, seconds=5, milliseconds=70, microseconds=367, nanoseconds=817)

In [504]:
# Pick one sample Timedelta: the 'av_ht_tdelta' value of calendar week 1.
# A scalar .loc lookup (row label, column label) returns the value directly.
# The earlier form built a one-element Series and read it with [0], i.e. by
# position on a label-indexed Series, which newer pandas versions deprecate.
tdelta_example=weeks_2017_kern_av.loc[1,'av_ht_tdelta']

In [507]:
# Convert the sample Timedelta into a numpy.timedelta64 value.
tdelta_example.to_timedelta64()

numpy.timedelta64(243388800000,'ns')