In [275]:
## Import necessary modules
import os,sys
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import date2num, AutoDateFormatter, AutoDateLocator, WeekdayLocator, MonthLocator, DayLocator, DateLocator, DateFormatter
from matplotlib.dates import MO, TU, WE, TH, FR, SA, SU
from matplotlib.ticker import AutoMinorLocator, AutoLocator, FormatStrFormatter, ScalarFormatter
import numpy as np
import datetime, calendar
from datetime import timedelta
import matplotlib.patches as mpatches
from itertools import tee

sys.path.append(os.path.abspath('/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/entwuerfe/xls_testruns/'))
from ce_funclib import determine_kernzeit as dtkz
from ce_funclib import continuity_check

%matplotlib inline
#%matplotlib tk


## Archive directory with the daily agent-statistics .xls exports that serve as input data
arcpth='/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/test_stats/archiv/'


In [276]:
######## GET A LIST OF MATCHING .xls FILES FROM THE GIVEN DIRECTORY

In [277]:
def collectxlfiles(arcpath):
    """Return the sorted paths of all directory entries whose name starts with 'CE_al'.

    Parameters
    ----------
    arcpath : str
        Directory to scan; it is listed with ``os.listdir`` (not recursive).

    Returns
    -------
    list of str
        ``arcpath`` joined with every matching entry name, in ascending
        lexical order. Empty if nothing matches.
    """
    treffer = [
        os.path.join(arcpath, eintrag)
        for eintrag in os.listdir(arcpath)
        if eintrag.startswith('CE_al')
    ]
    treffer.sort()
    return treffer

xlfilelist=collectxlfiles(arcpth)
#xlfilelist
#examplefile=xlfilelist[233]

In [278]:
###### TEST FOR DATA IN FILE, SORT OUT EMPTY FILES 

In [279]:
def filetoframe(exfile):
    """Read one daily agent-statistics spreadsheet into a tidy DataFrame.

    Parameters
    ----------
    exfile : str
        Path of the spreadsheet; it is handed to ``pd.read_excel``.

    Returns
    -------
    (pandas.DataFrame, str)
        The frame with the columns tstamp, agent, an, be, vl, ht_float,
        tt_float, date, wd, ww, mm, yy, bz, ort, id, plus a status flag:
        'reg' for a sheet that contains agent rows, 'ex' for a sheet
        without data. An 'ex' sheet is turned into a single zero-filled
        placeholder row (agent 'platzhalter') so that the day can still be
        found in the combined frame.

    Notes
    -----
    Every positional slice is followed by an explicit ``.copy()`` before new
    columns are assigned, so the assignments never hit a view of the frame
    returned by ``read_excel`` (SettingWithCopy hazard).
    """
    # map the positional columns we need to decent names (shared by both branches)
    usefulcols={0:'tstamp',1:'agent',3:'an',4:'be',22:'vl',24:'ht_float',29:'tt_float'}

    exframe=pd.read_excel(exfile) # this is a regular pd.DataFrame
    datecell=exframe.iloc[0,1]
    sheet_datetime=pd.to_datetime(datecell,format='%d.%m %Y : %H')
    sheet_date=sheet_datetime.date()

    integritycheck=exframe.iloc[2,1] # files with data have "Agenten" here, files with no calls have a NaN

    if integritycheck != 'Agenten':
        # the sheet is empty: keep its date so the day can be filled with zeroes
        except_status='ex'
        print('Exception:', sheet_datetime)

        # none of the integer labels exists in the raw sheet, so this yields NaN-only columns
        exframe=exframe.reindex(columns=sorted(usefulcols.keys()))
        exframe=exframe.rename(columns=usefulcols)
        exframe=exframe[0:1].copy() # a single placeholder row is enough

        exframe['tstamp']=sheet_datetime
        exframe['date']=sheet_date
        exframe['agent']='platzhalter'
        exframe[['wd','ww','mm','yy']]=exframe['tstamp'].dt.strftime('%a,%W,%m,%Y').str.split(',',expand=True) # make ww,yy,mm,wd columns
        exframe['bz']=exframe['tstamp'].apply(dtkz)
        exframe['ort']=exframe['agent'].str[0] # first character of the placeholder name
        exframe['id']='foobar' # dummy id, there is no real agent behind this row

        # the calendar columns arrive as strings, store them as integers
        exframe[['ww','mm','yy']]=exframe[['ww','mm','yy']].astype(np.int64)
        exframe=exframe.fillna(0) # the remaining NaN-only value columns become 0
        return exframe,except_status

    except_status='reg'

    exframe.columns=range(0,30) # temporary positional names; raises if the sheet does not have 30 columns
    exframe=exframe[sorted(usefulcols.keys())] # skip cols and keep the ones we need
    exframe=exframe.rename(columns=usefulcols)
    exframe=exframe[3:-1].copy() # strip text rows and the mangled sum row
    exframe['tstamp']=pd.to_datetime(exframe['tstamp'],format=' %d.%m.%Y %H:%M ')
    exframe['date']=exframe['tstamp'].dt.date
    exframe[['wd','ww','mm','yy']]=exframe['tstamp'].dt.strftime('%a,%W,%m,%Y').str.split(',',expand=True) # make ww,yy,mm,wd columns
    exframe['bz']=exframe['tstamp'].apply(dtkz)

    # split the identifier into useable columns; 'agent' is overwritten last because the others read it
    exframe['ort']=exframe['agent'].str[0]
    exframe['id']=exframe['agent'].str[-6:]
    exframe['agent']=exframe['agent'].str[2:-7]

    # integers should be of appropriate datatype, we received them as strings
    exframe[['vl','an','be','ww','mm','yy']]=exframe[['vl','an','be','ww','mm','yy']].astype(np.int64)

    return exframe,except_status

In [280]:
# one frame per spreadsheet; files without data are remembered separately
framelist, exceptionlist = [], []
for xfile in xlfilelist:
    frame_from_file, except_status = filetoframe(xfile)
    framelist.append(frame_from_file)
    if except_status == 'ex':
        exceptionlist.append(xfile)

exceptionlist
    

Exception: 2017-04-17 00:00:00
Exception: 2017-05-14 00:00:00


['/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/test_stats/archiv/CE_alle_Agenten_taeglich_2017-04-17.xls',
 '/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/test_stats/archiv/CE_alle_Agenten_taeglich_2017-05-14.xls']

In [281]:
#### produce a unified frame with all data, sorted by timestamp and agent name, with a fresh 0..n-1 index
bigframeii = (
    pd.concat(framelist)
    .sort_values(['tstamp', 'agent'])
    .reset_index(drop=True)
)

In [282]:
# the exclusive logins have to be merged: every row carrying one of these ids
# gets the agent name the id is listed under
unify_id={'gesinst':'995887','stanzju':'878457','papkeda':'891914'}
for agentname, exklusiv_id in unify_id.items():
    bigframeii.loc[bigframeii['id'] == exklusiv_id, 'agent'] = agentname

In [283]:
### some date locator play, can conveniently be checked against a single xls file
def check_single_day(day):
    """Print the summed handling time and the summed handled calls of one day.

    Reads the module-level ``bigframeii``; ``day`` is compared against its
    'date' column. Nothing is returned.
    """
    an_diesem_tag = bigframeii['date'] == day
    ht_summe = bigframeii.loc[an_diesem_tag, 'ht_float'].sum()
    be_summe = bigframeii.loc[an_diesem_tag, 'be'].sum()
    print('htsum', ht_summe, end=', ')
    print('bearbeitete sum', be_summe)
check_single_day(datetime.date(2017,4,17)) # a day without calls (placeholder row) is in the frame, too: both sums are 0
check_single_day(datetime.date(2017,4,18)) # a regular day with calls, for comparison

htsum 0.0, bearbeitete sum 0.0
htsum 190.40000236034393, bearbeitete sum 50.0


In [284]:
#### get all dates and check whether they're contiguous
datenserie_uniq = bigframeii['date'].unique().tolist()
tage_bestand = len(datenserie_uniq)
# NOTE(review): tage_start is a single date, but tage_ende is a one-element list
# because of the [-1:] slice -- confirm which form later cells expect
tage_start, tage_ende = datenserie_uniq[0], datenserie_uniq[-1:]

missing_dates = continuity_check(datenserie_uniq)
if missing_dates:
    print('the following dates are not within the frame:')
    print(missing_dates)
else:
    print('no dates are missing')

no dates are missing


In [285]:
### PARSE AGENT DATA
### What I want:
### * get a list of all agents that have worked in the period 
### * get data for each agent
### * get average of all agents as a reference
### per agent:
### ** get all calls that have lasted longer than x times the average of all agents
### ** get a plot of all calls (by timestamp)
### ** get a plot of all days (by date)
### ** get their tendencies over the weeks (? vacation dates missing and so on)
bigframeii.tail(10)

Unnamed: 0,tstamp,agent,an,be,vl,ht_float,tt_float,date,wd,ww,mm,yy,bz,ort,id
8384,2017-10-26 17:00:00,neumath,6.0,6.0,0.0,27.8333,21.8333,2017-10-26,Thu,43,10,2017,k,B,977333
8385,2017-10-26 17:00:00,radtkke,7.0,7.0,0.0,35.35,30.5333,2017-10-26,Thu,43,10,2017,k,H,390932
8386,2017-10-26 17:00:00,steffci,6.0,6.0,0.0,29.7,25.5,2017-10-26,Thu,43,10,2017,k,B,292174
8387,2017-10-26 18:00:00,diessro,2.0,2.0,0.0,5.5167,3.5167,2017-10-26,Thu,43,10,2017,k,B,442023
8388,2017-10-26 18:00:00,neumath,1.0,1.0,0.0,1.3833,0.3833,2017-10-26,Thu,43,10,2017,k,B,977333
8389,2017-10-26 18:00:00,radtkke,3.0,3.0,0.0,13.9333,10.9333,2017-10-26,Thu,43,10,2017,k,H,390932
8390,2017-10-26 18:00:00,steffci,4.0,4.0,0.0,10.8167,6.8167,2017-10-26,Thu,43,10,2017,k,B,292174
8391,2017-10-26 19:00:00,diessro,1.0,1.0,0.0,3.45,2.45,2017-10-26,Thu,43,10,2017,k,B,442023
8392,2017-10-26 19:00:00,radtkke,1.0,1.0,0.0,5.3333,4.6,2017-10-26,Thu,43,10,2017,k,H,390932
8393,2017-10-26 19:00:00,steffci,2.0,2.0,0.0,7.6333,5.8333,2017-10-26,Thu,43,10,2017,k,B,292174


In [286]:
# all agent names in sorted order, followed by the three group pseudo-agents
allagents_list = sorted(bigframeii['agent'].unique()) + ['Hagenow', 'Berlin', 'Alle']
# every distinct value of the location column
standorte = bigframeii['ort'].unique().tolist()

# rows split by the 'bz' flag: 'k' (kern) and 'n' (neben)
bigk = bigframeii[bigframeii['bz'] == 'k']
bign = bigframeii[bigframeii['bz'] == 'n']

**We cannot reconstruct individual calls anyway, because the raw data has already been grouped by hour,  
so we go on and group by day to compute the averages.**

In [287]:
def _group_and_average(agframe, gruppierung):
    """Aggregate the rows per day or week (or not at all) and add the aht/att/acw columns."""
    ### tstamp is dropped and date will be the new index; all others summed or reduced
    colfx_day={'agent':'first','an':'sum','be':'sum','vl':'sum','ht_float':'sum','tt_float':'sum','wd':'first','ww':'first','mm':'first','yy':'first','bz':'first','ort':'first','id':'first'}
    ### same as above, but ww becomes the index and therefore is not aggregated
    colfx_week={spalte: fx for spalte, fx in colfx_day.items() if spalte != 'ww'}

    if gruppierung == 'tag':
        grpd=agframe.groupby('date').agg(colfx_day)
    elif gruppierung == 'woche':
        grpd=agframe.groupby('ww').agg(colfx_week)
    else: # 'nursplit': keep the rows as they are
        grpd=agframe.copy()

    # rows without handled calls get NaN averages instead of inf from a division by zero
    bearbeitet=grpd['be'].where(grpd['be'] != 0)
    grpd['aht']=grpd['ht_float']/bearbeitet
    grpd['att']=grpd['tt_float']/bearbeitet
    grpd['acw']=grpd['aht']-grpd['att']

    return grpd


def group_and_add_average(agentname,frame,gruppierung):
    """Filter ``frame`` for one agent or group, split by 'bz' and add average times.

    Parameters
    ----------
    agentname : str
        A value of the 'agent' column, or one of the group names:
        'Hagenow' (rows with ort == 'H'), 'Berlin' (rows with ort == 'B')
        or 'Alle' (rows whose ort is in the module-level ``standorte``).
        For a group, 'agent' and 'id' are overwritten with the group name
        and a fixed group id.
    frame : pandas.DataFrame
        Needs the columns agent, an, be, vl, ht_float, tt_float, date, wd,
        ww, mm, yy, bz, ort and id. It is not modified.
    gruppierung : str
        'tag' groups by 'date', 'woche' groups by 'ww', 'nursplit' keeps
        the rows ungrouped.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The frames for bz == 'k' and bz == 'n', each with the additional
        columns aht (ht_float / be), att (tt_float / be) and acw (aht - att).
        Where be is 0 these three columns are NaN.

    Raises
    ------
    ValueError
        If ``gruppierung`` is none of the three supported values.
    """
    if gruppierung not in ('tag','woche','nursplit'):
        raise ValueError("gruppierung must be 'tag', 'woche' or 'nursplit', got {!r}".format(gruppierung))

    # step 1: filter by agent; a location-bound group is filtered by location and renamed to the group name
    standortgruppen={'Hagenow':('H','000001'),'Berlin':('B','000002')}
    if agentname in standortgruppen:
        ort,gruppen_id=standortgruppen[agentname]
        nur_agent=frame.loc[frame['ort']==ort].copy()
        nur_agent['agent']=agentname
        nur_agent['id']=gruppen_id
    elif agentname == 'Alle':
        nur_agent=frame.loc[frame['ort'].isin(standorte)].copy()
        nur_agent['agent']='Alle'
        nur_agent['id']='000000'
    else:
        nur_agent=frame.loc[frame['agent']==agentname].copy()

    # step 2: split into kern and neben
    k=nur_agent.loc[nur_agent['bz']=='k']
    n=nur_agent.loc[nur_agent['bz']=='n']

    # step 3: group as requested and add the average columns
    return _group_and_average(k,gruppierung),_group_and_average(n,gruppierung)

In [288]:
### for every agent: frames grouped by day and by week, each for kern (k) and neben (n), kept in a dictionary
zeiten = dict()
print('collecting and grouping times (neben, kern) for')
for namen in allagents_list:
    print(namen, end=' ')

    kern_byday, neben_byday = group_and_add_average(namen, bigframeii, 'tag')
    kern_byweek, neben_byweek = group_and_add_average(namen, bigframeii, 'woche')

    zeiten[namen] = dict(
        k_day=kern_byday,
        k_week=kern_byweek,
        n_day=neben_byday,
        n_week=neben_byweek,
    )

collecting and grouping times (neben, kern) for
beckeca beckesi beckfra bretsal dehnsil diessro gesinst geyerme gorushe haenthe haustst henniju jakobir meyergu neumath papkeda pfeifpe pinnkat platzhalter pletaan plichba rabechr radtkke reismat ruschfi scholan stanzju steffci strausi tetzlva tonnroy Hagenow Berlin Alle 

In [289]:
#zeiten['dehnsil']['k_raw']

In [290]:
zeiten['Alle']['k_week']

Unnamed: 0_level_0,mm,id,bz,tt_float,vl,ort,yy,be,an,agent,ht_float,wd,aht,att,acw
ww,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
10,3,0,k,745.499602,0.0,B,2017,236.0,236.0,Alle,929.532701,Mon,3.938698,3.158897,0.779801
11,3,0,k,605.783595,0.0,H,2017,209.0,209.0,Alle,725.966405,Mon,3.473523,2.898486,0.575037
12,3,0,k,586.3169,1.0,B,2017,182.0,183.0,Alle,681.283298,Mon,3.743315,3.221521,0.521793
13,3,0,k,612.0994,0.0,B,2017,182.0,182.0,Alle,714.133096,Mon,3.923808,3.363184,0.560625
14,4,0,k,713.900299,0.0,B,2017,230.0,230.0,Alle,820.900198,Mon,3.569131,3.103914,0.465217
15,4,0,k,615.5829,0.0,B,2017,188.0,188.0,Alle,708.583001,Mon,3.769059,3.274377,0.494681
16,4,0,k,596.583402,0.0,H,2017,152.0,152.0,Alle,679.099908,Tue,4.467763,3.924891,0.542872
17,4,0,k,746.433701,2.0,B,2017,187.0,189.0,Alle,843.149806,Mon,4.508822,3.991624,0.517198
18,5,0,k,783.583099,1.0,B,2017,246.0,247.0,Alle,904.049802,Mon,3.674999,3.185297,0.489702
19,5,0,k,849.5996,0.0,H,2017,255.0,255.0,Alle,985.950101,Mon,3.866471,3.331763,0.534708


In [291]:
def decminutes_to_mmss(decimal):
    """Format a duration given in decimal minutes as an 'm:ss' string.

    Fractions of a second are cut off. Minutes are not wrapped at the hour
    or the day, so 75.5 becomes '75:30'. Negative durations get a leading
    '-' (e.g. -0.25 becomes '-0:15').

    Parameters
    ----------
    decimal : int or float
        Duration in minutes; NaN is rejected by ``timedelta`` with a ValueError.

    Returns
    -------
    str
        Minutes, a colon and the zero-padded seconds.
    """
    tdelta=timedelta(minutes=float(decimal))
    # whole seconds of the absolute duration, counted across hours and days
    sekunden=abs(tdelta) // timedelta(seconds=1)
    minuten,restsekunden=divmod(sekunden,60)
    vorzeichen='-' if (tdelta < timedelta(0) and sekunden) else ''
    return '{}{}:{:02d}'.format(vorzeichen,minuten,restsekunden)

def maptix2labels(ticks):
    """Return the 'm:ss' label for the absolute value of every tick in ``ticks``."""
    return [decminutes_to_mmss(abs(tic)) for tic in ticks]

In [292]:
# colors used by plotit()
bgkern='#FFF7F2' # background of the left (Kernzeit) panel
bgnebn='#F8FFF2' # background of the right (Nebenzeit) panel
aht="#21a9ff" # 'aht' curve and the dashed mean line of the plotted agent
att="#ceecff" # 'att' curve
aac="#c4c4c4" # 'acw' curve
zielzeit="#FF006E" # dotted target-time line
bars="#A06A00" # call-count bars and the tick labels / label of their axis
aav='#000C00' # dash-dotted mean line over all agents

In [305]:
def plotit(agent,ww_or_dd,savepath=None):
    """Plot the average times and call counts of one agent and save the figure as PNG.

    Two panels are drawn side by side, Kernzeit on the left and Nebenzeit on
    the right. Each shows the aht/att/acw curves, the handled calls as bars
    on a secondary axis, a target line, the agent's mean aht and the mean aht
    over all agents (taken from ``zeiten['Alle']``, weekly frames).

    Parameters
    ----------
    agent : str
        Key into the module-level ``zeiten`` dictionary.
    ww_or_dd : str
        'Woche' for the weekly frames or 'Tage' for the daily frames
        (case-insensitive). The string is also used verbatim in the axis
        labels, the title and the file name.
    savepath : str, optional
        Directory the PNG is written to. Defaults to the directory that was
        hard-coded in this function before.

    Returns
    -------
    None
        If the agent has no calls at all, a message is printed and nothing
        is plotted or saved.

    Raises
    ------
    ValueError
        If ``ww_or_dd`` is neither 'Woche' nor 'Tage'.
    """
    ### preliminary deduction from parameters
    aufloesung=ww_or_dd.lower()
    if aufloesung == 'woche':
        kzeit=zeiten[agent]['k_week'].copy()
        nzeit=zeiten[agent]['n_week'].copy()
    elif aufloesung == 'tage':
        kzeit=zeiten[agent]['k_day'].copy()
        nzeit=zeiten[agent]['n_day'].copy()
    else:
        raise ValueError("ww_or_dd must be 'Woche' or 'Tage', got {!r}".format(ww_or_dd))

    if savepath is None:
        savepath='/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/ce_teamleitung/plots/agenten_und_standorte/'

    ### check empty frames
    if kzeit.empty and nzeit.empty:
        # nothing to draw; min()/max() over the two empty indexes below would fail
        print('ueberhaupt keine Calls')
        return
    if kzeit.empty:
        print('keine calls in der Kernzeit')
        kzeit=kzeit.reindex(nzeit.index).fillna(0)
    elif nzeit.empty:
        print('keine calls in der Nebenzeit')
        nzeit=nzeit.reindex(kzeit.index).fillna(0)

    # the figure is created only after the checks, so an early exit cannot leave an open figure behind
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=False, figsize=(17,7))

    kmax=(kzeit['aht'].max())+0.5
    nmax=(nzeit['aht'].max())+0.5
    commonmax=max(kmax,nmax)
    commonmin=-0.25

    ersterZeitpunkt=min(min(kzeit.index),min(nzeit.index))
    letzterZeitpunkt=max(kzeit.index[-1],nzeit.index[-1])
    StartStr=str(ersterZeitpunkt)
    EndeStr=str(letzterZeitpunkt)

    calls_zeitraum_k=kzeit['be'].sum()
    calls_zeitraum_n=nzeit['be'].sum()

    # mean values without the zeroes (np.nan instead of np.NaN, which NumPy 2.0 removed)
    htmean_k=kzeit['aht'].replace(0,np.nan).mean()
    if np.isnan(htmean_k):
        print('this is nan')
        htmean_k=0
    htmean_n=nzeit['aht'].replace(0,np.nan).mean()
    if np.isnan(htmean_n):
        print('this is nan')
        htmean_n=0
    av_all_k=zeiten['Alle']['k_week']['aht'].replace(0,np.nan).mean()
    av_all_n=zeiten['Alle']['n_week']['aht'].replace(0,np.nan).mean()

    ### plots

    ax3 = ax1.twinx()
    ax3.tick_params('y', labelsize=6, labelcolor=bars)

    ax4 = ax2.twinx()
    ax4.tick_params('y', labelsize=6, labelcolor=bars)

    kcalls=ax3.bar(kzeit.index, kzeit['be'], width=0.7, alpha=0.1, color=bars, label='calls')
    ncalls=ax4.bar(nzeit.index, nzeit['be'], width=0.7, alpha=0.1, color=bars, label='calls')

    kaht,=ax1.plot(kzeit.index,kzeit['aht'],color=aht,label="aht")
    katt,=ax1.plot(kzeit.index,kzeit['att'],color=att,label="att")
    kacw,=ax1.plot(kzeit.index,kzeit['acw'],color=aac,label="acw")
    naht,=ax2.plot(nzeit.index,nzeit['aht'],color=aht,label="aht")
    natt,=ax2.plot(nzeit.index,nzeit['att'],color=att,label="att")
    nacw,=ax2.plot(nzeit.index,nzeit['acw'],color=aac,label="acw")

    kziel=ax1.axhline(y=3.5,color=zielzeit,ls=':',alpha=0.75, label='3:30 min')
    kreal=ax1.axhline(y=htmean_k,color=aht,ls='--',alpha=0.9, label=decminutes_to_mmss(htmean_k))
    kalle=ax1.axhline(y=av_all_k,color=aav,ls='-.',alpha=0.2, label=decminutes_to_mmss(av_all_k))
    nziel=ax2.axhline(y=1.5,color=zielzeit,ls=':',alpha=0.75, label='1:30 min')
    nreal=ax2.axhline(y=htmean_n,color=aht,ls='--',alpha=0.9, label=decminutes_to_mmss(htmean_n))
    nalle=ax2.axhline(y=av_all_n,color=aav,ls='-.',alpha=0.2, label=decminutes_to_mmss(av_all_n))

    def _mmss_ticklabels(ax):
        """Add minor y ticks to ``ax`` and label all y ticks as m:ss."""
        # every axis gets its own locator; a locator instance must not be shared between axes
        ax.yaxis.set_minor_locator(AutoMinorLocator(4))
        ax.yaxis.set_minor_formatter(ScalarFormatter()) # is the same as major formatter
        tix_mj=ax.get_yticks()
        tix_mn=ax.get_yticks(minor=True)
        ax.yaxis.set_ticklabels(maptix2labels(tix_mj))
        ax.yaxis.set_ticklabels(maptix2labels(tix_mn),minor=True,size=6)

    ### ax1 labels
    ax1.set_ylim(commonmin,commonmax)
    _mmss_ticklabels(ax1)

    ### ax2 labels, same y range as the left panel
    ax2.set_ylim(ax1.get_ylim())
    _mmss_ticklabels(ax2)

    ### color adjustments, titles, legend
    ax1.set_facecolor(bgkern)
    ax2.set_facecolor(bgnebn)

    desc_k,desc_n=str(int(calls_zeitraum_k)),str(int(calls_zeitraum_n))
    ax1.set_title('Kernzeit'+' Calls gesamt: '+desc_k, size=9)
    ax2.set_title('Nebenzeit'+' Calls gesamt: '+desc_n, size=9)

    ax1.set_xlabel(ww_or_dd, size=7)
    ax2.set_xlabel(ww_or_dd, size=7)
    ax1.tick_params('x', labelsize=8)
    ax2.tick_params('x', labelsize=8)

    ax1.set_ylabel('Minuten', rotation=90)
    ax4.set_ylabel('Calls',rotation=90,color=bars)

    f.suptitle('Bearbeitungszeiten '+agent+' nach '+ww_or_dd+' ab März 2017 bis '+ww_or_dd+' '+EndeStr)

    f.legend((kaht,katt,kacw,kziel,kreal,kalle,kcalls),('handling','talk','afterwork','zielzeit','Øzeit agent','Øzeit alle','calls'),fontsize=7,ncol=2,loc='upper right',borderaxespad=2)

    ### save the figure
    heute=datetime.date.today().strftime('%Y_%m_%d')
    bild_filename=heute+'_'+agent+'_'+ww_or_dd+'_'+StartStr+'-'+EndeStr
    speichernin=os.path.join(savepath,bild_filename)
    print(speichernin)
    # 'ext' is not a savefig keyword; the format is passed explicitly and the suffix is added by hand
    f.savefig(speichernin+'.png',format='png')
    plt.close(f) # close exactly this figure, not whatever happens to be current

In [306]:
# render and save the weekly plot for every entry of allagents_list (agents and the appended groups)
for person in allagents_list:
    print(person)
    plotit(person,'Woche')


beckeca
/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/ce_teamleitung/plots/agenten_und_standorte/2017_11_01_beckeca_Woche_11-42
beckesi
/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/ce_teamleitung/plots/agenten_und_standorte/2017_11_01_beckesi_Woche_10-39
beckfra
/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/ce_teamleitung/plots/agenten_und_standorte/2017_11_01_beckfra_Woche_9-43
bretsal
/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/ce_teamleitung/plots/agenten_und_standorte/2017_11_01_bretsal_Woche_11-42
dehnsil
/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/ce_teamleitung/plots/agenten_und_standorte/2017_11_01_dehnsil_Woche_9-41
diessro
/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/ce_teamleitung/plots/agenten_und_standorte/2017_11_01_diessro_Woche_10-43
gesinst
/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/ce_teamleitung/plots/agenten_und_standorte/2017_11_01_gesinst_Woche_9-43
geyerme
/home/keuch/gits/keuch/code_box/pyt/

In [295]:
# scratch check: how a frame without any rows is padded to the index of its counterpart
emptyframe=zeiten['haustst']['n_week']
onvalframe=zeiten['haustst']['k_week']
print(emptyframe.empty)
valix=onvalframe.index
# reindex() and fillna() both return new frames, so the frame stored in `zeiten` stays untouched
emptyframe=emptyframe.reindex(valix).fillna(0)
emptyframe

True


Unnamed: 0_level_0,mm,id,bz,tt_float,vl,ort,yy,be,an,agent,ht_float,wd,aht,att,acw
ww,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
25,0.0,0,0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0


In [296]:
#### isin() is a neat way of filtering rows by a set of values
#### note: ww apparently has another datatype than mm, normalization required!
gesinst_neben_tage = zeiten['gesinst']['n_day']
in_ww_32_bis_41 = gesinst_neben_tage['ww'].isin(list(range(32, 42)))
gesinst_neben_tage.loc[in_ww_32_bis_41]


Unnamed: 0_level_0,mm,id,bz,tt_float,vl,ort,yy,be,an,agent,ht_float,ww,wd,aht,att,acw
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2017-08-12,8,770628,n,14.7834,0.0,B,2017,14.0,14.0,gesinst,21.6668,32,Sat,1.547629,1.055957,0.491671
2017-08-26,8,770628,n,3.8333,0.0,B,2017,2.0,2.0,gesinst,4.9833,34,Sat,2.49165,1.91665,0.575
2017-09-02,9,770628,n,3.65,0.0,B,2017,4.0,4.0,gesinst,4.7833,35,Sat,1.195825,0.9125,0.283325
2017-09-08,9,770628,n,0.1167,0.0,B,2017,1.0,1.0,gesinst,0.15,36,Fri,0.15,0.1167,0.0333
2017-09-09,9,770628,n,4.6833,0.0,B,2017,7.0,7.0,gesinst,6.7834,36,Sat,0.969057,0.669043,0.300014
2017-09-28,9,770628,n,4.3167,0.0,B,2017,1.0,1.0,gesinst,5.3167,39,Thu,5.3167,4.3167,1.0
2017-10-05,10,770628,n,0.3833,0.0,B,2017,1.0,1.0,gesinst,0.4167,40,Thu,0.4167,0.3833,0.0334
2017-10-14,10,770628,n,3.8167,0.0,B,2017,6.0,6.0,gesinst,7.3167,41,Sat,1.21945,0.636117,0.583333
