In [169]:
## Import necessary modules
import os,sys
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import date2num, AutoDateFormatter, AutoDateLocator, WeekdayLocator, MonthLocator, DayLocator, DateLocator, DateFormatter
from matplotlib.dates import MO, TU, WE, TH, FR, SA, SU
from matplotlib.ticker import AutoMinorLocator, AutoLocator, FormatStrFormatter, ScalarFormatter
import numpy as np
import datetime, calendar
from datetime import timedelta
import matplotlib.patches as mpatches
from itertools import tee
from traitlets import traitlets

sys.path.append(os.path.abspath('/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/entwuerfe/xls_testruns/lib/'))
from ce_funclib import determine_kernzeit as dtkz
from ce_funclib import continuity_check


from ipywidgets import widgets, interact, interactive, fixed, interact_manual, Layout
from IPython.display import display
#%matplotlib inline
%matplotlib tk


## Directory with the archived input files (each file must contain agent statistics)
arcpth='/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/test_stats/archiv/'


In [170]:
######## GET A LIST OF MATCHING .xls FILES FROM THE GIVEN DIRECTORY

def collectxlfiles(arcpath):
    """Return the sorted paths of all entries in ``arcpath`` named ``CE_al*``.

    Every directory entry whose name starts with ``'CE_al'`` is joined with
    ``arcpath``; the resulting paths are returned as a sorted list.
    """
    return sorted(
        os.path.join(arcpath, entry)
        for entry in os.listdir(arcpath)
        if entry.startswith('CE_al')
    )

# Sorted list of all matching files found in the archive directory.
xlfilelist=collectxlfiles(arcpth)
#xlfilelist
#examplefile=xlfilelist[233]

In [171]:
###### TEST FOR DATA IN FILE, SORT OUT EMPTY FILES

## good dataframes do per definition not contain any zero values
## fill bad DFs with nan?

def filetoframe(exfile):
    """Read one spreadsheet and return it as a normalised frame plus a status flag.

    Parameters
    ----------
    exfile
        Path of the spreadsheet; passed unchanged to ``pd.read_excel``.

    Returns
    -------
    (exframe, except_status)
        ``except_status`` is ``'reg'`` when cell [2, 1] of the sheet equals
        ``'Agenten'``. In that case ``exframe`` holds the data rows (the first
        three rows and the last row are cut off) with the columns tstamp,
        agent, an, be, vl, ht_float, tt_float, date, wd, ww, mm, yy, bz, ort
        and id.
        Otherwise ``except_status`` is ``'ex'`` and ``exframe`` is a one-row
        placeholder carrying the sheet's date, the agent name
        ``'nocalls_datum'``, the id ``'foobar'`` and zeros for missing values.
    """
    exframe=pd.read_excel(exfile) # this is a regular pd.DataFrame
    # cell [0, 1] holds the date of the sheet
    datecell=exframe.iloc[0,1]
    sheet_datetime=pd.to_datetime(datecell,format='%d.%m %Y : %H')
    sheet_date=sheet_datetime.date()

    integritycheck=exframe.iloc[2,1] # files with data have "Agenten" here, files with no calls have a 'nan'

    if integritycheck != 'Agenten':
        # if it's empty, keep date for filling it later
        print('Exception: ', end='')
        except_status='ex'

        usefulcols={0:'tstamp',1:'agent',3:'an',4:'be',22:'vl',24:'ht_float',29:'tt_float'} # map cols to decent names
        # NOTE(review): the columns were not renumbered in this branch, so these integer
        # labels presumably match no existing column and yield all-NaN columns — confirm
        exframe=exframe.reindex(columns=sorted(usefulcols.keys()))
        exframe.rename(columns=usefulcols,inplace=True)
        exframe=exframe[0:1] # keep only the first row as a placeholder
        print(sheet_datetime)

        exframe['tstamp']=sheet_datetime
        exframe['date']=sheet_date
        exframe['agent']='nocalls_datum'
        exframe[['wd','ww','mm','yy']]=exframe['tstamp'].dt.strftime('%a,%W,%m,%Y').str.split(',',expand=True) # make ww,yy,mm,wd columns
        exframe['bz']=exframe['tstamp'].apply(dtkz)
        exframe['ort']=exframe['agent'].str[0] # first character of the placeholder name, i.e. 'n'
        exframe['id']='foobar' # placeholder id

        # integers should be of appropriate datatype, we received them as strings
        # exframe[['vl','an','be','ww','mm','yy']]=exframe[['vl','an','be','ww','mm','yy']].astype(np.int64) #just for the beauty of it
        exframe.fillna(0, inplace=True)
        exframe[['ww','mm','yy']]=exframe[['ww','mm','yy']].astype(np.int64) #just for the beauty of it
        #exframe.fillna(0, inplace=True)
        return exframe,except_status

    else:
        except_status='reg'

        # assigning 30 labels requires the sheet to have exactly 30 columns
        exframe.columns=range(0,30) # rename columns to a temporarily more readable format, fancy rename later
        usefulcols={0:'tstamp',1:'agent',3:'an',4:'be',22:'vl',24:'ht_float',29:'tt_float'} # map cols to decent names
        exframe=exframe[sorted(usefulcols.keys())] # skip cols and keep the ones we need
        exframe.rename(columns=usefulcols,inplace=True) # rename cols
        # NOTE(review): the column assignments below act on a slice of the selected frame and
        # may trigger pandas' SettingWithCopyWarning — confirm
        exframe=exframe[3:-1] # strip text rows and the mangled sum row
        exframe['tstamp']=pd.to_datetime(exframe['tstamp'],format=' %d.%m.%Y %H:%M ')
        exframe['date']=exframe['tstamp'].dt.date
        exframe[['wd','ww','mm','yy']]=exframe['tstamp'].dt.strftime('%a,%W,%m,%Y').str.split(',',expand=True) # make ww,yy,mm,wd columns
        exframe['bz']=exframe['tstamp'].apply(dtkz)

        # split the identifier into useable columns; 'agent' is overwritten last
        # because the other two slices still need the full identifier
        exframe['ort']=exframe['agent'].str[0] # first character
        exframe['id']=exframe['agent'].str[-6:] # last six characters
        exframe['agent']=exframe['agent'].str[2:-7] # everything between the two

        # integers should be of appropriate datatype, we received them as strings
        exframe[['vl','an','be','ww','mm','yy']]=exframe[['vl','an','be','ww','mm','yy']].astype(np.int64) #just for the beauty of it

        return exframe,except_status

In [172]:
# Convert every collected file into a frame; remember which files had no call data.
framelist=list()
exceptionlist=list()
for xfile in xlfilelist:
    frame_from_file,except_status=filetoframe(xfile)
    if except_status=='ex':
        exceptionlist.append(xfile)
    # placeholder frames of 'ex' files are appended as well
    framelist.append(frame_from_file)

Exception: 2017-04-17 00:00:00
Exception: 2017-05-14 00:00:00
Exception: 2017-11-19 00:00:00
Exception: 2017-12-03 00:00:00
Exception: 2017-12-10 00:00:00


In [173]:
#### produce a unified frame with all data and sort it by timestamp and agent name
bigframeii=pd.concat(framelist)
bigframeii.sort_values(['tstamp','agent'],inplace=True)
bigframeii.reset_index(drop=True,inplace=True) # replace the per-file indices with one continuous index

In [174]:
# the exclusive logins have to be merged: every row carrying one of these ids
# gets the same agent name
unify_id={'gesinst':'995887','stanzju':'878457','papkeda':'891914'}
bigframeii.loc[bigframeii['id'] == unify_id['gesinst'],'agent'] = 'gesinst'
bigframeii.loc[bigframeii['id'] == unify_id['stanzju'],'agent'] = 'stanzju'
bigframeii.loc[bigframeii['id'] == unify_id['papkeda'],'agent'] = 'papkeda'

In [175]:
#### check whether the dates (days) in the frame are gapless
datenserie_uniq=bigframeii['date'].unique().tolist()
tage_bestand=len(datenserie_uniq)
tage_start=datenserie_uniq[0]
# NOTE(review): the [-1:] slice yields a one-element list, whereas tage_start is a
# single value — confirm whether [-1] was intended
tage_ende=datenserie_uniq[-1:]

# an empty/falsy result is reported as "no dates are missing"
missing_dates=continuity_check(datenserie_uniq)
if not missing_dates:
    print('no dates are missing')
else:
    print('the following dates are not within the frame:')
    print(missing_dates)

no dates are missing


In [176]:
# get all agents available and create frames for kern and neben
allagents_list=sorted(bigframeii['agent'].unique())
# group names that group_and_add_average treats specially
allagents_list.extend(['Hagenow','Berlin','Alle'])
standorte=bigframeii.ort.unique().tolist()

# rows flagged 'k' and 'n' in the 'bz' column
bigk=bigframeii.loc[bigframeii['bz']=='k']
bign=bigframeii.loc[bigframeii['bz']=='n']

**Individual calls cannot be recovered, because the raw data has already been aggregated by hour,  
so we go on and group by day to compute averages.**

In [177]:
bigframeii.head(2)

Unnamed: 0,tstamp,agent,an,be,vl,ht_float,tt_float,date,wd,ww,mm,yy,bz,ort,id
0,2017-03-04 08:00:00,beckfra,1.0,1.0,0.0,2.3667,2.1333,2017-03-04,Sat,9,3,2017,n,H,216694
1,2017-03-04 08:00:00,tetzlva,1.0,1.0,0.0,2.6833,2.6167,2017-03-04,Sat,9,3,2017,n,B,613887


In [178]:
def group_and_add_average(agentname,frame,gruppierung):
    """Split one agent's (or group's) rows by 'bz' and add average columns.

    Parameters
    ----------
    agentname : str
        Either a name matched against ``frame['agent']`` or one of the group
        names ``'Hagenow'`` (rows with ``ort == 'H'``), ``'Berlin'`` (rows with
        ``ort == 'B'``) or ``'Alle'`` (all rows). For a group, the ``agent``
        and ``id`` columns of a working copy are overwritten with the group
        name and a fixed id; ``frame`` itself is not modified.
    frame : pandas.DataFrame
        Must provide the columns agent, an, be, vl, ht_float, tt_float, date,
        wd, ww, mm, yy, bz, ort and id.
    gruppierung : str
        ``'tag'`` groups by ``date``, ``'woche'`` groups by ``ww`` and
        ``'nursplit'`` keeps the rows ungrouped. Note that ``'woche'`` groups
        on the week number only, so equal week numbers of different years end
        up in the same group.

    Returns
    -------
    (k_agent, n_agent)
        Frames for the rows with ``bz == 'k'`` and ``bz == 'n'``, each with the
        extra columns ``aht`` (ht_float / be), ``att`` (tt_float / be) and
        ``acw`` (aht - att). There is no guard against ``be == 0``.

    Raises
    ------
    ValueError
        If ``gruppierung`` is not one of the three supported values
        (previously this surfaced as an UnboundLocalError inside the helper).
    """
    if gruppierung not in ('tag','woche','nursplit'):
        raise ValueError(
            "gruppierung must be 'tag', 'woche' or 'nursplit', got %r" % (gruppierung,)
        )

    # step 1: filter by agent; for a location-bound group, filter by location
    # and replace agent name and id by the group's name and id
    location_groups={'Hagenow':('H','000001'),'Berlin':('B','000002')}
    if agentname in location_groups:
        ort_code,group_id=location_groups[agentname]
        nur_agent=frame.loc[frame['ort']==ort_code].copy()
        nur_agent['agent']=agentname
        nur_agent['id']=group_id
    elif agentname == 'Alle':
        nur_agent=frame.copy()
        nur_agent['agent']='Alle'
        nur_agent['id']='000000'
    else:
        nur_agent=frame.loc[frame['agent']==agentname]

    # step 2: split into kern ('k') and neben ('n'); an empty part is signalled with a dot
    k=nur_agent.loc[nur_agent['bz']=='k']
    n=nur_agent.loc[nur_agent['bz']=='n']
    for part in (k,n):
        if part.empty:
            print('.',end=' ')

    # step 3: group by day or week (instead of hour, as it is now) and add average ht, tt
    def group_and_average(agframe):
        # tstamp is dropped and date becomes the index; all others are summed or reduced
        colfx_day={'agent':'first','an':'sum','be':'sum','vl':'sum','ht_float':'sum','tt_float':'sum','wd':'first','ww':'first', 'mm':'first','yy':'first','bz':'first','ort':'first','id':'first'}
        # tstamp and date are dropped and ww becomes the index; all others are summed or reduced
        colfx_week={'agent':'first','an':'sum','be':'sum','vl':'sum','ht_float':'sum','tt_float':'sum','wd':'first','mm':'first','yy':'first','bz':'first','ort':'first','id':'first'}

        if gruppierung=='tag':
            grpd=agframe.groupby('date').agg(colfx_day)
        elif gruppierung=='woche':
            grpd=agframe.groupby('ww').agg(colfx_week)
        else:
            # 'nursplit': no grouping, only the average columns are added
            grpd=agframe.copy()

        grpd['aht']=grpd['ht_float']/grpd['be']
        grpd['att']=grpd['tt_float']/grpd['be']
        grpd['acw']=grpd['aht']-grpd['att']

        return grpd

    # step 4: grouped stats with the average columns for both parts
    k_agent=group_and_average(k)
    n_agent=group_and_average(n)

    return k_agent,n_agent

**dictionary of frames**

In [179]:
### generate frames grouped by day and by week for every agent, put them in a dictionary

#zeiten={}
#print('collecting and grouping times (neben, kern) for')
#for namen in allagents_list:
#    kern_byday,neben_byday=group_and_add_average(namen,bigframeii,'tag')
#    kern_byweek,neben_byweek=group_and_add_average(namen,bigframeii,'woche')
#    zeiten[namen]={'k_day':kern_byday,'k_week':kern_byweek,'n_day':neben_byday,'n_week':neben_byweek}

In [180]:
#### isin function is pretty neat thing for filtering
#### obviously, ww is another datatype than mm, normalization required!

#zeiten['gesinst']['n_day'].loc[zeiten['gesinst']['n_day']['ww'].isin([32,33,34,35,36,37,38,39,40,41])]

In [181]:
def get_sortlist(frame,sortby):
    """Return agent names plus the two location labels in the requested order.

    Parameters
    ----------
    frame : pandas.DataFrame
        Needs the columns ``agent``, ``ort``, ``be`` and ``ht_float``.
    sortby : str
        ``'calls'`` sorts descending by the summed ``be``, ``'aht'`` sorts
        descending by ``ht_float / be`` (case-insensitive). Any other value
        leaves the order unsorted: agents first, then the locations.

    Returns
    -------
    list
        Index labels of the combined frame: every agent of ``frame`` plus
        ``'berlin'`` and ``'hagenow'`` for the rows with ``ort`` 'B' and 'H'.
    """
    sort_key=sortby.lower()
    print(sort_key)
    totals={'be':'sum','ht_float':'sum'}

    # totals and average handling time per agent
    gesframe=frame.groupby('agent').agg(totals)
    gesframe['aht']=gesframe['ht_float']/gesframe['be']

    # totals per location; the mask is built from the frame that was passed in
    # (it used to index the global bigframeii, which broke for any other frame)
    ortsframe=frame.loc[frame['ort'].isin(['H','B'])].groupby('ort').agg(totals)
    ortsframe['aht']=ortsframe['ht_float']/ortsframe['be']
    ortsframe=ortsframe.rename(index={'B':'berlin','H':'hagenow'})
    ortsframe.index.names=['agent']

    newfall=pd.concat([gesframe,ortsframe]).fillna(0)
    sort_columns={'calls':'be','aht':'aht'}
    if sort_key in sort_columns:
        newfall=newfall.sort_values(sort_columns[sort_key],ascending=False)

    return newfall.index.tolist()

In [235]:
# first the data: sorted list of all distinct dates in the frame
print('daten kommen von bigframeii')
dats=sorted(bigframeii.date.unique())

# Button einrichten, der einen Wiedergabewert hat
class ReturnValueButton(widgets.Button):
    """Button widget that additionally carries a ``value`` trait.

    ``value`` is the initial content of that trait; all remaining positional
    and keyword arguments are forwarded to ``widgets.Button``.
    """

    def __init__(self, value=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Attach 'value' as an extra trait of this instance.
        value_trait = traitlets.Any(value)
        self.add_traits(value=value_trait)


# set up the widgets that go into the boxes:
agtsortmethod=widgets.RadioButtons(options=['Calls', 'avAHT'],value='Calls',description='Agenten sortiert nach:',disabled=False)
agent_chooser=widgets.SelectMultiple(options=get_sortlist(bigframeii,agtsortmethod.value),layout=Layout(display="flex", flex_flow='column'),description='Agents',disabled=False)
ww_dd_chooser=widgets.RadioButtons(options=['Wochen', 'Einzeltage'],value='Wochen',description='Gruppierung:',disabled=False)
# NOTE(review): the 'ww' column is built with strftime('%W'), which can also yield 0 and 53;
# those weeks cannot be selected with a 1..52 slider — confirm this is intended
whichweeks=widgets.IntRangeSlider(step=1,disabled=False,min=1,max=52,value=[1,52],description='Wochen')
fromdt=widgets.SelectionSlider(options=dats,description='Von:')
# NOTE(review): min/max are presumably not traits of SelectionSlider and have no effect here;
# the selectable range is narrowed by shift_tilldt instead — confirm
tilldt=widgets.SelectionSlider(options=dats, min=fromdt.value,max=dats[-1],description='Bis:')
# button whose 'value' trait starts at 1 and is overwritten by passvalues on click
gobutton = ReturnValueButton(description="Click me",disabled=False,button_style='',tooltip='Click me',icon='check',value=1)

# layout of the widget boxes
overbox=widgets.HBox(description='outer box',title='outer box', name='outer box', layout=Layout(border='2px solid black'))             # this is the main container that holds the other boxes
leftbox_agents=widgets.VBox(layout=Layout(border='2px solid blue'))      # left box inside the container
rightbox_timeranges=widgets.VBox(layout=Layout(border='2px solid purple')) # right box inside the container
overbox.children=[leftbox_agents,rightbox_timeranges]               # this places the boxes inside the container
leftbox_agents.children=[agtsortmethod,agent_chooser]               # widgets for the left box
rightbox_timeranges.children=[ww_dd_chooser,whichweeks,gobutton]    # widgets for the right box

# 'observe'-Funktionen für die widgets:
def shift_tilldt(args):
    """Observer for ``fromdt``: offer in ``tilldt`` only the dates from the new start date on."""
    start_pos=dats.index(args['new'])
    tilldt.options=dats[start_pos:]
def switchflick(args):
    """Observer for ``ww_dd_chooser``: show the controls that fit the chosen grouping.

    'Wochen' shows the week range slider, 'Einzeltage' shows the two date
    sliders; any other value leaves the box unchanged.
    """
    choice=args['new']
    if choice=='Wochen':
        rightbox_timeranges.children=[ww_dd_chooser,whichweeks,gobutton]
    elif choice=='Einzeltage':
        rightbox_timeranges.children=[ww_dd_chooser,fromdt,tilldt,gobutton]
def agtsort(args):
    """Observer for ``agtsortmethod``: re-sort the options of ``agent_chooser``.

    The radio button labels 'Calls' and 'avAHT' are mapped (case-insensitively)
    to the sort keys 'calls' and 'aht' understood by ``get_sortlist``.
    """
    sort_keys={'calls':'calls','avaht':'aht'}
    chosen=args['new'].lower()
    if chosen in sort_keys:
        agent_chooser.options=get_sortlist(bigframeii,sort_keys[chosen])
def passvalues(args):
    """Click handler for ``gobutton``: store the current selection on the button.

    ``args`` is the object passed by ``on_click`` (the clicked button). Its
    ``value`` is set to ``(agenten, zeitrahmen)``:

    * ``agenten``: the agents selected in ``agent_chooser`` plus a trailing 'Alle'
    * ``zeitrahmen``: ``('ww', range)`` for 'Wochen', ``('date', DatetimeIndex)``
      for 'Einzeltage', or an empty tuple for any other grouping value
    """
    agenten=list(agent_chooser.value)
    agenten.append('Alle')
    wwdd=ww_dd_chooser.value.lower()
    zeitrahmen=tuple()
    if wwdd=='wochen':
        first_week,last_week=whichweeks.value
        # the slider's upper bound is inclusive, range() is not, hence the +1
        zeitrahmen=('ww',range(first_week,last_week+1))
    elif wwdd=='einzeltage':
        zeitrahmen=('date',pd.date_range(fromdt.value,tilldt.value))
    args.value=(agenten,zeitrahmen)


# assign/bind the 'observe' functions to the widgets
agtsortmethod.observe(agtsort,'value') 
# Explanation: the widget 'agtsortmethod' can take the two values that were passed
# as "options" when the widget was created ("Calls" and "avAHT").
# When the widget is operated, the function "agtsort" is called with a change object
# whose 'new' entry holds the value that was just chosen;
# the function then directly sets the selectable values of another widget (agent_chooser)
ww_dd_chooser.observe(switchflick,'value')
fromdt.observe(shift_tilldt,'value')

display(overbox)

gobutton.on_click(passvalues)




daten kommen von bigframeii
calls


<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>


In [292]:
# Read the selection that passvalues stored on the button: (agent list, (time column, time range)).
# NOTE(review): gobutton.value is still 1 until the button has been clicked, in which case
# the indexing below fails — run this cell only after clicking the button
filterparams=gobutton.value

namefilter=filterparams[0]
timecol=filterparams[1][0]
if timecol=='ww':
    timerng=filterparams[1][1]
elif timecol=='date':
    # use the .date values of the date range for matching against the 'date' column
    timerng=filterparams[1][1].date

#print(namefilter,timecol,timerng)

In [293]:
## the dataframe that contains only the requested agents and time ranges
sframe=bigframeii.loc[(bigframeii['agent'].isin(namefilter)) & (bigframeii[timecol].isin(timerng))]

In [294]:
#sframe

In [295]:
# the plot function works with a dictionary, so build a dict from the frame that
# separates Kernzeit ('k') and Nebenzeit ('n'), grouped by day and by week

zeiten2={}
print('collecting and grouping filtered times (neben, kern) for')
for namen in namefilter:
    print(namen)
    kern_byday,neben_byday=group_and_add_average(namen,sframe,'tag')
    kern_byweek,neben_byweek=group_and_add_average(namen,sframe,'woche')
    zeiten2[namen]={'k_day':kern_byday,'k_week':kern_byweek,'n_day':neben_byday,'n_week':neben_byweek}

collecting and grouping filtered times (neben, kern) for
gesinst
tonnroy
hennisi
laeweul
Alle


In [296]:
### parameters for plotit: "woche" and "tage"
from ce_funclib import decminutes_to_mmss, maptix2labels, plotit
# the last entry of namefilter is the 'Alle' entry appended by passvalues; it is skipped here
for i in namefilter[:-1]:
    print(i)
    plotit(zeiten2,i,'tage')

gesinst
tonnroy
hennisi
laeweul
