In [718]:
## Import necessary modules
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import date2num, AutoDateFormatter, AutoDateLocator, WeekdayLocator, MonthLocator, DayLocator, DateLocator, DateFormatter
from matplotlib.dates import MO, TU, WE, TH, FR, SA, SU
from matplotlib.ticker import AutoMinorLocator
import numpy as np
import datetime, calendar
from datetime import datetime
%matplotlib inline

## Import data frome pickle generated from hotline_daily.py
data = pd.read_pickle('/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/entwuerfe/pickle_.pkl')

In [719]:
def rearrange(df):
    index_filter=df.loc[['tix']] ## select all hours-index rows
    
    hourse=list(range(0,25))  ## these is from 0 to 24 (25 entries!) and maps the hour indices
    mh=index_filter[hourse]  ## columns with half- and hour steps were named 0-24
    oldcols=mh.columns.values.tolist() ## list of the names of the hourse
    newcols=mh.loc['tix'].drop_duplicates().loc['tix'].values.tolist() ## one row of the hour indices provides the new column names
    cols=dict(zip(oldcols,newcols)) ## old column names get mapped to new columns names

    mhnew=df.rename(columns=cols).drop('tix') ## new dataframe has the old 'tix' entries as column names; 'tix' rows are dropped since not needed any more
    mhnew=mhnew.reset_index().set_index(['xlday']).rename(columns={'index':'status'}) ## new index will be the excel date numbers, old index is moved to a column named 'status'
    
    return mhnew, newcols

In [720]:
def timerange(*,df, years, months=range(1,13), weeks=range(1,53), weekdays=['Mon','Tue','Wed','Thu','Fri','Sat','Sun']):
    resultat=df.loc[(df['year'].isin(years)) & (df['month'].isin(months)) & (df['week'].isin(weeks)) & (df['weekday'].isin(weekdays))]
    frame_range=(resultat.date.min(),resultat.date.max())
    return resultat,frame_range

In [721]:
def summarize(frame):                ## at this place, we dont need the hourly figures, only day sums
    sumframe=frame.drop(hcols,axis=1)
    return sumframe

In [722]:
## group a given timeframe by weekday for an analysis of the average week
def av_week(d_frame):
    avweek = d_frame[['weekday','angekommen','verbunden','verloren']].groupby(['weekday']).sum()
    day_counter=dayf['weekday'].value_counts().reindex(list(calendar.day_abbr))
    avweek = avweek.reindex(list(calendar.day_abbr)).reset_index()
    avweek['daycount']=day_counter.values
    avweek['av_angekommen']=np.round(avweek['angekommen']/avweek['daycount'],decimals=2)
    return avweek

In [723]:
def fplot(frame,style='default'):

    x_=pf.loc[frame['status'] == 'angekommen'].date.values
    x2_=pf.loc[frame['status'] == 'angekommen'].date
    y_an=pf.loc[frame['status'] == 'angekommen'].summa.values
    y_vb=pf.loc[frame['status'] == 'verbunden'].summa.values
    y_vl=pf.loc[frame['status'] == 'verloren'].summa.values
    y_sla=pf.loc[frame['status'] == 'servicelevel'].summa.values*100
    sla_c="#FF4D7D"
    an_c="#B5B5B5"
    vb_c="#008EC4"
    vl_c="#AC003A"
    slamin=y_sla.min()
    anmax=y_an.max()
    fstyles={'woche':(5,3),'monat':(12,6),'jahr':(20,5)}
    lsz={'woche':8,'monat':10}
    fs=fstyles[style]
    
    def titgen(style):
        if style == 'woche':
            mi=pf.date.min().strftime('Anzahl Calls KW%W / %Y')
            ma=pf.date.max().strftime('Anzahl Calls KW%W / %Y')
            if mi == ma:
                tit=str(mi)
            else:
                tit=str(mi+' bis '+ma) 
        elif style == 'monat':
            mi=pf.date.min().strftime('Anzahl Calls %m %B %Y')
            ma=pf.date.max().strftime('Anzahl Calls %m %B %Y')
            if mi == ma:
                tit=str(mi)
            else:
                tit=str(mi+' bis '+ma)
        return tit
    tit=titgen(style)
            
    
    fig=plt.figure(figsize=(fs))
    fig.suptitle(tit)
    
    
    ########## SUBPLOT 0, Horiz. Line #############
    
    ax=plt.subplot(111)
    ax.plot(x_,y_sla,ls='None')
    if slamin < 55:
        ax.set_ylim(slamin-10,101)
    else:
        ax.set_ylim(50,101)
    ax.axhline(y=80,color=sla_c,ls='--',alpha=0.6, label='80%')
    

    ########## SUBPLOT 1, 3 Bars #############
   
    ax1=ax.twinx()
    ax1.yaxis.tick_left()
    if anmax < 100:
        ax1.set_ylim(0,100)
    anbar=ax1.bar(x_,y_an,width=0.8,color=an_c,label='angekommen')
    vnbar=ax1.bar(x_,y_vb,width=0.6,color=vb_c,label='verbunden')
    vlbar=ax1.bar(x_,y_vl,width=0.4,color=vl_c,label='lost')
    for bar in vnbar:
        xpos=bar.get_x()
        heig=bar.get_height()
        ax1.text(xpos+0.3,heig+4.3, s=str(int(heig)),ha='center',color=vb_c, weight='bold')
    for bar in vlbar:
        xpos=bar.get_x()
        heig=bar.get_height()
        if heig > 0:
            ax1.text(xpos+0.2,heig+1.7, s=str(int(heig)),ha='center',color=vl_c, weight='bold')
    
    ########## SUBPLOT 2, Scatter SLA above bars #############
    
    ax2=ax.twinx()
    pydt=[date2num(i) for i in x_]
    
    condition=['#4BE81B' if i>80 else '#E8344C' for i in y_sla]
    condsize=[10 if i>80 else 40 for i in y_sla]
    
    scat=ax2.scatter(pydt,y_sla, c=condition, marker='x', s=condsize, label='sla/tag')
    if slamin < 55:
        ax2.set_ylim(slamin-10,101)
    else:
        ax2.set_ylim(50,101)
    for ix,val in enumerate(y_sla):
        if val < 80:
            ax2.text(pydt[ix],ax2.get_ylim()[1]+1, s=str(round(val))+'%',ha='center',color=sla_c,size=lsz[style])

    
    #############    End of plotting. Now set labels, ticks etc.   ###############
    
    ## turn off the labels of the downmost plot. needs to be done after all plots are finished
        
    mjf={'monat':DateFormatter('%a,%d.%m.'),'woche':DateFormatter('%a,%d.%m.')}
    mnf={'monat':DateFormatter('%d.'),'woche':DateFormatter('%d.%m.')}
    
    xtick_mj_locator = WeekdayLocator(byweekday=MO)    
    xtick_mn_locator = WeekdayLocator(byweekday=[TU,WE,TH,FR,SA,SU])
    
    ax.xaxis.set_major_locator(xtick_mj_locator)
    ax.xaxis.set_major_formatter(mjf[style])
    
    ax.xaxis.set_minor_locator(xtick_mn_locator)
    ax.xaxis.set_minor_formatter(mnf[style])
    
    if style == 'monat':
        ax.tick_params(axis='x', which='major', labelsize=8, length=10, pad=5, colors='#003249')
        ax.tick_params(axis='x', which='minor', labelsize=8)
    elif style == 'woche':
        ax.tick_params(axis='x', which='both', labelsize=8, length=2, pad=2)
        
    ax.tick_params(axis='y',which='both',left='off',right='off',labelleft='off',labelright='off') # First: disable the copied ax.labels
   

    
    # disable spines for the downmost plot so the spines won't overlay
    ax.spines['right'].set_visible(False) 
    ax.spines['left'].set_visible(False) 
    ax.spines['top'].set_visible(False) 
    ax.spines['bottom'].set_visible(False) 
    
    ax.margins(0.01)
    
    #### use the middle plot for the y-axis ticks and label on the left side and gridlines
    ax1.set_ylabel('Anzahl Calls', fontsize=10)
    ax1.yaxis.set_label_position('left')
    ax1.spines['right'].set_visible(False) 
    ax1.spines['left'].set_visible(False) 
    ax1.spines['top'].set_visible(False) 
    ax1.spines['bottom'].set_visible(False) 

    
    y_int=ax1.yaxis.get_majorticklocs()[1]  #|
    if y_int > 10:                          #|
        min_locs=AutoMinorLocator(y_int/5)  #| this will return steps of 4 for 20
    elif y_int <= 10:                       #| and steps of 2 for 10
        min_locs=AutoMinorLocator(5)        #|
            
    ax1.yaxis.set_minor_locator(min_locs)
    ax1.set_axisbelow(True)
    ax1.yaxis.grid(b=True, which='major', color=vb_c, linestyle='-')
    ax1.yaxis.grid(b=True, which='minor', color='#E8E6BF', linestyle='--', alpha=1, lw=0.5)
    ax1.tick_params(axis='y',which='both',left='on',right='off',labelleft='on',labelright='off', labelsize=lsz[style])
    ax1.tick_params(axis='x',which='both',bottom='off',top='off',labelbottom='off',labeltop='off') #xticks off -> ax.xticks
    
    #### use the top plot for the y-axis ticks and label on the right side
    ax2.tick_params(axis='y',which='both',left='off',right='on',labelleft='off',labelright='on', labelsize=lsz[style])
    ax2.set_ylabel('Prozent SLA', fontsize=10)
    ax2.yaxis.set_label_position('right')
    ax2.spines['right'].set_color(sla_c)
    ax2.spines['left'].set_color(vb_c)
    ax2.spines['left'].set_lw(2)
    ax2.tick_params(axis='x',which='both',bottom='off',top='off',labelbottom='off',labeltop='off') #xticks off -> ax.xticks
    
    ax1.legend(prop={'size':7})    
    
    #plt.close(fig)
    
    return fig,tit.replace(' ','').replace('/','_')

# end of function definitions

In [724]:
ndata,hcols=rearrange(data) ## rearranged dataframe with hour_indices as colums and list of hour_index-columns for future use
maxcallsperday=ndata.loc[ndata.status == 'angekommen'].summa.max() ## hoechste Anzahl Calls im pickle
ndata.head()

Unnamed: 0_level_0,status,month,year,00:00-00:30,00:30-01:30,01:30-02:30,02:30-03:30,03:30-04:30,04:30-05:30,05:30-06:30,...,19:30-20:30,20:30-21:30,21:30-22:30,22:30-23:30,23:30-00:00,summa,day,date,week,weekday
xlday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
42688.0,angekommen,11,2016,0.0,0.0,0.0,0.0,0.0,0.0,1,...,0.0,0.0,1,0.0,0.0,321.0,14,2016-11-14,46,Mon
42688.0,verbunden,11,2016,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,221.0,14,2016-11-14,46,Mon
42688.0,verloren,11,2016,0.0,0.0,0.0,0.0,0.0,0.0,1,...,0.0,0.0,1,0.0,0.0,100.0,14,2016-11-14,46,Mon
42688.0,servicelevel,11,2016,,,,,,,0,...,,,0,,,0.688474,14,2016-11-14,46,Mon
42689.0,angekommen,11,2016,0.0,0.0,0.0,0.0,0.0,0.0,1,...,1.0,0.0,0,0.0,0.0,152.0,15,2016-11-15,46,Tue


### time range filter:
Arguments must be passed as a list or as a range. Example:

**timerange(df=ndata,years=[2016,2017],months=[3],weeks=[6],weekdays=['Sun','Sun'])**

Note the double entry for a single weekday!
Leaving parameters empty gives default values (=all). Months and Weeks may be mutually exclusive and raise exceptions

In [748]:
## Woche oder Monat angeben
for i in range (1,62):
    plotframe,plotrange = timerange(df=ndata,years=[2017],months=[i])  ## filter out a given time range
    if not plotframe.empty:
        #pf = summarize(plotframe) ## just add everything and return total numbers
        # plotted,titel=fplot(pf,style='monat')
        #path='/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/ce_teamleitung/plots/Monate/'
        #plotted.savefig(path+titel,ext='png')
        #print(pf.head())
        print(pf.loc[(pf['status'] == 'angekommen')])


             status  month  year summa  day        date  week weekday
xlday                                                                
42826.0  angekommen      4  2017    19    1  2017-04-01    13     Sat
42827.0  angekommen      4  2017     5    2  2017-04-02    13     Sun
42828.0  angekommen      4  2017    72    3  2017-04-03    14     Mon
42829.0  angekommen      4  2017    38    4  2017-04-04    14     Tue
42830.0  angekommen      4  2017    30    5  2017-04-05    14     Wed
42831.0  angekommen      4  2017    68    6  2017-04-06    14     Thu
42832.0  angekommen      4  2017    68    7  2017-04-07    14     Fri
42833.0  angekommen      4  2017    19    8  2017-04-08    14     Sat
42834.0  angekommen      4  2017     1    9  2017-04-09    14     Sun
42835.0  angekommen      4  2017    68   10  2017-04-10    15     Mon
42836.0  angekommen      4  2017    64   11  2017-04-11    15     Tue
42837.0  angekommen      4  2017    48   12  2017-04-12    15     Wed
42838.0  angekommen 