## {{cookiecutter.project_name}}

{{cookiecutter.description}}

This notebook contains basic statistical analysis and visualization of the data.

### Data Sources
- summary : Processed file from notebook 1-Data_Prep

### Changes
- {% now 'utc', '%m-%d-%Y' %} : Started project

In [None]:
import pandas as pd
import numpy as np
from numpy import log
from numpy.random import randn
import glob
import datetime as dt
import pickle
import os
from pandas import ExcelWriter
import re
from zipfile import ZipFile
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import seaborn as sns
import datetime

from sklearn import preprocessing
from sklearn.preprocessing import PowerTransformer
import scipy.stats as stats
from scipy.stats import normaltest
from scipy.stats import shapiro
from scipy.stats import anderson
from scipy.stats import boxcox
from scipy.stats import norm
from math import erf, sqrt


In [None]:
# IPython magic: render matplotlib figures inline, below the cell that draws them.
%matplotlib inline

### Functions

In [None]:
#80/20 analysis
def pareto(df, rows, columns, sortcol, colmonth1, colmonth2, colmonth3, minvalue):
    '''Build a Pareto (80/20) table from the cross-tabulation of two columns.

    df        = dataframe to use
    rows      = column whose values become the rows of the table
    columns   = column whose values become the columns of the table
    sortcol   = table column used to rank the rows and to compute the
                percentages (ex: 'All', the margin holding the row totals)
    colmonth1:colmonth3 = table columns averaged into 'AVG3M'
    minvalue  = 'AVG3M' level used in the printed summary and in the hint

    Returns the cross-tabulation sorted by sortcol (descending), without the
    'All' margin row, plus the columns 'pc' (% of sortcol), 'cum_pc'
    (cumulative %) and 'AVG3M' (mean of the three month columns).
    '''
    crostab = pd.crosstab(df[rows], df[columns], margins=True)
    # margins=True adds an 'All' row and an 'All' column; the row is dropped
    # so the percentages are computed over the real categories only
    crostab = crostab.sort_values(sortcol, ascending=False).drop(['All'])
    print('Total of rows: {}'.format(len(crostab)))
    crostab['pc'] = 100 * crostab[sortcol] / crostab[sortcol].sum()
    crostab['cum_pc'] = crostab['pc'].cumsum()
    crostab['AVG3M'] = (crostab[colmonth1] + crostab[colmonth2] + crostab[colmonth3]) / 3
    # "< 81" keeps the rows whose cumulative share is still in the 80.x% range
    print('Total of rows up to 80%: {}'.format(len(crostab[crostab['cum_pc'] < 81])))
    print('{} Total of rows below average of {}'.format(len(crostab[crostab['AVG3M'] <= minvalue]), minvalue))
    # the hint refers to the returned table and to the minvalue actually used
    print('to print the table run: crostab[crostab["AVG3M"] > {}]'.format(minvalue))
    return crostab

#distribution
def gethrdistribution(df, group1, agg1, titletxt= 'Pie Chart', minpercent=5, filename='figpie.png'):
    '''Pie chart with the share of records per group.

    df         = dataframe to use
    group1     = column to group by (one slice per value)
    agg1       = column counted inside each group
    titletxt   = title of the chart
    minpercent = groups whose share is below this % are merged into 'Others'
    filename   = base name of the image; a trailing '.png' is dropped, then
                 the date (MMDDYY) and '.png' are appended

    The figure is saved in figures_path as <directory_name>_<filename><date>.png
    (figures_path, directory_name and today are notebook-level variables),
    then shown, and the summary table is printed.
    '''
    dist1 = df.groupby(group1, as_index=False)[agg1].count()
    dist1['pc'] = 100 * dist1[agg1] / dist1[agg1].sum()
    # fold the small groups into a single 'Others' slice and aggregate again
    dist1[group1] = np.where(dist1['pc'] < minpercent, 'Others', dist1[group1])
    dist1 = dist1.groupby(group1, as_index=False)[agg1].sum()
    dist1['pc'] = 100 * dist1[agg1] / dist1[agg1].sum()
    dist1 = dist1.sort_values('pc', ascending=False)
    dist1['cum_pc'] = dist1['pc'].cumsum()
    # Create a list of colors (from iWantHue)
    colors = ['#959a3c', '#55ac69', '#5b86d0', "#E13F29", "#D69A80", "#D63B59",
              "#AE5552", "#CB5C3B", "#EB8076", "#96624E"]
    fig, ax = plt.subplots()
    ax.pie(
        dist1[agg1],            # slice sizes
        labels=dist1[group1],   # slice labels
        shadow=False,
        colors=colors,
        startangle=90,          # first slice starts at 90 degrees
        autopct='%1.1f%%',      # percentage printed on each slice
        counterclock=False
        )
    ax.axis('equal')            # keep the pie circular
    ax.set_title(titletxt)
    plt.tight_layout()
    # avoid names such as 'figpie.png010124.png' when the caller passes an extension
    if filename.lower().endswith('.png'):
        filename = filename[:-len('.png')]
    # only the file name is formatted, so braces in figures_path cannot break str.format
    figname_file = os.path.join(figures_path,
                                directory_name + '_' + filename + '{:%m%d%y}.png'.format(today))
    # save before show: some backends release the figure once it has been shown
    fig.savefig(figname_file, transparent=True)
    plt.show()
    print(dist1)

def plottickets(df, group1, group2, countfield):
    '''Count countfield for every (group1, group2) pair, draw the counts as a
    line plot and return the table with the counts.'''
    count_spec = {countfield: 'count'}
    counts = df.groupby([group1, group2]).agg(count_spec)
    counts.plot(kind='line')
    return counts

def weedaysbars(df, group1, agg1, title, xlab, ylab, filename='figbarcharth.png'):
    '''Vertical bar chart with the count of agg1 per value of group1
    (ex: criticality or weekday bar charts).

    df       = dataframe to use
    group1   = column to group by (one bar per value)
    agg1     = column counted inside each group
    title, xlab, ylab = title and axis labels of the chart
    filename = base name of the image; a trailing '.png' is dropped, then
               the date (MMDDYY) and '.png' are appended

    The figure is saved in figures_path as <directory_name>_<filename><date>.png
    (figures_path, directory_name and today are notebook-level variables),
    then shown, and the table with the counts is printed.
    '''
    weekdays = df.groupby(group1, as_index=False)[agg1].count()
    fig, ax = plt.subplots()
    # only the grouped counts are drawn; the raw dataframe is not plotted
    ax.bar(weekdays[group1], height=weekdays[agg1], color='#607c8e')

    ax.set_title(title)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.grid(axis='y', alpha=0.75)
    # minimal frame: no background, no top/right spines, thin bottom/left spines
    ax.patch.set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_linewidth(0.5)
    ax.spines['left'].set_linewidth(0.5)
    # avoid names such as 'figbarcharth.png010124.png' when an extension is passed
    if filename.lower().endswith('.png'):
        filename = filename[:-len('.png')]
    # only the file name is formatted, so braces in figures_path cannot break str.format
    figname_file = os.path.join(figures_path,
                                directory_name + '_' + filename + '{:%m%d%y}.png'.format(today))
    # save before show: some backends release the figure once it has been shown
    fig.savefig(figname_file, transparent=True)
    plt.show()
    print(weekdays)
    
def weedaysbarsh(df, group1, agg1, title, xlab, ylab, filename='figbarcharth.png'):
    '''Horizontal bar chart with the count of agg1 per value of group1
    (ex: criticality or weekday bar charts); each bar is labelled with its count.

    df       = dataframe to use
    group1   = column to group by (one bar per value)
    agg1     = column counted inside each group
    title, xlab, ylab = title and axis labels of the chart
    filename = base name of the image; a trailing '.png' is dropped, then
               the date (MMDDYY) and '.png' are appended

    The figure is saved in figures_path as <directory_name>_<filename><date>.png
    (figures_path, directory_name and today are notebook-level variables),
    then shown, and the table with the counts is printed.
    '''
    weekdays = df.groupby(group1, as_index=False)[agg1].count()
    fig, ax = plt.subplots()

    width = 0.75 # the width of the bars
    ax.barh(weekdays[group1], weekdays[agg1], width)

    ax.set_title(title)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.grid(axis='y', alpha=0.75)
    # write the count next to the end of each bar
    for i, v in enumerate(weekdays[agg1]):
        ax.text(v + 3, i + .0, str(v))
    # minimal frame: no background, no top/right spines, thin bottom/left spines
    ax.patch.set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_linewidth(0.5)
    ax.spines['left'].set_linewidth(0.5)
    # avoid names such as 'figbarcharth.png010124.png' when an extension is passed
    if filename.lower().endswith('.png'):
        filename = filename[:-len('.png')]
    # only the file name is formatted, so braces in figures_path cannot break str.format
    figname_file = os.path.join(figures_path,
                                directory_name + '_' + filename + '{:%m%d%y}.png'.format(today))
    # save before show: some backends release the figure once it has been shown
    fig.savefig(figname_file, transparent=True)
    plt.show()
    print(weekdays)
    
#cycle_time3
def cycletime3(df, groupby2, groupby3, agg1, agg2, agg3):
    '''Cycle time and workload summary for requests and incidents.

    Only rows whose Type is 'Requested Item' or 'Incident' are used.
    Step 1: per (groupby2, groupby3) compute mean/std/max/min of agg1, the
            count of agg2 and the number of unique values of agg3.
    Step 2: per groupby3 summarize those figures across the groupby2 values.

    Returns a dataframe indexed by groupby3 with two-level columns
    (<column>_<statistic of step 1>, statistic of step 2).

    usage: cycletime3(df, 'x_yearmonth', 'x_vendor', 'x_cycletime', 'number',
                      'closed_by_user_name')
    '''
    df = df[df.Type.isin(['Requested Item', 'Incident'])]
    #cycle_time and FTE
    df2 = df.groupby([groupby2, groupby3]).agg({agg1: ['mean', 'std', 'max', 'min'],
                                                agg2: 'count', agg3: 'nunique'})
    # flatten the two-level columns into '<column>_<statistic>' names;
    # iterating the MultiIndex yields the tuples directly (Index.ravel is deprecated)
    df2.columns = ["_".join(levels) for levels in df2.columns]
    agg5 = agg3 + '_nunique'
    agg6 = agg2 + '_count'
    agg7 = agg1 + '_mean'
    # groupby3 is an index level of df2 at this point; summarize across groupby2
    df2 = df2.groupby([groupby3]).agg({agg5: ['mean', 'std'],
                                       agg6: ['mean', 'count', 'median', 'max'],
                                       agg7: ['mean', 'std', 'median']})
    return df2

def barchart(df,x,y,title, x_label, y_label,filename='figbarchart.png'):
    '''Horizontal bar chart with the count of x per value of y
    (ex: tickets per organization or vendor); each bar is labelled with its count.

    df       = dataframe to use
    x        = column to count
    y        = column whose values become the bars
    title, x_label, y_label = title and axis labels of the chart
    filename = base name of the image; a trailing '.png' is dropped, then
               the date (MMDDYY) and '.png' are appended

    The pivot table is built with margins=True, so the chart also contains an
    'All' bar with the grand total. The figure is saved in figures_path as
    <directory_name>_<filename><date>.png (figures_path, directory_name and
    today are notebook-level variables) and then shown.
    '''
    pt_df = df.pivot_table(x, index=[y],
                           aggfunc='count',
                           margins=True)
    pt_df.index.rename(y_label, inplace=True)
    # remove rows with zero tickets and put the largest bar at the top
    pt_df = pt_df[pt_df[x] > 0].sort_values(x, ascending=True)

    fig, ax = plt.subplots()
    width = 0.75 # the width of the bars
    ax.barh(pt_df.index, pt_df[x], width)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    # write the count next to the end of each bar
    for i, v in enumerate(pt_df[x]):
        ax.text(v + 3, i + .0, str(v))
    # minimal frame: no background, no top/right spines, thin bottom/left spines
    ax.patch.set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_linewidth(0.5)
    ax.spines['left'].set_linewidth(0.5)
    # avoid names such as 'figbarchart.png010124.png' when an extension is passed
    if filename.lower().endswith('.png'):
        filename = filename[:-len('.png')]
    # only the file name is formatted, so braces in figures_path cannot break str.format
    figname_file = os.path.join(figures_path,
                                directory_name + '_' + filename + '{:%m%d%y}.png'.format(today))
    # save before show: some backends release the figure once it has been shown
    fig.savefig(figname_file, transparent=True)
    plt.show()

def histogram(df,x, title, x_label, y_label, filter_in, filename= 'histogram'):
    '''Histogram of column x for the aging tickets.

    Rows are kept when x_agingdays > 0 and Type is one of filter_in. The
    histogram (10 bins) is shown and saved in figures_path; describe() of the
    plotted values and an aging summary per x_vendor and Type are printed.
    '''
    keep = (df.x_agingdays > 0) & (df.Type.isin(filter_in))
    selected = df[keep]
    values = selected[x]

    fig, ax = plt.subplots()
    ax.hist(values, bins=10)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.patch.set_visible(False)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    for side in ('bottom', 'left'):
        ax.spines[side].set_linewidth(0.5)
    plt.show()
    target = os.path.join(figures_path,
                          directory_name + '_' + filename + '{:%m%d%y}.png').format(today)
    fig.savefig(target, transparent= True)
    print(values.describe())

    # aging summary per vendor and ticket type, over the same filtered rows
    summary_spec = {'x_aging': 'sum',
                    'number': 'count',
                    'x_agingdays': ['mean', 'std', 'median']}
    aggingsum = selected.groupby(['x_vendor', 'Type']).agg(summary_spec)
    aggingsum = aggingsum.sort_values('x_vendor', ascending=False)
    labels = {'sum':'Open','count':'Closed',
              'std': 'Std Dev',
              'mean':'Mean', 'number':'','x_aging':'', 'x_agingdays':''}
    aggingsum = aggingsum.rename(columns=labels)
    print(aggingsum)
    
def group_by(df):
    '''Group df at three levels of detail for the xls report.

    Returns a tuple (grouped, grouped1, grouped2):
    grouped  = per x_yearmonth, x_dayweek, x_hourday, cmdb_ci_name and PandL:
               count and unique count of closed_by_user_name, count of number
    grouped1 = same figures per x_yearmonth, cmdb_ci_name and PandL
    grouped2 = count of number per x_yearmonth and PandL

    The two-level columns of grouped and grouped1 are flattened to
    '<column>_<statistic>'. The grouping keys have an empty second level, so
    their flattened names end with '_' (ex: 'x_yearmonth_').
    '''
    stats_spec = {'closed_by_user_name': ['count', 'nunique'],
                  'number': 'count'}

    #group by 'yearmonth', 'dayweek', 'hourday', 'cmdb_ci_name', 'PandL'
    grouped = df.groupby(['x_yearmonth', 'x_dayweek', 'x_hourday', 'cmdb_ci_name',
                          'PandL'],
                         as_index=False).agg(stats_spec)
    # iterating the MultiIndex yields the tuples directly (Index.ravel is deprecated)
    grouped.columns = ["_".join(levels) for levels in grouped.columns]

    #group by 'yearmonth', 'cmdb_ci_name', 'PandL'
    grouped1 = df.groupby(['x_yearmonth', 'cmdb_ci_name', 'PandL'],
                          as_index=False).agg(stats_spec)
    grouped1.columns = ["_".join(levels) for levels in grouped1.columns]

    #group by 'yearmonth', 'PandL'
    grouped2 = df.groupby(['x_yearmonth', 'PandL'], as_index=False).agg({'number': 'count'})
    return (grouped, grouped1, grouped2)

def verify_normality(df, column):
    '''Describe a column and compare its values > 0 with a normal curve.

    Prints describe() of the whole column, then mean, median and mode of the
    values > 0. Draws a density histogram of those values with the normal pdf
    of equal mean and std on top, followed by a probability plot.
    '''
    print(df[column].describe())
    positive = df[df[column] > 0]
    series = positive[column]
    mean = series.mean()
    median = series.median()
    mode = series.mode()
    print('Mean: ',mean,'\nMedian: ',median,'\nMode: ',mode[0])

    ordered = sorted(series)
    fit = stats.norm.pdf(ordered, np.mean(ordered), np.std(ordered))

    # histogram with the reference lines and the fitted normal curve
    fig, ax = plt.subplots()
    markers = ((mean, 'red', 'Mean'),
               (median, 'yellow', 'Median'),
               (mode[0], 'green', 'Mode'))
    for position, colour, name in markers:
        ax.axvline(position, color=colour, label=name)
    ax.plot(ordered, fit, '-', linewidth=2, label="Normal distribution with same mean and var")
    ax.hist(ordered, density=True, bins=10, label="Actual distribution")
    ax.patch.set_visible(False)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    for side in ('bottom', 'left'):
        ax.spines[side].set_linewidth(0.5)
    ax.legend()
    plt.show()

    # probability plot against the normal distribution
    fig = plt.figure()
    ax1 = fig.add_subplot(211)
    prob = stats.probplot(positive[column], dist=stats.norm, plot=ax1)
    ax1.set_xlabel('')
    ax1.set_title('Probplot against normal distribution')
    
def transform(df, column, method='power'):
    '''Transform the positive values of a column and plot the result.

    df     = dataframe to use (it is not modified)
    column = numeric column to transform; rows with values <= 0 or missing
             values are left out
    method = 'norm'   : l2 normalization of the column
             'log'    : natural logarithm
             'sqt'    : square root
             'boxcox' : Box-Cox with lambda = 0
             'minmax' : rescale to the range [0, 1]
             'power'  : Box-Cox power transform with fitted lambda, not standardized
             'scale'  : zero mean and unit variance

    Prints describe() before and after the transformation and draws the
    histograms before/after, a probability plot and a box plot.

    Returns a dataframe with the columns 'x_original' and 'x_transformed'
    that keeps the index of the rows used, so it can be merged back into df.
    Raises ValueError when method is not one of the names above.
    '''
    methods = ('norm', 'log', 'sqt', 'boxcox', 'minmax', 'power', 'scale')
    if method not in methods:
        raise ValueError('unknown method {!r}, use one of: {}'.format(method, ', '.join(methods)))

    # "> 0" also leaves out missing values, so nothing is left to fill in
    values = df.loc[df[column] > 0, column]
    dfnorm = pd.DataFrame({'x_original': values})
    print(values.describe())
    # the sklearn helpers expect a 2-D array: one column, one row per value
    x_array = values.to_numpy().reshape(-1, 1)

    if method == 'norm':
        # axis=0 normalizes the column; the default (per row) would turn
        # every value of a single column into 1
        transformed = preprocessing.normalize(x_array, norm='l2', axis=0)
    elif method == 'log':
        transformed = np.log(values)
    elif method == 'sqt':
        transformed = np.sqrt(values)
    elif method == 'boxcox':
        transformed = stats.boxcox(values, lmbda=0)
    elif method == 'minmax':
        transformed = preprocessing.MinMaxScaler().fit_transform(x_array)
    elif method == 'power':
        pt = preprocessing.PowerTransformer(method='box-cox', standardize=False)
        transformed = pt.fit_transform(x_array)
    else:  # 'scale'
        transformed = preprocessing.scale(x_array)

    # assign by position: the results are in the same order as the rows of
    # dfnorm, while a new dataframe would be aligned on a different index
    dfnorm['x_transformed'] = np.asarray(transformed, dtype=float).ravel()
    print(dfnorm['x_transformed'].describe())

    fig = plt.figure()
    ax1 = fig.add_subplot(321)
    ax2 = fig.add_subplot(322)
    ax3 = fig.add_subplot(323)
    ax4 = fig.add_subplot(324)
    ax1.hist(dfnorm['x_original'])
    ax1.set_title('Histogram before {} transformation for {}'.format(method, column))
    ax2.hist(dfnorm['x_transformed'])
    ax2.set_title('Histogram after {} transformation for {}'.format(method, column))
    stats.probplot(dfnorm['x_transformed'], dist=stats.norm, plot=ax3)
    ax3.set_title('Probplot after {} transformation'.format(method))
    ax4.set_title('BoxPlot')
    red_square = dict(markerfacecolor='r', marker='s')
    ax4.boxplot(dfnorm['x_transformed'], vert=False, flierprops=red_square)
    plt.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=0.6, wspace=0.35)
    plt.show()
    return dfnorm

def nomality_tests(df, column, alpha= 0.05):
    '''Print the outcome of three normality tests for df[column]:
    D'Agostino & Pearson, Shapiro-Wilk (only below 5000 records) and
    Anderson-Darling (one verdict per critical value).
    alpha is the significance level used for the first two tests.
    '''
    looks_normal = '   data looks normal (fail to reject H0)'
    not_normal = '   data does not look normal (reject H0)'
    x = df[column]

    # D'Agostino & Pearson test
    stat, p = normaltest(x)
    print(' D Angostino = {:.3f} pvalue = {:.4f}'.format(stat, p))
    print(looks_normal if p > alpha else not_normal)

    # Shapiro test is reliable with less than 5K records
    if len(x) < 5000:
        stat, p = shapiro(x)
        print(' Shapiro = {:.3f} pvalue = {:.4f}'.format(stat, p))
        print(looks_normal if p > alpha else not_normal)

    # Anderson-Darling: compare the statistic with every critical value
    stat = anderson(x, dist='norm')
    print(' Anderson = {:.3f}  '.format(stat.statistic))
    pass_line = '   {:.3f}: {:.3f}, data looks normal (fail to reject H0)'
    fail_line = '   {:.3f}: {:.3f}, data does not look normal (reject H0)'
    for sl, cv in zip(stat.significance_level, stat.critical_values):
        line = pass_line if stat.statistic < cv else fail_line
        print(line.format(sl, cv))
        print('   SL: {} cv = {}'.format(sl, cv))


def outliers_iqr(df, column, output= 'x_outlier'):
    '''Flag outliers with the interquartile range (IQR) method.

    df     = dataframe to use; the flag column is added to it in place
    column = numeric column to analyze
    output = name of the flag column (default x_outlier): 1 when the value is
             below Q1 - 1.5*IQR or above Q3 + 1.5*IQR, otherwise 0

    Draws the box plot of the column before and after leaving out the
    flagged rows and returns df.
    '''
    quartile_1, quartile_3 = np.percentile(df[column], [25, 75])
    iqr = quartile_3 - quartile_1
    lower_bound = quartile_1 - (iqr * 1.5)
    upper_bound = quartile_3 + (iqr * 1.5)
    df[output] = np.where((df[column] > upper_bound) | (df[column] < lower_bound), 1, 0)
    fig = plt.figure()
    ax1 = fig.add_subplot(321)
    ax2 = fig.add_subplot(322)
    red_square = dict(markerfacecolor='r', marker='s')
    ax1.boxplot(df[column], vert=False, flierprops=red_square)
    ax1.set_title('{} Before'.format(column))
    # "After" shows the same column without the flagged rows, not the 0/1 flags
    ax2.boxplot(df.loc[df[output] == 0, column], vert=False, flierprops=red_square)
    ax2.set_title('{} After'.format(column))
    plt.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=0.6, wspace=0.35)
    plt.show()
    return df



### File Locations

In [None]:
# Project root = current working directory; the 'reports' and 'data'
# folders used below are resolved relative to it.
originalpath = os.getcwd()
print(originalpath)
os.chdir(originalpath)
#os.chdir('..')
path = os.getcwd()
print(path)

today = datetime.datetime.today()
directory_name= '{{cookiecutter.directory_name}}'
# only the file name is formatted, so braces in the project path cannot break str.format
report_file= os.path.join(path, 'reports', directory_name + '_report{:%m%d%y}.xlsx'.format(today))
figures_path= os.path.join(path, 'reports','figures')

# date suffix of the processed files to load (the files written by the data prep notebook)
datefile= input('Date of file (MMDDYY): ')
fileoriginaltickets = os.path.join(path, 'data','processed', directory_name + '_tickets' + datefile + '.pkl')
fileoriginalapps = os.path.join(path, 'data','processed', directory_name + '_apps' + datefile + '.pkl')


### Read pkl files

In [None]:
#Read PKL files (gzip-compressed pickles)
# NOTE(review): unpickling can execute arbitrary code; only load files produced by this project.
df2 = pd.read_pickle(fileoriginaltickets,'gzip')
# work on a copy so df2 keeps the data exactly as read from disk
dfreadfile = df2.copy()

df3 = pd.read_pickle(fileoriginalapps,'gzip')
dfreadfileapps = df3.copy()
print('tickets: {}'.format(len(dfreadfile)))
print('Apps: {}'.format(len(dfreadfileapps)))

### Perform Data Analysis

#### Group dataset tickets

In [None]:
# three levels of detail of the tickets; written to the Excel report at the end
grouped, grouped1, grouped2 = group_by(dfreadfile)

#### 80/20 analysis

In [None]:
#get 80/20 table based in threshold could be cum_pc or AVG3M
# an empty answer uses 81 (the prompt shows 80); with cum_pc, "< 81" keeps the rows up to 80.x%
threshold = int(input("Enter threshold : [80]") or '81')
basedin = input('Based analysis in [cum_pc] or avg last 3 months [AVG3M] :') or ('cum_pc')
column= input('Column to use [cmdb_ci_name]: ') or ('cmdb_ci_name')
# NOTE(review): the three months are hard-coded and must exist in x_yearmonth;
# the same threshold is also passed to pareto as minvalue for its AVG3M summary
crostab= pareto(dfreadfile, column, 'x_yearmonth', 'All','201812', '201811','201810',threshold)
# last expression of the cell: displays the rows below the threshold
crostab[crostab[basedin] < threshold]

In [None]:
#pareto graph: bars with the totals per row, line with the cumulative percentage
ct= crostab[crostab[basedin] < threshold]
fig, ax = plt.subplots()
ax.bar(ct.index, ct.All, color="C0")
plt.xticks(ct.index, rotation='vertical', size=6)

# second y axis, sharing the x axis, for the cumulative percentage
ax2 = ax.twinx()
ax2.plot(ct.index, ct.cum_pc, color="C2", marker=",", ms=5)
ax2.yaxis.set_major_formatter(PercentFormatter())
ax.set_title('Pareto {}'.format(column))
# each y axis uses the color of its own series
ax.tick_params(axis="y", colors="C0")
ax2.tick_params(axis="y", colors="C2")
#plt.xticks(ct.index, rotation='vertical')
plt.show()

#### Distribution in the day

In [None]:
#distribution in the day: pie per x_bins_day; minpercent=0 keeps every slice (no 'Others')
gethrdistribution(dfreadfile, 'x_bins_day', 'number', 'Distribution in a day',0,'Distday')

#### Distribution by type of tickets

In [None]:
#types of tickets: pie per Type; types below 10% are merged into 'Others'
gethrdistribution(dfreadfile, 'Type', 'number', 'Types of tickets',10, 'typetks')

#### Bar chart tickets per vendor

In [None]:
# horizontal bars with the count of tickets (number) per x_vendor
barchart(dfreadfile,'number','x_vendor','Total Tickets', 'Tickets', 'Organization', 'org_tkts_bch')

#### Aging analysis

In [None]:
# ticket types included in the aging histogram and in the printed summary
filter_in= ['Incident','Requested Item','Change']
histogram(dfreadfile, 'x_agingdays', 'Agging Tickets', 'Aging in Days', 'Tickets', filter_in,  'agingtkts')

#### Productivity

In [None]:
#productivity
print('Productivity= rate of output (tickets) per unit of input (hrs FTE)')
# per vendor: number of tickets and number of different people who closed them
# NOTE(review): group_by() uses the column 'closed_by_user_name'; confirm that
# 'closed_by_name' exists in the processed file
sumprod= dfreadfile.groupby('x_vendor').agg({'number':'count',
                                                       'closed_by_name':'nunique'}).sort_values('number', 
                                                                                        ascending=False)
# NOTE(review): 2000 is presumably the yearly hours of one FTE and 12 the
# number of months covered by the data; confirm both for the dataset in use
sumprod['Productivity']= sumprod['number'] / (sumprod['closed_by_name'] * 2000)
sumprod['Tickets_per_month']= sumprod['number'] / 12 / sumprod['closed_by_name']
#sumnuc1['Productivity vs effort']= sumnuc1['number'] / sumnuc1['cycletime'] 
sumprod.rename(columns = {'closed_by_name':'Unique Solvers','number':'Tickets'}, inplace= True)
sumprod = sumprod[sumprod["Tickets"] > 0]
sumprod.index.rename('Org', inplace= True)
# last expression of the cell: displays the table
sumprod

#### Distribution type of continuous variables (cycle time, aging)

In [None]:
# histogram against the normal curve and probability plot for the cycle time (values > 0)
verify_normality(dfreadfile, 'x_cycletime')

In [None]:
# histogram against the normal curve and probability plot for the aging days (values > 0)
verify_normality(dfreadfile, 'x_agingdays')

#### Normality Test

In [None]:
# normality tests over the original (untransformed) cycle time
nomality_tests(dfreadfile, 'x_cycletime')

#### Transform continuous variables (cycle time, aging)

In [None]:
# dftrans holds x_original and x_transformed for the rows with x_cycletime > 0
dftrans= transform(dfreadfile, 'x_cycletime','power')

In [None]:
# repeat the normality tests over the transformed values
nomality_tests(dftrans, 'x_transformed')

#### Outliers

In [None]:
#if not transformed, run outliers over original df
# x_outlier flags the outliers of the transformed values
dftrans=  outliers_iqr(dftrans, 'x_transformed')
print('outliers {}'.format(dftrans.x_outlier.sum()))

# x_outlier2 flags the outliers of the original values, for comparison
dftrans=  outliers_iqr(dftrans, 'x_original', 'x_outlier2')
print('outliers2 {}'.format(dftrans.x_outlier2.sum()))

In [None]:
#merge outliers in original df, if transformed/normalized
# index-on-index merge with the default inner join: rows of dfreadfile without
# an entry in dftrans are dropped
# NOTE(review): running this cell twice merges x_outlier again - confirm it is run once
dfreadfile= pd.merge(dfreadfile, dftrans[['x_outlier']], right_index=True, left_index=True)

#### Area under the curve

In [None]:
# Probability that the cycle time falls between x1 and x2, assuming it is
# normally distributed with the sample mean and standard deviation.
mu = dfreadfile.x_cycletime.mean()
sigma = dfreadfile.x_cycletime.std()
# NOTE(review): the message printed below reports x1 and x2 in days, while the
# previous comments said "2 hrs" and "4 days" - confirm the unit and the limits
x1 = .25  #lower limit of the interval
x2 = 8   #upper limit of the interval
# calculate probability
# erf((x - mu) / (sigma * sqrt(2))) / 2 is the normal probability between the mean and x
# probability from Z=0 to lower bound
double_prob = erf( (x1-mu) / (sigma*sqrt(2)) )
p_lower = double_prob/2
print('\n Lower Bound: {}'.format(round(p_lower,4)))

# probability from Z=0 to upper bound
double_prob = erf( (x2-mu) / (sigma*sqrt(2)) )
p_upper = double_prob/2
print(' Upper Bound: {}'.format(round(p_upper,4)))

# print the results
# probability inside the interval = difference between the two signed halves
Pin = (p_upper) - (p_lower)
print('\n')
print('mean = {}    std dev = {} \n'.format(mu, sigma))
print('Calculating the probability of occurring between {} <--> {} days\n'.format(x1, x2))
print('inside interval Pin = {}%'.format(round(Pin*100,1)))
print('outside interval Pout = {}% \n'.format(round((1-Pin)*100,1)))
print('\n')

# calculate the z-transform
z1 = ( x1 - mu ) / sigma
z2 = ( x2 - mu ) / sigma

x = np.arange(z1, z2, 0.001) # range of x in spec
x_all = np.arange(-10, 10, 0.001) # entire range of x, both in and out of spec
# mean = 0, stddev = 1, since Z-transform was calculated
y = norm.pdf(x,0,1)
y2 = norm.pdf(x_all,0,1)

# build the plot
fig, ax = plt.subplots(figsize=(9,6))
# NOTE(review): the style is selected after the figure is created and stays
# active for the cells that run afterwards - confirm this is intended
plt.style.use('fivethirtyeight')
ax.plot(x_all,y2)

# darker area = inside the interval, lighter area = whole curve
ax.fill_between(x,y,0, alpha=0.3, color='b')
ax.fill_between(x_all,y2,0, alpha=0.1)
ax.set_xlim([-4,4])
ax.set_xlabel('# of Standard Deviations Outside the Mean')
ax.set_yticklabels([])
ax.set_title('Probability to comply')

# saved with a relative name, i.e. in the current working directory (not in figures_path)
plt.savefig('normal_curve.png', dpi=72, bbox_inches='tight')
plt.show()

### Save Excel file into reports directory

Save an Excel file with the intermediate results into the reports directory.

In [None]:
# strings_to_urls is an XlsxWriter workbook option, so the engine is named
# explicitly; pandas 2.0 removed the `options` keyword, engine options are
# passed through engine_kwargs instead
writer = ExcelWriter(report_file, engine='xlsxwriter',
                     engine_kwargs={'options': {'strings_to_urls': False}})

In [None]:
# one sheet per table: the tickets, the three groupings and the apps
dfreadfile.to_excel(writer, sheet_name='Tickets')
grouped.to_excel(writer, sheet_name='G_by_day_hr_CI')
grouped1.to_excel(writer, sheet_name='G_by_month_CI')
grouped2.to_excel(writer, sheet_name='G_by_month_PL')
dfreadfileapps.to_excel(writer, sheet_name= 'apps')

In [None]:
# close() writes the workbook to disk and releases the file;
# ExcelWriter.save() was removed in pandas 2.0
writer.close()