## Again, load up the libraries and data

In [117]:
%matplotlib inline
import pandas as pd
import numpy as np
from matplotlib import pyplot
import os
import re

## here we'll set the matplotlib figure size to take up more real estate
pyplot.rcParams['figure.figsize'] = [12, 5]

outputFiles = os.listdir('./output')
output_series = (list(), list())
output_metrics = (list(), list())
inputs = (list(), list())
for outputFile in outputFiles:
    simName = outputFile[:outputFile.index('.output')]
    with open("./output/" + outputFile, 'r') as f:
        for cnt, line in enumerate(f):
            split = line.split()
            if len(split) > 1:
                if split[0].startswith('OUT'):
                    measure = split[0][:split[0].rfind('_')]
                    iteration = int(split[0][split[0].rfind('_')+1:])
                    if len(split) > 2:
                        output_series[0].append((simName, measure, iteration))
                        output_series[1].append(pd.Series(split[1:]).astype(float))
                    else:
                        output_metrics[0].append((simName, measure, iteration))
                        output_metrics[1].append(float(split[1]))
                        
                if split[0].startswith('INP'):
                    inputs[0].append((simName, split[0]))
                    inputs[1].append(float(split[1]))
                                   
series_index = pd.MultiIndex.from_tuples(output_series[0], names=['simulation', 'measure', 'iteration'])
series = pd.DataFrame(pd.Series(output_series[1], index=series_index), columns=['values'])

metrics_index = pd.MultiIndex.from_tuples(output_metrics[0], names=['simulation', 'measure', 'iteration'])
metrics = pd.DataFrame(pd.Series(output_metrics[1], index=metrics_index), columns=['values'])

inputs_index = pd.MultiIndex.from_tuples(inputs[0], names=['simulation', 'measure'])
inputs = pd.DataFrame(pd.Series(inputs[1], index=inputs_index), columns=['values'])


## Lets take a look at the 22 cashier configuration again. This time we have run 10 simulations. Lets graph any solutions that have a max wait time larger than 10 minutes.

In [118]:
series = series.reset_index()
series.set_index(['measure', 'simulation', 'iteration'], inplace=True)
meanWaitTimes = series['values']['OUT_CUSTOMER_WAIT_TIMES'].apply(lambda x: x.mean())
meanIdleTimes = series['values']['OUT_CASHIER_IDLE_TIMES'].apply(lambda x: x.mean())
mean_times = pd.concat([meanWaitTimes, meanIdleTimes], axis=1)
mean_times.columns = ['mean_wait', 'mean_idle']


# df = pd.DataFrame(series['values']['OUT_CUSTOMER_WAIT_TIMES'].apply(lambda x: x.mean()))
mean_times.loc[df['mean_wait'] < 15].groupby('simulation').count()


Unnamed: 0_level_0,mean_wait,mean_idle
simulation,Unnamed: 1_level_1,Unnamed: 2_level_1
cashier_1000_13,80,80
cashier_1000_14,88,88
cashier_1000_15,96,96
cashier_1000_16,94,94
cashier_1000_17,99,99


## This is interesting. We can see that with 17 cashiers, the mean wait time is less than 15 minutes 99% of the time. Furthermore, 15 and 16 cashiers' mean wait time is less than 15 minutes ~95% of the time. Lets dig into those two.

In [144]:
series['values']['cashier_1000_16'].apply(lambda x: x.describe())['75%']['OUT_CUSTOMER_WAIT_TIMES'].max()

42.38445

In [6]:
bins = np.linspace(10, 70, 100)
waitTimeKeys = filter(lambda k: k.startswith('OUT_CUSTOMER_WAIT_TIMES'), output['cashier_1000_22.output'].keys())
for key, waitTimes in {key: output['cashier_1000_22.output'][key].astype(float) for key in waitTimeKeys}.iteritems():
    if waitTimes.max() > 10:
        pyplot.hist(waitTimes, bins, alpha=0.5, label=key)
pyplot.legend(loc='upper right')
pyplot.show()

NameError: name 'output' is not defined

## We see here that there are 3 solutions with wait times larger than 10 minutes, but only one that has several wait times over 15 minutes: solution #8.

In [None]:
## Lets see what the wait times look like in a 24 cashier scenario.

In [None]:
bins = np.linspace(5, 10, 100)
waitTimeKeys = filter(lambda k: k.startswith('OUT_CUSTOMER_WAIT_TIMES'), output['cashier_1000_24.output'].keys())
for key, waitTimes in {key: output['cashier_1000_24.output'][key].astype(float) for key in waitTimeKeys}.iteritems():
    if waitTimes.max() > 5:
        pyplot.hist(waitTimes, bins, alpha=0.5, label=key)
pyplot.legend(loc='upper right')
pyplot.show()

## Here we see a single solution with wait times over 5 minutes, but no wait times over ~6 minutes exist in the solutions.