In [1]:
import os
import numpy as np
import pandas as pd
import simpy

import plotly.io as pio
import plotly.offline as pyo
from functions.plotting import (make_sankey_df,
                                make_link_and_node_df,
                                plot_sankey)
from functions.data import sort_df
from functions.sim import Customer

pd.set_option('display.max_columns', None)
project_name = os.getcwd().split('/')[-1]
to_save = True

In [2]:
label_colors = {
    "Space Mountain": "#d62728",
    "Indiana Jones Adventure": "#1f77b4",
    "Haunted Mansion": "#2ca02c",
    "None": "#ff7f0e",
    "Untracked":  "#ff7f0e",
}

# Simulate Data

In [3]:
np.random.seed(42)
results = []

env = simpy.Environment()

for customer_id in range(1, 5000):
    customer = Customer(env, results, customer_id)  # Instantiate a customer with customer_id
    env.process(customer.make_choices())

env.run(until=6*60)  # until 6 hours

df = pd.DataFrame(results).reset_index().rename(columns={'index': 'order_id'}).sort_values(
    ['customer_id', 'order_id']).reset_index()
df

Unnamed: 0,index,order_id,customer_id,order_num,order_date,item
0,1931,1931,1,0,2020-11-24 10:54:19,Haunted Mansion
1,4010,4010,2,0,2020-11-24 11:31:51,Indiana Jones Adventure
2,6444,6444,2,1,2020-11-24 12:40:51,Space Mountain
3,788,788,3,0,2020-11-24 10:31:22,Space Mountain
4,282,282,4,0,2020-11-24 10:21:05,Haunted Mansion
...,...,...,...,...,...,...
7499,2342,2342,4996,0,2020-11-24 11:02:17,Space Mountain
7500,2451,2451,4997,0,2020-11-24 11:04:27,Haunted Mansion
7501,6160,6160,4997,1,2020-11-24 12:26:27,Space Mountain
7502,628,628,4998,0,2020-11-24 10:28:05,Haunted Mansion


In [4]:
df['order_date'].min()

Timestamp('2020-11-24 10:15:00')

In [5]:
df['order_date'].max()

Timestamp('2020-11-24 15:55:13')

# Check

### Initial Weights

In [6]:
customer.initial_weights

[0.5, 0.3, 0.2]

In [7]:
counts = df.groupby(['order_num', 'item'])['item'].count().unstack()
counts.div(counts.sum(axis=1), axis=0)[['Space Mountain', 'Indiana Jones Adventure', 'Haunted Mansion']]

item,Space Mountain,Indiana Jones Adventure,Haunted Mansion
order_num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.505301,0.293859,0.20084
1,0.34413,0.503374,0.152497
2,0.316476,0.59217,0.091354
3,0.358885,0.574913,0.066202
4,0.284553,0.609756,0.105691


### Transition Matrix

In [8]:
customer.transition_matrix

Unnamed: 0,Space Mountain,Indiana Jones Adventure,Haunted Mansion
Space Mountain,0.8,0.15,0.05
Indiana Jones Adventure,0.3,0.65,0.05
Haunted Mansion,0.15,0.05,0.8


In [9]:
# simulated data
df['next_item'] = df.groupby('customer_id')['item'].shift(-1)
transition_matrix = df.groupby(['item', 'next_item'])['customer_id'].count().unstack()

order = ['Space Mountain', 'Indiana Jones Adventure', 'Haunted Mansion']
sort_df(transition_matrix.div(transition_matrix.sum(axis=1), axis=0),
        order, order)

next_item,Space Mountain,Indiana Jones Adventure,Haunted Mansion
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Space Mountain,0.806854,0.146417,0.046729
Indiana Jones Adventure,0.276995,0.672926,0.050078
Haunted Mansion,0.194757,0.044944,0.7603


### Dropout Rate

In [10]:
customer.dropout_rates

[0.9, 0.3, 0.8]

In [11]:
# simulated data
df['next_item'] = df.groupby('customer_id')['item'].shift(-1)
transition_matrix = df.fillna('None').groupby(['item', 'next_item'])['customer_id'].count().unstack()

transition_matrix.div(transition_matrix.sum(axis=1), axis=0)['None'].reindex(customer.choices)

item
Space Mountain             0.904691
Indiana Jones Adventure    0.319730
Haunted Mansion            0.797420
Name: None, dtype: float64

### TIme Constraints

In [12]:
print(df['order_date'].min())
print(df['order_date'].max())

2020-11-24 10:15:00
2020-11-24 15:55:13


### Wait Times

In [13]:
customer.wait_times

[65, 44, 23]

In [14]:
df['next_order_date'] = df.groupby('customer_id')['order_date'].shift(-1)
df['wait_time'] = (df['next_order_date']-df['order_date'])/ np.timedelta64(1, 's')
df.groupby(['item'])['wait_time'].mean()/60-10

item
Haunted Mansion            32.307116
Indiana Jones Adventure    48.980177
Space Mountain             59.155763
Name: wait_time, dtype: float64

In [15]:
df['next_order_date'] = df.groupby('customer_id')['order_date'].shift(-1)
df['wait_time'] = (df['next_order_date']-df['order_date'])/ np.timedelta64(1, 's')
df.groupby(['item'])['wait_time'].std()/60

item
Haunted Mansion            18.682228
Indiana Jones Adventure    13.535487
Space Mountain             14.284849
Name: wait_time, dtype: float64

# Plot Results

In [16]:
# Include None

history_df = df.pivot(index='customer_id', columns='order_num', values='item')
sankey_df = make_sankey_df(history_df, fillna='Untracked')
link_df, node_df = make_link_and_node_df(sankey_df, num_steps=4)

fig = plot_sankey(link_df, node_df,
                  label_colors=label_colors,
                  title='Customer History, Show Untracked')

dir_name = 'figures/sankey'
filename =  'history-includena'

if to_save:
    div = pyo.plot(fig, output_type='div')
    with open(f'{dir_name}/{filename}.html', 'w') as f:
        f.write(div)
    pio.write_image(fig, f'{dir_name}/{filename}.png', width=800, height=600)

# fig.show()

In [17]:
# Exclude None

history_df = df.pivot(index='customer_id', columns='order_num', values='item')
sankey_df = make_sankey_df(history_df, dropna=True)
link_df, node_df = make_link_and_node_df(sankey_df, num_steps=4, dropna=True)

fig = plot_sankey(link_df, node_df,
                  label_colors=label_colors,
                  title='Customer History, Hide Untracked')

dir_name = 'figures/sankey'
filename =  'history-dropna'

if to_save:
    div = pyo.plot(fig, output_type='div')
    with open(f'{dir_name}/{filename}.html', 'w') as f:
        f.write(div)
    pio.write_image(fig, f'{dir_name}/{filename}.png', width=800, height=600)

# fig.show()