In [None]:
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [None]:
# Options
# Display floats with two decimal places. The fully qualified key is used
# because pandas resolves option names by pattern: on pandas >= 1.4 the bare
# 'precision' also matches 'styler.format.precision' and raises OptionError.
pd.set_option('display.precision', 2)
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# CONSTANTS
# Weekday names listed Monday-first; position i labels weekday code i on the
# day-of-week axes drawn below.
DOW_ORDER = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]
# Five hours expressed in minutes; used as the exclusive upper bound when
# selecting actual wait times.
MAX_MINUTES = 5 * 60

In [None]:
# Load the cleaned data set, parse its two timestamp columns, then derive the
# weekday helpers from the parsed `date` column in a second `assign` step.
clean_df = (
    pd.read_csv('data/clean_data.csv')
    .assign(
        date=lambda raw: pd.to_datetime(raw['date']),
        datetime=lambda raw: pd.to_datetime(raw['datetime']),
    )
    .assign(
        # Integer weekday code (0 = Monday ... 6 = Sunday).
        day_of_week=lambda parsed: parsed['date'].dt.dayofweek,
        day_name=lambda parsed: parsed['date'].dt.day_name(),
    )
)

# Quick sanity check: (rows, columns) followed by the available column names.
print(clean_df.shape)
clean_df.columns

In [None]:
# Rows that carry a positive posted wait or a positive actual wait
# (presumably the observations taken while a ride was operating -- confirm).
open_days_df = clean_df.loc[
    lambda obs: (obs['SPOSTMIN'] > 0) | (obs['SACTMIN'] > 0)
]
# Of those, rows whose actual wait is positive and strictly below MAX_MINUTES.
actuals_df = open_days_df.loc[
    lambda obs: (obs['SACTMIN'] > 0) & (obs['SACTMIN'] < MAX_MINUTES)
]

# Summary and distribution

In [None]:
# Descriptive statistics of the actual and posted wait columns for every
# (short_name, duration) pair.
group_keys = ['short_name', 'duration']
wait_columns = ['SACTMIN', 'SPOSTMIN']
open_days_df[group_keys + wait_columns].groupby(group_keys).describe()

## Posted wait-time boxplot per ride

In [None]:
# Horizontal box plot of posted wait times, one box per `short_name`.
# The plotly update_* methods return the figure, so the axis titles are chained.
fig = (
    px.box(open_days_df, y='short_name', x='SPOSTMIN')
    .update_xaxes(title_text='Posted wait time (minutes)')
    .update_yaxes(title_text='')
)
fig.show()

# Relationships

## Wait times per day of week

### Actual wait times: biased observation of actuals

In [None]:
# Box plot of actual wait times per weekday code; the numeric ticks 0-6 are
# relabelled with the weekday names from DOW_ORDER.
fig = (
    px.box(actuals_df, y='SACTMIN', x='day_of_week')
    .update_yaxes(title_text='Actual wait time (minutes)')
    .update_xaxes(
        title_text='Day of week',
        tickvals=list(range(7)),
        ticktext=DOW_ORDER,
    )
)
fig.show()

In [None]:
# Distribution of actual wait times per weekday, on a log-scaled y axis.
# `order` reserves one categorical slot for every weekday code 0-6. Without it
# seaborn only creates slots for the codes present in `actuals_df`, so a
# weekday with no rows would shift every following DOW_ORDER label.
weekday_codes = list(range(len(DOW_ORDER)))
plt.figure(figsize=(7, 6))
g = sns.boxplot(x='day_of_week', y='SACTMIN', data=actuals_df, order=weekday_codes)
# A log scale only changes tick spacing; the tick values are still minutes,
# so the label names the unit and the scale rather than "log(minutes)".
g = g.set(xlabel='Day of week', xticks=weekday_codes, xticklabels=DOW_ORDER,
          ylabel='Actual wait time (minutes, log scale)', yscale='log')

In [None]:
# Line plot of actual wait time against weekday code, with the ticks
# relabelled by weekday name. FacetGrid.set returns the grid, so it is chained.
g = (
    sns.relplot(
        data=actuals_df,
        x='day_of_week',
        y='SACTMIN',
        kind='line',
        height=7,
        aspect=2,
    )
    .set(
        xlabel='Day of week',
        xticks=range(7),
        xticklabels=DOW_ORDER,
        ylabel='Actual wait time (minutes)',
    )
)

### Posted wait times: complete observation of estimates

In [None]:
# Distribution of posted wait times per weekday, on a log-scaled y axis.
# `order` reserves one categorical slot for every weekday code 0-6. Without it
# seaborn only creates slots for the codes present in `open_days_df`, so a
# weekday with no rows would shift every following DOW_ORDER label.
weekday_codes = list(range(len(DOW_ORDER)))
plt.figure(figsize=(7, 6))
g = sns.boxplot(x='day_of_week', y='SPOSTMIN', data=open_days_df, order=weekday_codes)
# A log scale only changes tick spacing; the tick values are still minutes,
# so the label names the unit and the scale rather than "log(minutes)".
g = g.set(xlabel='Day of week', xticks=weekday_codes, xticklabels=DOW_ORDER,
          ylabel='Posted wait time (minutes, log scale)', yscale='log')

In [None]:
# Line plot of posted wait time against weekday code, with the ticks
# relabelled by weekday name. FacetGrid.set returns the grid, so it is chained.
g = (
    sns.relplot(
        data=open_days_df,
        x='day_of_week',
        y='SPOSTMIN',
        kind='line',
        height=7,
        aspect=2,
    )
    .set(
        xlabel='Day of week',
        xticks=range(7),
        xticklabels=DOW_ORDER,
        ylabel='Posted wait time (minutes)',
    )
)

In [None]:
# One row per SEASON holding the median (0.5 quantile) of its `date` values;
# the date-based plots further down use these dates as tick positions and the
# season names as tick labels.
median_date_per_season = open_days_df.groupby('SEASON')['date'].quantile(0.5)
seasons_map = median_date_per_season.reset_index()

# Seasons to exclude, for viz. purposes
# exclude_seasons = ['CHRISTMAS PEAK', 'MARDI GRAS', 'JULY 4TH', 'JERSEY WEEK']
# seasons_map = seasons_map[~seasons_map.SEASON.isin(exclude_seasons)]

# Displayed in chronological order; `seasons_map` itself is left unsorted.
seasons_map.sort_values(by='date')

In [None]:
# Peek at the `date` column of the filtered frame.
open_days_df['date']

In [None]:
# Posted wait time over calendar date. The x ticks sit at each season's median
# date and carry the season name, rotated so the labels do not collide.
g = (
    sns.relplot(
        data=open_days_df,
        x='date',
        y='SPOSTMIN',
        kind='line',
        height=7,
        aspect=2,
    )
    .set(
        xlabel='Date',
        ylabel='Posted wait time (minutes)',
        xticks=seasons_map['date'],
        xticklabels=seasons_map['SEASON'],
    )
    .set_xticklabels(rotation=45, horizontalalignment='right')
)

In [None]:
# Posted wait time over calendar date, one coloured line per `short_name`.
# The x ticks sit at each season's median date and carry the season name.
g = (
    sns.relplot(
        data=open_days_df,
        x='date',
        y='SPOSTMIN',
        hue='short_name',
        kind='line',
        height=7,
        aspect=2,
    )
    .set(
        xlabel='Date',
        ylabel='Posted wait time (minutes)',
        xticks=seasons_map['date'],
        xticklabels=seasons_map['SEASON'],
    )
    .set_xticklabels(rotation=45, horizontalalignment='right')
)

## Wait times by weather

In [None]:
# Scatter plot with a fitted regression line: actual wait time against
# WDWMINTEMP (presumably the day's minimum temperature -- confirm).
g = sns.lmplot(
    data=actuals_df,
    x='WDWMINTEMP',
    y='SACTMIN',
    height=7,
)