# Waiting Times of Taron

This notebook analyses how the waiting times of Taron (a very popular outdoor
rollercoaster that is open year-round) depend on weather, time of year and holidays. It
was selected as an example of an attraction that is probably dependent on weather, but
not as much as River Quest.

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go


In [None]:
def _bin_waiting_time(minutes):
    """Sort a mean waiting time (in minutes) into one of three coarse bins."""
    if minutes < 30:
        return "<30m"
    if minutes > 60:
        return ">60m"
    return "30-60m"


waiting_times_df = pd.read_csv(
    "../../data/processed/waiting_times.csv", index_col="id", parse_dates=["date"]
)

# keep only Taron records with a non-negative waiting time
is_taron = waiting_times_df["attraction"] == "Taron"
has_valid_waiting_time = waiting_times_df["waiting_time"] >= 0

# one row per day holding the mean of that day's waiting times
taron_df = (
    waiting_times_df.loc[is_taron & has_valid_waiting_time, ["waiting_time", "date"]]
    .groupby("date")
    .mean()
)
taron_df["bin_waiting_time"] = taron_df["waiting_time"].map(_bin_waiting_time)

taron_df.head(5)


In [None]:
# distribution of the daily means with a kernel density estimate on top
hist_ax = sns.histplot(taron_df["waiting_time"], kde=True)
hist_ax.set_title("histogram of the mean daily waiting time of Taron")


## Evaluate correlation between weather data and waiting time

In [None]:
# daily weather observations, indexed by date
weather_csv_path = "../../data/processed/weather_station01327_Lommersum.csv"
weather_df = pd.read_csv(weather_csv_path, index_col="date", parse_dates=["date"])


In [None]:
# weather columns that are left out of the comparison
columns_to_skip = [
    "quality_wind",
    "quality_other",  # only metadata
    "snow_depth",
    "precipitation_form",  # very seldom snow -> probably not useful
    "min_temperature_5cm",
    "max_temperature_2m",
    "min_temperature_2m",  # highly correlated with mean_temperature
    "max_wind_gust",
    "mean_wind_speed",
    "mean_cloud_cover",
    "mean_pressure",  # NaN
]
short_df = weather_df.drop(columns=columns_to_skip)

# attach Taron's daily values; the assignment aligns on the date index
short_df["bin_waiting_time"] = taron_df["bin_waiting_time"]
short_df["mean_waiting_time"] = taron_df["waiting_time"]

rain_outlier_day = pd.to_datetime("2021-07-14")
short_df = short_df.drop(index=rain_outlier_day)  # remove rain outlier

# keep only the days for which a waiting time exists
short_df = short_df[short_df["mean_waiting_time"].notna()]
len(short_df)


In [None]:
# preview the first five rows (the default of head())
short_df.head()

In [None]:
# pairwise weather relations, coloured by waiting-time bin; the numeric waiting
# time is left out so that only the bin encodes it
pairplot_df = short_df.drop(columns="mean_waiting_time")
sns.pairplot(pairplot_df, hue="bin_waiting_time", plot_kws={"alpha": 0.5})


In [None]:
weather_columns = [
    "precipitation_height",
    "sunshine_duration",
    "mean_temperature",
    "mean_vapor_pressure",
    "mean_relative_humidity",
]

fig, axs = plt.subplots(ncols=3, nrows=2, sharey=True, figsize=(18, 9))

# axs.flat walks the grid row by row; with five columns the sixth panel stays empty
for weather_column, panel in zip(weather_columns, axs.flat):
    sns.scatterplot(data=short_df, x=weather_column, y="mean_waiting_time", ax=panel)

fig.suptitle(
    "mean waiting time of Taron for each day it was opened vs. weather parameters"
)


In [None]:
# drop the two explicitly excluded columns and every column that is entirely NaN
usable_weather_df = weather_df.drop(
    columns=["quality_other", "precipitation_form"]
).dropna(axis="columns", how="all")

# correlate each remaining weather column with the daily mean waiting time and
# order the result by absolute strength
weather_correlations = usable_weather_df.corrwith(taron_df["waiting_time"])
weather_waiting_time_corr_df = weather_correlations.sort_values(
    key=np.abs, ascending=False
)

corr_fig = px.bar(
    weather_waiting_time_corr_df,
    width=600,
    height=400,
    title="Pearson correlation between Lommersum weather <br>parameters and Taron waiting time (daily mean)",
    range_y=(-1, 1),
)
corr_fig.show()


## Date-based analysis

In [None]:
# both holiday tables are indexed by their parsed "date" column
date_index_options = {"index_col": "date", "parse_dates": ["date"]}

school_holidays_df = pd.read_csv(
    "../../data/processed/school_holidays.csv", **date_index_options
)
public_holidays_df = pd.read_csv(
    "../../data/processed/public_holidays.csv", **date_index_options
)


In [None]:
# preview the first five rows (the default of head())
school_holidays_df.head()


In [None]:
# preview the first five rows (the default of head())
public_holidays_df.head()


In [None]:
# aggregate the mean waiting time for each holiday type and federal state in a pivoted table
state_holiday_dict = {}

for state in school_holidays_df.columns:
    state_holidays = school_holidays_df[state]
    state_holiday_dict[state] = {}

    # marks every day with waiting-time data that falls into any holiday of this state
    is_any_holiday = np.zeros(len(taron_df), dtype=bool)

    for holiday in state_holidays.unique():

        # all holiday names are strings except for the not-a-holiday state which is NaN
        if not isinstance(holiday, str):
            continue

        # boolean mask over taron_df rather than a set of dates: pandas >= 2.0
        # raises a TypeError when a set is passed to .loc
        holiday_dates = state_holidays.index[state_holidays == holiday]
        is_holiday = taron_df.index.isin(holiday_dates)
        state_holiday_dict[state][holiday] = taron_df.loc[
            is_holiday, "waiting_time"
        ].mean()

        is_any_holiday |= is_holiday

    # average all dates that are no holiday
    state_holiday_dict[state]["None"] = taron_df.loc[
        ~is_any_holiday, "waiting_time"
    ].mean()

# rows: holiday names (plus "None"), columns: states
state_holiday_df = pd.DataFrame(state_holiday_dict)
state_holiday_df


In [None]:
# create an annotated heatmap of the state_holiday_df


@np.vectorize
def num2str(n):
    """Render a number with one decimal place; NaN becomes an empty string.

    Vectorized, so it can be applied to a whole array of heatmap values.
    """
    return "" if np.isnan(n) else f"{n:.1f}"


# raw values for the colour scale and their formatted counterparts for the labels
state_holiday_np = state_holiday_df.to_numpy()
state_holiday_text_np = num2str(state_holiday_np)

fig = ff.create_annotated_heatmap(
    state_holiday_np,
    x=state_holiday_df.columns.tolist(),
    y=state_holiday_df.index.tolist(),
    annotation_text=state_holiday_text_np,
    showscale=True,
    font_colors=["black"],
    colorscale="YlOrBr",
)
fig.update_layout(
    title="mean waiting time of Taron during the holidays of different states", width=900
)
fig.show()


In [None]:
# create a DataFrame with daily waiting times and date-based information
selected_states = ["NW", "HE", "NI", "RP"]

# dates on which at least one of the selected states has a school holiday entry
in_school_holiday = school_holidays_df[selected_states].notna().any(axis=1)
school_holiday_dates = school_holidays_df.index[in_school_holiday]
school_holidays = set(school_holiday_dates)

# dates flagged as a public holiday in at least one of the selected states
in_public_holiday = public_holidays_df[selected_states].any(axis=1)
public_holiday_dates = public_holidays_df.index[in_public_holiday]
public_holidays = set(public_holiday_dates)

date_based_df = taron_df.assign(
    weekday=taron_df.index.weekday,
    school_holiday=taron_df.index.isin(school_holiday_dates),
    public_holidays=taron_df.index.isin(public_holiday_dates),
    month=taron_df.index.month,
)

date_based_df.head(5)


In [None]:
def waiting_times_by_day_type(date_based_df):
    """Compute mean waiting time and support per month and type of day.

    For every distinct value of the ``month`` column four rows are produced, one
    each for the types ``public_holiday``, ``school_holiday``, ``weekend``
    (``weekday >= 5``) and ``None of the above``. A day that matches several of
    the first three types contributes to each of them.

    Parameters
    ----------
    date_based_df : pandas.DataFrame
        Needs the columns ``waiting_time``, ``month``, ``weekday``,
        ``public_holidays`` and ``school_holiday``.

    Returns
    -------
    pandas.DataFrame
        Columns ``month`` (as string), ``type``, ``mean`` (mean waiting time of
        the selected days, NaN if there are none) and ``support`` (number of
        non-missing waiting times that went into the mean).
    """
    is_public_holiday = date_based_df["public_holidays"]
    is_school_holiday = date_based_df["school_holiday"]

    # one row selector per day type; dict order fixes the row order per month
    day_type_masks = {
        "public_holiday": is_public_holiday,
        "school_holiday": is_school_holiday,
        "weekend": date_based_df["weekday"] >= 5,
        "None of the above": (
            (date_based_df["weekday"] < 5)
            & np.logical_not(is_public_holiday)
            & np.logical_not(is_school_holiday)
        ),
    }

    records = []
    for month in date_based_df["month"].unique():
        in_month = date_based_df["month"] == month

        for day_type, is_day_type in day_type_masks.items():
            selected_times = date_based_df.loc[in_month & is_day_type, "waiting_time"]
            records.append(
                {
                    "month": str(month),
                    "type": day_type,
                    "mean": selected_times.mean(),
                    "support": selected_times.count(),
                }
            )

    return pd.DataFrame(records)


In [None]:
day_type_df = waiting_times_by_day_type(date_based_df)

# label text carrying the support of each mean, used as bar annotation
day_type_df["text_support"] = [
    f"supp.\n{support}" for support in day_type_df["support"]
]

day_type_df.head(5)


In [None]:
# grouped bars: one group per month, one bar per type of day, annotated with support
px.bar(
    day_type_df,
    x="month",
    y="mean",
    color="type",
    text="text_support",
    barmode="group",
    category_orders={
        # labels must match the "type" values written by waiting_times_by_day_type;
        # the fourth one is "None of the above" (there is no "weekday" label)
        "type": ["public_holiday", "school_holiday", "weekend", "None of the above"],
        "month": ["5", "6", "7", "8", "9", "10", "11"],
    },
    labels={"mean": "mean waiting time (min)"},
    height=600,
    title="mean waiting times by type of day of Taron (including support)<br>(days falling into multiple categories are used multiple times)<br>timespan: 2019/07-2021/10",
).show()


In [None]:
# Mean waiting time per calendar month. Only the numeric waiting_time column is
# aggregated: taron_df also holds the string column bin_waiting_time, and
# averaging a whole frame that contains strings raises a TypeError in
# pandas >= 2.0. Grouping by the index month also avoids adding and removing a
# temporary "month" column on taron_df.
monthly_means_df = (
    taron_df["waiting_time"]
    .groupby(taron_df.index.month)
    .mean()
    .rename_axis("month")
    .to_frame()
)
# day_type_df stores the month as a string, so the join key must be a string too
monthly_means_df.index = monthly_means_df.index.map(str)

# express each day-type mean as deviation from the overall mean of its month
normalized_day_type_df = day_type_df.join(monthly_means_df, on="month")
normalized_day_type_df["mean"] = (
    normalized_day_type_df["mean"] - normalized_day_type_df["waiting_time"]
)

In [None]:
# grouped bars of the deviation from the monthly mean, annotated with support
px.bar(
    normalized_day_type_df,
    x="month",
    y="mean",
    color="type",
    text="text_support",
    barmode="group",
    category_orders={
        # labels must match the "type" values written by waiting_times_by_day_type;
        # the fourth one is "None of the above" (there is no "weekday" label)
        "type": ["public_holiday", "school_holiday", "weekend", "None of the above"],
        "month": ["5", "6", "7", "8", "9", "10", "11"],
    },
    labels={"mean": "mean waiting time deviation (min)"},
    height=600,
    title="deviation from monthly mean waiting times by type of day for Taron (including support)<br>(days falling into multiple categories are used multiple times)<br>timespan: 2019/07-2021/10",
).show()


In [None]:
# highlighted periods as (first day, last day, fill colour); per the figure title
# yellow marks summer and orange marks fall school holidays
highlighted_periods = [
    ("2021-07-05", "2021-08-17", "yellow"),
    ("2020-06-29", "2020-08-11", "yellow"),
    ("2019-07-15", "2019-08-27", "yellow"),
    ("2020-10-12", "2020-10-24", "orange"),
    ("2019-10-14", "2019-10-26", "orange"),
]

fig = go.Figure()

# one semi-transparent rectangle per period, spanning y = 0 to 110
for first_day, last_day, fill_colour in highlighted_periods:
    fig.add_shape(
        type="rect",
        x0=pd.to_datetime(first_day),
        x1=pd.to_datetime(last_day),
        y0=0,
        y1=110,
        fillcolor=fill_colour,
        opacity=0.5,
    )

# daily mean waiting time drawn on top of the highlighted periods
fig.add_scattergl(
    x=date_based_df.index, y=date_based_df.waiting_time, line={"color": "blue"}
)

fig.update_layout(
    title="mean daily waiting time of Taron with summer (yellow) and fall (orange) NRW school holidays highlighted",
    xaxis_title="day",
    yaxis_title="mean daily waiting time (min)",
)

fig.show()
