In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import folium
import seaborn as sns
# Apply seaborn's default theme to every figure created after this point.
sns.set_theme()

In [None]:
# Load the processed flow export; pandas infers the zip compression from the
# ".zip" file extension.
flow_csv_path = "../../../csv/v2/050_gardiner_flow_processed.csv.zip"
df = pd.read_csv(flow_csv_path)

In [None]:
# Parse the collection timestamps as timezone-aware UTC, truncated to whole seconds.
# NOTE(review): every calendar field derived below (hour, day, weekday, ...) is
# therefore expressed in UTC, not in the road's local time — confirm this is
# what the "hour of day" plots are meant to show.
df["collectedAt"] = pd.to_datetime(df["collectedAt"], utc=True).dt.floor(freq="s")
collected_at = df["collectedAt"]

# Numeric calendar components.
df["year"] = collected_at.dt.year
df["month"] = collected_at.dt.month
df["day"] = collected_at.dt.day
df["hour"] = collected_at.dt.hour
df["dow"] = collected_at.dt.day_of_week

# Human-readable names used as facet / legend labels.
df["dow_name"] = collected_at.dt.day_name()
df["month_name"] = collected_at.dt.month_name()

# String period keys ("YYYY-MM" and "YYYY-MM-DD"). Periods cannot carry a
# timezone, and calling to_period() on tz-aware data emits a UserWarning about
# dropping it; removing the UTC marker explicitly first yields the same labels
# without the warning.
collected_naive = collected_at.dt.tz_localize(None)
df["year_month"] = collected_naive.dt.to_period("M").astype(str)
df["year_month_day"] = collected_naive.dt.to_period("D").astype(str)

# Speed

- All values provided by TomTom are averages over the whole Gardiner Expy, all the way from the DVP to the QEW (about 7-8 km long).

## Current Speed - Monthly analysis
- This plot shows the mean current speed and its confidence interval for each day of the month, with one panel per month

In [None]:
# One panel per month name, four panels per row. Each panel aggregates the
# current speed per day of the month (lineplot's default estimator and error band).
g = sns.FacetGrid(
    data=df,
    col="month_name",
    col_wrap=4,
    hue="month",
    sharex=False,
)
g.map(sns.lineplot, "day", "currentSpeed")
g.set_titles(col_template="{col_name}")
g.set_axis_labels("Day", "Current Speed (km/h)")

# Export the grid's figure in vector (EPS) and raster (PNG) form.
# NOTE(review): EPS has no alpha channel, so the translucent error band may be
# rendered opaque in the .eps file — confirm the output looks as intended.
g.savefig("./plots/010_current_speed_all_months.eps", format="eps", bbox_inches="tight")
g.savefig("./plots/010_current_speed_all_months.png", bbox_inches="tight")

## Current Speed - Month sample
- Sample a month from the dataframe
- Plot all observations for that month
- In detail, this is what a single month looks like

In [None]:
# Fixed seed so the sampled month (and the figure saved from it) is the same on
# every run; change the value to inspect a different month.
MONTH_SAMPLE_SEED = 42

# Draw one random observation and keep every row that shares its "YYYY-MM" label.
sample_month = df["year_month"].sample(n=1, random_state=MONTH_SAMPLE_SEED).iloc[0]
df_sample = df[df["year_month"] == sample_month]
print("Num observations", len(df_sample))
df_sample.head()

In [None]:
# Plot every raw observation of the sampled month (estimator=None disables aggregation).
ax = sns.lineplot(data=df_sample, x="collectedAt", y="currentSpeed", estimator=None)
ax.set(xlabel="", ylabel="Current Speed (km/h)")
# The month is drawn at random, so state which one the figure shows
# (same idea as the title on the sampled-day plot).
ax.set_title(df_sample["year_month"].iloc[0])
plt.xticks(rotation=45)
plt.savefig("./plots/010_current_speed_sampled_month.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/010_current_speed_sampled_month.png", bbox_inches="tight")

## Current Speed - Day sample
- Sample a day from the dataframe
- In detail, this is what a day looks like

In [None]:
# Fixed seed so the sampled day (and the figure saved from it) is the same on
# every run; change the value to inspect a different day.
DAY_SAMPLE_SEED = 42

# Draw one random observation and keep every row that shares its "YYYY-MM-DD" label.
sample_day = df["year_month_day"].sample(n=1, random_state=DAY_SAMPLE_SEED).iloc[0]
df_sample = df[df["year_month_day"] == sample_day]
print("Num observations", len(df_sample))
df_sample.head()

In [None]:
# The first row of the sampled day supplies the date parts for the title.
data_row = df_sample.iloc[0]
title = f"{data_row.month_name} {data_row.day}, {data_row.year}"

# Plot every raw observation of that day (estimator=None disables aggregation).
ax = sns.lineplot(x="collectedAt", y="currentSpeed", data=df_sample, estimator=None)
ax.set_title(title)
ax.set_xlabel("")
ax.set_ylabel("Current Speed (km/h)")
plt.xticks(rotation=45)

# Export the figure that owns the axes in both EPS and PNG form.
ax.figure.savefig("./plots/010_current_speed_sampled_day.eps", format="eps", bbox_inches="tight")
ax.figure.savefig("./plots/010_current_speed_sampled_day.png", bbox_inches="tight")

## Current Speed - Month and day of week
- Aggregate entire df based on day of week by mean (1) and median (2)
- Try to show the mean and confidence interval

In [None]:
# Group once by (month name, weekday name) after removing the columns listed
# below (presumably dropped because they cannot be averaged — TODO confirm),
# then reduce the same groups with the mean and with the median.
by_month_and_dow = (
    df.drop(columns=["coordinates", "year_month", "year_month_day"])
    .groupby(["month_name", "dow_name"])
)

# Each summary is ordered by the aggregated year, month and weekday number and
# given a fresh 0..n-1 index.
df_by_dow_mean = (
    by_month_and_dow.mean()
    .reset_index()
    .sort_values(by=["year", "month", "dow"])
    .reset_index(drop=True)
)
df_by_dow_median = (
    by_month_and_dow.median()
    .reset_index()
    .sort_values(by=["year", "month", "dow"])
    .reset_index(drop=True)
)

In [None]:
# Preview the first five rows of the mean summary.
df_by_dow_mean.head(n=5)

In [None]:
# Preview the first five rows of the median summary.
df_by_dow_median.head(n=5)

In [None]:
# Weekday names ordered by weekday number, used as the column order of the grid.
dow_order = df_by_dow_mean.sort_values(by="dow")["dow_name"].unique().tolist()

# Rows are months, columns are weekdays; every panel has its own axes ranges.
g = sns.FacetGrid(
    data=df,
    row="month_name",
    col="dow_name",
    hue="dow_name",
    col_order=dow_order,
    sharex=False,
    sharey=False,
    margin_titles=True,
)
# lineplot's default estimator (the mean) summarises the speed for each hour.
g.map(sns.lineplot, "hour", "currentSpeed")
g.set_titles(col_template="{col_name}", row_template="{row_name}")
g.set_axis_labels("Hour of day", "Current Speed (km/h)")

g.savefig("./plots/010_current_speed_month_and_dow_mean.eps", format="eps", bbox_inches="tight")
g.savefig("./plots/010_current_speed_month_and_dow_mean.png", bbox_inches="tight")

In [None]:
# Weekday names ordered by weekday number, used as the column order of the grid.
dow_order = df_by_dow_median.sort_values(by="dow")["dow_name"].unique().tolist()

# Rows are months, columns are weekdays; every panel has its own axes ranges.
g = sns.FacetGrid(
    data=df,
    row="month_name",
    col="dow_name",
    hue="dow_name",
    col_order=dow_order,
    sharex=False,
    sharey=False,
    margin_titles=True,
)
# Same grid as the mean version, but each hour is summarised by the median.
g.map(sns.lineplot, "hour", "currentSpeed", estimator="median")
g.set_titles(col_template="{col_name}", row_template="{row_name}")
g.set_axis_labels("Hour of day", "Current Speed (km/h)")

g.savefig("./plots/010_current_speed_month_and_dow_median.eps", format="eps", bbox_inches="tight")
g.savefig("./plots/010_current_speed_month_and_dow_median.png", bbox_inches="tight")

In [None]:
# Mean of every remaining column for each (weekday, hour) pair, pooled over all
# months. "Weekday" duplicates dow_name under a name suited to a legend title.
df_dow_mean = (
    df.drop(columns=["coordinates", "month_name", "year_month", "year_month_day"])
    .groupby(["dow", "dow_name", "hour"])
    .mean()
    .reset_index()
    .assign(Weekday=lambda frame: frame["dow_name"])
)
df_dow_mean

In [None]:
# Median of every remaining column for each (weekday, hour) pair, pooled over
# all months. "Weekday" duplicates dow_name under a name suited to a legend title.
df_dow_median = (
    df.drop(columns=["coordinates", "month_name", "year_month", "year_month_day"])
    .groupby(["dow", "dow_name", "hour"])
    .median()
    .reset_index()
    .assign(Weekday=lambda frame: frame["dow_name"])
)
df_dow_median

In [None]:
# One line per weekday: mean current speed against hour of day.
ax = sns.lineplot(x="hour", y="currentSpeed", hue="Weekday", data=df_dow_mean)
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Current Speed (km/h)")

# Export the figure that owns the axes as EPS, PNG and PDF.
ax.figure.savefig("./plots/010_current_speed_weekday_mean.eps", format="eps", bbox_inches="tight")
ax.figure.savefig("./plots/010_current_speed_weekday_mean.png", bbox_inches="tight")
ax.figure.savefig("./plots/010_current_speed_weekday_mean.pdf", bbox_inches="tight")

In [None]:
# One line per weekday: median current speed against hour of day.
ax = sns.lineplot(x="hour", y="currentSpeed", hue="Weekday", data=df_dow_median)
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Current Speed (km/h)")

# Export the figure that owns the axes as EPS, PNG and PDF.
ax.figure.savefig("./plots/010_current_speed_weekday_median.eps", format="eps", bbox_inches="tight")
ax.figure.savefig("./plots/010_current_speed_weekday_median.png", bbox_inches="tight")
ax.figure.savefig("./plots/010_current_speed_weekday_median.pdf", bbox_inches="tight")