In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
# Apply seaborn's default visual theme to the figures created in this notebook.
sns.set_theme()

In [None]:
# Load the processed count data (zipped CSV, path relative to this notebook).
counts_csv = "../../../csv/v2/040_gardiner_count_processed.csv.zip"
df = pd.read_csv(counts_csv)
df

In [None]:
# Parse the bin timestamps as timezone-aware UTC values and derive calendar columns from them.
# NOTE(review): because of utc=True every derived field (day, dow, ...) is expressed in UTC —
# confirm that UTC, rather than local time, is what the analysis intends.
# NOTE: to_period() on timezone-aware data drops the timezone (pandas emits a UserWarning).
timestamps = pd.to_datetime(df["datetime_bin"], utc=True)
df["datetime_bin"] = timestamps

calendar_columns = {
    "year": timestamps.dt.year,
    "year_month": timestamps.dt.to_period("M"),
    "year_month_day": timestamps.dt.to_period("D"),
    "month": timestamps.dt.month,
    "month_name": timestamps.dt.month_name(),
    "day": timestamps.dt.day,
    "dow": timestamps.dt.day_of_week,  # Monday=0 ... Sunday=6
    "dow_name": timestamps.dt.day_name(),
}
# Insert in the listed order so the column layout of df stays the same.
for column_name, column_values in calendar_columns.items():
    df[column_name] = column_values
df

In [None]:
# Order rows chronologically; the date-span cells below read the first and last row by position.
df = df.sort_values("datetime_bin")
df

# Dates

## Date span

In [None]:
# df is sorted by datetime_bin, so the first/last positions hold the earliest/latest bins.
bin_times = df["datetime_bin"]
print("First available date:", bin_times.iloc[0])
print("Last available date:", bin_times.iloc[-1])

## Number of days with reported data

In [None]:
# Number of distinct calendar days that have at least one reading.
days_with_data = df["year_month_day"].drop_duplicates()
print(len(days_with_data))

### Since speed data is only available from July 2022 onwards, it is fair to restrict the count dataframe to 2022
- There is no Gardiner count data for July

In [None]:
# Keep only the readings whose (UTC) year is 2022.
is_2022 = df["year"] == 2022
df_2022 = df[is_2022]

In [None]:
# Row count of the 2022 subset, followed by a preview of its first rows.
print(len(df_2022.index))
df_2022.head()

In [None]:
# df_2022 inherits the chronological row order of df, so positions 0 and -1 bound the 2022 span.
bin_times_2022 = df_2022["datetime_bin"]
print("First date of 2022", bin_times_2022.iloc[0])
print("Last date of 2022", bin_times_2022.iloc[-1])

There are only 1696 data readings for the timespan of available speed data

# Show number of readings per day in the entire dataset

In [None]:
# One row per calendar day; after count() every non-key column holds that day's number of
# non-null values for the column.
day_keys = ["year", "month_name", "month", "day"]
readings_per_day = (
    df.groupby(day_keys)
    .count()
    .reset_index()
    .sort_values(by=["year", "month", "day"])
)

In [None]:
# Preview the first rows of the per-day counts.
readings_per_day.head()

In [None]:
# Histogram of the number of readings per day of month, one panel per calendar month.
#
# Each panel is filtered by BOTH year and month so that its content matches its title;
# filtering on month alone would merge the same month of different years into one panel.

# Odd day-of-month values, used as tick positions (histplot keeps a numeric x axis,
# so tick positions are the day values themselves).
day_order = readings_per_day["day"].sort_values().drop_duplicates()
dayticks = [day for day in day_order if day % 2 != 0]

# (year, month, panel title), listed in row-major panel order.
panels = [
    (2022, 8, "Aug. 2022"),
    (2022, 9, "Sep. 2022"),
    (2022, 10, "Oct. 2022"),
    (2022, 11, "Nov. 2022"),
    (2022, 12, "Dec. 2022"),
    (2023, 1, "Jan. 2023"),
]

fig, axes = plt.subplots(2, 3, figsize=(16, 9))
n_rows, n_cols = axes.shape

for panel_idx, (panel_ax, (year, month, title)) in enumerate(zip(axes.flat, panels)):
    panel_row, panel_col = divmod(panel_idx, n_cols)
    month_data = df[(df["year"] == year) & (df["month"] == month)]

    sns.histplot(data=month_data, x="day", discrete=True, ax=panel_ax)
    panel_ax.set(
        # Only the bottom row carries the x label and only the left column the y label.
        xlabel="Day" if panel_row == n_rows - 1 else "",
        ylabel="Num. Observations" if panel_col == 0 else "",
        title=title,
        xticks=dayticks,
    )
    panel_ax.tick_params(axis="x", rotation=30)

plt.savefig("./plots/000_gardiner_volume_num_readings_per_day.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_gardiner_volume_num_readings_per_day.png", bbox_inches="tight")
plt.savefig("./plots/000_gardiner_volume_num_readings_per_day.pdf", bbox_inches="tight")

In [None]:
# Bar chart of the number of readings per day, one facet per month.

# Facet order: month names in chronological (year, month) order.
month_order = (
    readings_per_day[["year", "month_name", "month"]]
    .sort_values(by=["year", "month"])
    .drop_duplicates()["month_name"]
)

# A bar catplot has a categorical x axis: bars sit at positions 0..n-1, not at the day values.
# Pin the category order and place ticks by position so that every label sits under the bar
# of the day it names (using the day values as positions shifts each label by one bar).
day_values = sorted(readings_per_day["day"].unique())
tick_positions = [pos for pos, day in enumerate(day_values) if day % 2 != 0]
tick_labels = [day_values[pos] for pos in tick_positions]

# y="detector_id" holds the per-day count produced by groupby(...).count().
grid = sns.catplot(
    data=readings_per_day, x="day", y="detector_id",
    col="month_name", col_wrap=4, col_order=month_order, order=day_values,
    margin_titles=True, kind="bar", sharey=False,
)
grid.set_titles(col_template="{col_name}", row_template="{row_name}")
grid.set(xlabel="", ylabel="Count", xticks=tick_positions)
grid.set_xticklabels(tick_labels, rotation=45)

plt.savefig("./plots/000_number_of_readings_per_day.eps",format="eps",bbox_inches="tight")
plt.savefig("./plots/000_number_of_readings_per_day.png",bbox_inches="tight")
plt.savefig("./plots/000_number_of_readings_per_day.pdf",bbox_inches="tight")

# Show number of readings per month in the entire dataset

In [None]:
# Per-month counts; the detector_id column of the count() result is kept as the number of readings.
monthly_counts = df.groupby(["year_month"]).count().reset_index()
readings_per_month = monthly_counts[["year_month", "detector_id"]]

In [None]:
# Display the per-month counts (one row per year_month period).
readings_per_month

In [None]:
# Bar chart of readings per month; the y axis is logarithmic.
month_grid = sns.catplot(
    kind="bar",
    data=readings_per_month,
    x="year_month",
    y="detector_id",
)
month_grid.set_xticklabels(rotation=30)
month_grid.set(xlabel="", ylabel="Count", yscale="log")
month_grid.set_titles(col_template="{col_name}")

plt.savefig("./plots/000_number_of_readings_per_month.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_number_of_readings_per_month.png", bbox_inches="tight")

# Show mean 15-minute volume per day of week in the entire dataset
- There is no count data for Mondays or Sundays

In [None]:
# Mean of every numeric column per (year, month, weekday).
# numeric_only=True is required: df still contains string columns (e.g. detector_loc) and
# pandas >= 2.0 raises a TypeError when asked to average them instead of silently dropping them.
dow_keys = ["year", "month_name", "month", "dow_name", "dow"]
readings_per_dow = df.groupby(dow_keys).mean(numeric_only=True).reset_index()
readings_per_dow.head()

In [None]:
# Put the aggregated rows in chronological order, then by weekday index.
readings_per_dow = readings_per_dow.sort_values(["year", "month", "dow"])
readings_per_dow.head()

In [None]:
# Mean volume_15min per month, one facet per weekday.

# Facet order: weekday names sorted by their numeric day-of-week index.
dow_order = (
    readings_per_dow[["dow_name", "dow"]]
    .sort_values(by="dow")
    .drop_duplicates()["dow_name"]
)
# Bar order on the x axis: month names in chronological order of first appearance.
# Computed from the plotted frame so the tick labels always match the bars.
month_labels = (
    readings_per_dow.sort_values(by=["year", "month"])["month_name"]
    .drop_duplicates()
    .tolist()
)

# The weekday is mapped to `col`, so the facets are ordered with col_order
# (row_order has no effect when no `row` variable is given).
grid = sns.catplot(
    data=readings_per_dow, x="month_name", y="volume_15min",
    col="dow_name", col_wrap=4, col_order=dow_order, order=month_labels,
    sharey=False, kind="bar", margin_titles=True,
)
grid.set_titles(col_template="{col_name}", row_template="{row_name}")
grid.set(xlabel="", ylabel="Count")
grid.set_xticklabels(month_labels, rotation=30)

plt.savefig("./plots/000_mean_number_of_readings_per_dow.eps",format="eps",bbox_inches="tight")
plt.savefig("./plots/000_mean_number_of_readings_per_dow.png",bbox_inches="tight")

# Show median 15-minute volume per day of week in the entire dataset
- There is no count data for Mondays or Sundays

In [None]:
# Median of every numeric column per (year, month, weekday).
# numeric_only=True is required: df still contains string columns (e.g. detector_loc) and
# pandas >= 2.0 raises a TypeError when asked to take their median instead of dropping them.
dow_keys = ["year", "month_name", "month", "dow_name", "dow"]
readings_per_dow = df.groupby(dow_keys).median(numeric_only=True).reset_index()
readings_per_dow.head()

In [None]:
# Put the aggregated rows in chronological order, then by weekday index.
readings_per_dow = readings_per_dow.sort_values(["year", "month", "dow"])
readings_per_dow.head()

In [None]:
# Median volume_15min per month, one facet per weekday.

# Facet order: weekday names sorted by their numeric day-of-week index.
dow_order = (
    readings_per_dow[["dow_name", "dow"]]
    .sort_values(by="dow")
    .drop_duplicates()["dow_name"]
)
# Bar order on the x axis: month names in chronological order of first appearance.
# Computed from the plotted frame so the tick labels always match the bars.
month_labels = (
    readings_per_dow.sort_values(by=["year", "month"])["month_name"]
    .drop_duplicates()
    .tolist()
)

# The weekday is mapped to `col`, so the facets are ordered with col_order
# (row_order has no effect when no `row` variable is given).
grid = sns.catplot(
    data=readings_per_dow, x="month_name", y="volume_15min",
    col="dow_name", col_wrap=4, col_order=dow_order, order=month_labels,
    sharey=False, kind="bar", margin_titles=True,
)
grid.set_titles(col_template="{col_name}", row_template="{row_name}")
grid.set(xlabel="", ylabel="Count")
grid.set_xticklabels(month_labels, rotation=45)

plt.savefig("./plots/000_median_number_of_readings_per_dow.eps",format="eps",bbox_inches="tight")
plt.savefig("./plots/000_median_number_of_readings_per_dow.png",bbox_inches="tight")

# Show total number of readings per location
- There are 23 unique detector locations

In [None]:
# Number of distinct detector locations in the dataset.
unique_locations = df["detector_loc"].drop_duplicates()
print("Total number of locations:", len(unique_locations))

- Across the entire dataset, this is how many vehicles were detected per location

In [None]:
# Total volume_15min per detector location.
# Select the single needed column before aggregating: summing the whole frame would also
# "sum" (concatenate) every string column, only for those results to be discarded.
vol_per_detector = df.groupby("detector_loc")["volume_15min"].sum().reset_index()
vol_per_detector.head()

In [None]:
# Total volume per detector location on a logarithmic y axis.
vol_per_detector = vol_per_detector.sort_values("detector_loc")

# hue duplicates x so each location gets its own colour; x tick labels are blanked and the
# legend placed to the right of the axes identifies the locations instead.
detector_grid = sns.catplot(
    data=vol_per_detector,
    kind="bar",
    x="detector_loc",
    y="volume_15min",
    hue="detector_loc",
    dodge=False,
    aspect=2,
)
detector_grid.set(xlabel="Detector Location", ylabel="Vehicle Volume (log)", xticklabels="", yscale="log")
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)

plt.savefig("./plots/000_total_readings_detector.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_total_readings_detector.png", bbox_inches="tight")
plt.savefig("./plots/000_total_readings_detector.pdf", bbox_inches="tight")

# Vehicle count per detector, per day of week

In [None]:
# Column sums per (detector location, weekday); the plot below uses volume_15min.
# numeric_only=True restricts the sum to numeric columns, so the datetime/period columns no
# longer need to be dropped by hand and string columns are not concatenated into
# meaningless per-group values.
vol_per_detector_and_dow = (
    df.groupby(["detector_loc", "dow", "dow_name"])
    .sum(numeric_only=True)
    .reset_index()
)
vol_per_detector_and_dow.head()

In [None]:
# Volume per detector location, one facet per weekday, each with its own logarithmic y axis.
dow_grid = sns.catplot(
    data=vol_per_detector_and_dow,
    kind="bar",
    x="detector_loc",
    y="volume_15min",
    hue="detector_loc",
    col="dow_name",
    col_wrap=4,
    dodge=False,
    sharey=False,
)
dow_grid.set_titles(col_template="{col_name}")
dow_grid.set(xlabel="Detector Location", ylabel="Vehicle Count (log)", xticklabels="", yscale="log")
# plt.legend acts on the current axes, so the legend is attached to a single facet.
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)

plt.savefig("./plots/000_readings_per_detector_per_dow.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_readings_per_detector_per_dow.png", bbox_inches="tight")
plt.savefig("./plots/000_readings_per_detector_per_dow.pdf", bbox_inches="tight")

# Random detector data per day of week and time

- Mean volume per day of week and time

In [None]:
# Detector location inspected in the cells below. Despite the name, the value is a fixed
# literal, not a random draw.
random_detector = "E of DUNN AV"

In [None]:
# Add the clock time of each bin (as a string) and its hour as new columns on df.
bin_accessor = df["datetime_bin"].dt
df["time_of_day"] = bin_accessor.time.astype(str)
df["hour"] = bin_accessor.hour
df.head()

In [None]:
# Rows recorded at the selected detector location only.
at_selected_detector = df["detector_loc"] == random_detector
random_detector_data = df[at_selected_detector]
random_detector_data.head()

In [None]:
# Mean of the remaining columns per weekday and time of day for the selected detector.
# Columns that are neither grouping keys nor wanted in the average are dropped first.
mean_excluded_columns = [
    "detector_id",
    "primary_road",
    "direction",
    "datetime_bin",
    "year_month",
    "year_month_day",
    "month_name",
    "number_of_lanes",
    "latitude",
    "longitude",
    "year",
    "month",
    "day",
]
mean_group_keys = ["dow", "dow_name", "time_of_day", "hour", "detector_loc"]

random_detector_data_mean = (
    random_detector_data
    .drop(columns=mean_excluded_columns)
    .groupby(mean_group_keys)
    .mean()
    .reset_index()
)
random_detector_data_mean.head()

In [None]:
# Distinct hours present in the data (not referenced again in the visible cells).
x_labels = random_detector_data_mean["hour"].drop_duplicates().astype(int)

# Mean volume over the day for the selected detector, one line per weekday.
mean_ax = sns.lineplot(
    data=random_detector_data_mean,
    x="time_of_day",
    y="volume_15min",
    hue="dow_name",
)
mean_ax.legend().set_title("Weekday")
mean_ax.set(xlabel="Time of Day", ylabel="Volume (mean)", title=random_detector)

# Show only every 10th x tick label; hide the rest.
for tick_idx, tick_label in enumerate(mean_ax.get_xticklabels()):
    tick_label.set_visible(tick_idx % 10 == 0)

plt.xticks(rotation=30)

plt.savefig("./plots/000_volume_mean_per_dow.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_volume_mean_per_dow.png", bbox_inches="tight")
plt.savefig("./plots/000_volume_mean_per_dow.pdf", bbox_inches="tight")

- Median volume per day of week and time

In [None]:
# Median of the remaining columns per weekday and time of day for the selected detector.
# Columns that are neither grouping keys nor wanted in the median are dropped first.
median_excluded_columns = [
    "detector_id",
    "primary_road",
    "direction",
    "datetime_bin",
    "year_month",
    "year_month_day",
    "month_name",
    "number_of_lanes",
    "latitude",
    "longitude",
    "year",
    "month",
    "day",
]
median_group_keys = ["dow", "dow_name", "time_of_day", "hour", "detector_loc"]

random_detector_data_median = (
    random_detector_data
    .drop(columns=median_excluded_columns)
    .groupby(median_group_keys)
    .median()
    .reset_index()
)
random_detector_data_median.head()

In [None]:
# Distinct hours present in the data (not referenced again in the visible cells).
x_labels = random_detector_data_median["hour"].drop_duplicates().astype(int)

# Median volume over the day for the selected detector, one line per weekday.
median_ax = sns.lineplot(
    data=random_detector_data_median,
    x="time_of_day",
    y="volume_15min",
    hue="dow_name",
)
median_ax.legend().set_title("Weekday")
median_ax.set(xlabel="Time of Day", ylabel="Volume (median)", title=random_detector)

# Show only every 10th x tick label; hide the rest.
for tick_idx, tick_label in enumerate(median_ax.get_xticklabels()):
    tick_label.set_visible(tick_idx % 10 == 0)

plt.xticks(rotation=30)

plt.savefig("./plots/000_volume_median_per_dow.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_volume_median_per_dow.png", bbox_inches="tight")
plt.savefig("./plots/000_volume_median_per_dow.pdf", bbox_inches="tight")

# All detectors volume mean per dow and time

In [None]:
# Mean of every numeric column per (weekday, detector location, time of day).
# numeric_only=True is required: df still contains non-key string columns (e.g. month_name)
# and pandas >= 2.0 raises a TypeError when asked to average them.
detector_data = (
    df.groupby(["dow", "dow_name", "detector_loc", "time_of_day"])
    .mean(numeric_only=True)
    .reset_index()
)
# Unseeded random preview: the rows shown differ between runs.
detector_data.sample(5)

In [None]:
# Mean volume over the day: one facet per detector location, one line per weekday.
g = sns.FacetGrid(
    detector_data,
    col="detector_loc",
    col_wrap=6,
    hue="dow_name",
    sharey=False,
)
g.set_titles(col_template="{col_name}")
g.map(sns.lineplot, "time_of_day", "volume_15min")
g.add_legend(title="Day of Week")
g.set(xlabel="", ylabel="Volume (mean)")

# In every facet keep (and rotate) each 10th x tick label and hide the others.
for facet_ax in g.axes.flat:
    for tick_idx, tick_label in enumerate(facet_ax.get_xticklabels()):
        keep_label = tick_idx % 10 == 0
        tick_label.set_visible(keep_label)
        if keep_label:
            tick_label.set_rotation(45)

plt.savefig("./plots/000_volume_mean_per_dow_all_locs.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_volume_mean_per_dow_all_locs.png", bbox_inches="tight")
# All detectors volume median per dow and time

In [None]:
# Median of every numeric column per (weekday, detector location, time of day).
# numeric_only=True is required: df still contains non-key string columns (e.g. month_name)
# and pandas >= 2.0 raises a TypeError when asked to take their median.
detector_data = (
    df.groupby(["dow", "dow_name", "detector_loc", "time_of_day"])
    .median(numeric_only=True)
    .reset_index()
)
# Unseeded random preview: the rows shown differ between runs.
detector_data.sample(5)

In [None]:
# Median volume over the day: one facet per detector location, one line per weekday.
g = sns.FacetGrid(
    detector_data,
    col="detector_loc",
    col_wrap=6,
    hue="dow_name",
    sharey=False,
)
g.set_titles(col_template="{col_name}")
g.map(sns.lineplot, "time_of_day", "volume_15min")
g.add_legend(title="Day of Week")
g.set(xlabel="", ylabel="Volume (median)")

# In every facet keep (and rotate) each 10th x tick label and hide the others.
for facet_ax in g.axes.flat:
    for tick_idx, tick_label in enumerate(facet_ax.get_xticklabels()):
        keep_label = tick_idx % 10 == 0
        tick_label.set_visible(keep_label)
        if keep_label:
            tick_label.set_rotation(45)

plt.savefig("./plots/000_volume_median_per_dow_all_locs.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_volume_median_per_dow_all_locs.png", bbox_inches="tight")