In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme()

In [None]:
# Load the pre-processed Yonge St counts and discard the "end_day_of_week"
# column (weekday fields are derived from "time_end" further down).
csv_path = "../../../csv/v2/030_yonge_count_processed.csv.zip"
df = pd.read_csv(csv_path)
df = df.drop(columns=["end_day_of_week"])
df

In [None]:
# Parse "time_end" into timezone-aware (UTC) timestamps so the .dt accessor
# can be used on it in the cells that follow.
time_end_utc = pd.to_datetime(df["time_end"], utc=True)
df = df.assign(time_end=time_end_utc)
df

# Keep only locations on Yonge St

In [None]:
# Keep only the rows whose location name contains "YONGE ST".
# .copy() makes the result an independent frame: later cells sort it and add
# columns to it, which on a bare boolean-mask slice can raise
# SettingWithCopyWarning.
df = df[df["location"].str.contains("YONGE ST")].copy()

In [None]:
# Number of distinct detector locations left after filtering
# (dropna=False so a missing location would still count once, as before).
print(df["location"].nunique(dropna=False))

# Understanding the time span

In [None]:
# Order the rows chronologically. Reassigning instead of using inplace=True
# produces a fresh frame, so sorting the filtered subset cannot raise
# SettingWithCopyWarning and the cell stays safe to re-run.
df = df.sort_values(by="time_end")
df

## First and last dates

In [None]:
# Relies on df already being sorted by "time_end" (done in the preceding cell):
# the first and last rows then hold the earliest and latest timestamps.
time_end = df["time_end"]
print("First available date and time:", time_end.iloc[0])
print("Last available date and time:", time_end.iloc[-1])

### Further expand "time_end"

In [None]:
# Break "time_end" into calendar components used for grouping and for
# ordering the plots below.
when = df["time_end"].dt
df["year"] = when.year
df["month_name"] = when.month_name()
df["month"] = when.month
df["day"] = when.day
df["day_of_week_name"] = when.day_name()
df["day_of_week"] = when.day_of_week
df["year_month"] = when.to_period("M")
df.head()

## Show number of readings per day in the entire dataset

In [None]:
# Count the rows recorded on each calendar day; after .count() every remaining
# column (e.g. "_id") holds the number of non-null entries for that day.
readings_per_day = df.groupby(["year", "month_name", "month", "day"]).count().reset_index()
# Sort chronologically: use the numeric "month" rather than "month_name",
# which would order the months alphabetically (April, August, ...).
readings_per_day = readings_per_day.sort_values(by=["year", "month", "day"])
readings_per_day.head()

In [None]:
# Days of the month present in the data, ascending.
day_order = readings_per_day["day"].sort_values().drop_duplicates()
# Month names in chronological order; the trailing drop_duplicates() keeps each
# name once even if the data spans more than one year, so no facet is repeated.
month_order = (
    readings_per_day[["year", "month", "month_name"]]
    .sort_values(by=["year", "month"])
    .drop_duplicates()["month_name"]
    .drop_duplicates()
)

# seaborn draws categorical bars at ordinal positions 0..n-1, not at the day
# value itself, so ticks must be placed at each odd day's *position* in
# day_order; using the day values directly shifts every label by one bar.
odd_day_ticks = [(pos, day) for pos, day in enumerate(day_order) if day % 2 != 0]
tick_positions = [pos for pos, _ in odd_day_ticks]
tick_labels = [day for _, day in odd_day_ticks]

# NOTE: catplot returns a FacetGrid (one bar chart per month), not a single Axes.
ax = sns.catplot(data=readings_per_day, x="day", y="_id", col="month_name", col_wrap=4,
                 col_order=list(month_order), order=list(day_order),
                 margin_titles=True, kind="bar", sharey=False)
ax.set_titles(col_template="{col_name}", row_template="{row_name}")
ax.set(xlabel="", ylabel="Count", xticks=tick_positions)
ax.set_xticklabels(tick_labels, rotation=45)

plt.savefig("./plots/000_number_of_readings_per_day.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_number_of_readings_per_day.png", bbox_inches="tight")

## Number of readings per month

In [None]:
# histplot is given the string form of the year-month period, one bar per month.
df["year_month_str"] = df["year_month"].astype(str)

ax = sns.histplot(data=df, x="year_month_str", color="steelblue")
ax.set(
    ylabel="Num. Observations",
    xlabel="Year & Month",
)

plt.xticks(rotation=30, size=10)

# Save the same figure in three formats.
plt.savefig("./plots/000_yonge_num_readings_per_month.eps", bbox_inches="tight", format="eps")
plt.savefig("./plots/000_yonge_num_readings_per_month.png", bbox_inches="tight")
plt.savefig("./plots/000_yonge_num_readings_per_month.pdf", bbox_inches="tight")

# Get number of observations from each detector and plot top 5 and bottom 5

In [None]:
# Rows per detector location (non-null "_id" entries).
num_observations_per_detector = df.groupby("location")["_id"].count().reset_index()

# Five least- and five most-observed detectors, each taken from its own sort.
lowest_readings = num_observations_per_detector.sort_values(by="_id", ascending=True).iloc[:5]
highest_readings = num_observations_per_detector.sort_values(by="_id", ascending=False).iloc[:5]

# Stack both groups and order them from most to fewest observations.
readings_df = pd.concat([highest_readings, lowest_readings])
readings_df = readings_df.sort_values(by="_id", ascending=False)

In [None]:
# Display the combined table of the most- and least-observed detectors
# (up to five of each), sorted by observation count, descending.
readings_df

In [None]:
# Horizontal bars: one per detector, bar length = number of observations.
ax = sns.barplot(
    data=readings_df,
    x="_id",
    y="location",
    orient="h",
)
ax.set(ylabel="Detector Location")
ax.set(xlabel="Num. Observations")

# Save the same figure in three formats.
plt.savefig("./plots/000_num_readings_per_detector.eps", bbox_inches="tight", format="eps")
plt.savefig("./plots/000_num_readings_per_detector.png", bbox_inches="tight")
plt.savefig("./plots/000_num_readings_per_detector.pdf", bbox_inches="tight")

# Show mean number of readings per day of week in the entire dataset

In [None]:
# Peek at the first rows of the frame before aggregating by day of week.
df.head()

In [None]:
# Mean of every numeric column per (year, month, weekday); the plots below use
# "total_count".
# numeric_only=True is required on pandas >= 2.0, where mean() no longer drops
# non-numeric columns (location strings, period columns) silently and raises
# TypeError instead.
readings_per_dow = (
    df.groupby(["year", "month_name", "month", "day_of_week_name", "day_of_week"])
    .mean(numeric_only=True)
    .reset_index()
)
readings_per_dow.head()

In [None]:
# Order the aggregate chronologically, then by weekday number within a month.
dow_sort_keys = ["year", "month", "day_of_week"]
readings_per_dow = readings_per_dow.sort_values(by=dow_sort_keys)
readings_per_dow.head()

In [None]:
# Weekday names ordered by weekday number, and month names ordered by month number.
dow_order = (
    readings_per_dow[["day_of_week_name", "day_of_week"]]
    .sort_values(by="day_of_week")
    .drop_duplicates()["day_of_week_name"]
)
month_order = (
    readings_per_dow[["month_name", "month"]]
    .sort_values(by="month")
    .drop_duplicates()["month_name"]
)

# The facets are laid out by column ("day_of_week_name"), so the weekday order
# must go to col_order; row_order has no effect without a row variable.
# order= pins the bar order on the x axis so that it matches the tick labels
# written below (otherwise bars follow first-appearance order in the data).
# NOTE: catplot returns a FacetGrid, not a single Axes.
ax = sns.catplot(data=readings_per_dow, x="month_name", y="total_count",
                 col="day_of_week_name", col_wrap=4, sharey=False, kind="bar",
                 margin_titles=True, order=list(month_order), col_order=list(dow_order))
ax.set_titles(col_template="{col_name}", row_template="{row_name}")
ax.set(xlabel="", ylabel="Count")
ax.set_xticklabels(month_order, rotation=45)

plt.savefig("./plots/000_mean_number_of_readings_per_dow.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_mean_number_of_readings_per_dow.png", bbox_inches="tight")

# Show median number of readings per day of week in the entire dataset

In [None]:
# Median of every numeric column per (year, month, weekday); the plots below
# use "total_count".
# numeric_only=True is required on pandas >= 2.0, where median() no longer
# drops non-numeric columns (location strings, period columns) silently and
# raises TypeError instead.
readings_per_dow = (
    df.groupby(["year", "month_name", "month", "day_of_week_name", "day_of_week"])
    .median(numeric_only=True)
    .reset_index()
)
readings_per_dow.head()

In [None]:
# Order the median aggregate chronologically, then by weekday number.
median_sort_keys = ["year", "month", "day_of_week"]
readings_per_dow = readings_per_dow.sort_values(by=median_sort_keys)
readings_per_dow.head()

In [None]:
# Weekday names ordered by weekday number.
dow_order = (
    readings_per_dow[["day_of_week_name", "day_of_week"]]
    .sort_values(by="day_of_week")
    .drop_duplicates()["day_of_week_name"]
)
# Month names ordered by month number, recomputed from this cell's frame so the
# cell does not depend on a variable left over from the mean plot.
month_order = (
    readings_per_dow[["month_name", "month"]]
    .sort_values(by="month")
    .drop_duplicates()["month_name"]
)

# The facets are laid out by column ("day_of_week_name"), so the weekday order
# must go to col_order; row_order has no effect without a row variable.
# order= pins the bar order on the x axis so that it matches the tick labels
# written below (otherwise bars follow first-appearance order in the data).
# NOTE: catplot returns a FacetGrid, not a single Axes.
ax = sns.catplot(data=readings_per_dow, x="month_name", y="total_count",
                 col="day_of_week_name", col_wrap=4, sharey=False, kind="bar",
                 margin_titles=True, order=list(month_order), col_order=list(dow_order))
ax.set_titles(col_template="{col_name}", row_template="{row_name}")
ax.set(xlabel="", ylabel="Count")
ax.set_xticklabels(month_order, rotation=45)

plt.savefig("./plots/000_median_number_of_readings_per_dow.eps", format="eps", bbox_inches="tight")
plt.savefig("./plots/000_median_number_of_readings_per_dow.png", bbox_inches="tight")

# Show total number of readings per location

## Get total number of locations

In [None]:
# Distinct detector locations in the frame (a missing value would count once).
df["location"].nunique(dropna=False)

In [None]:
# Display the frame for inspection before aggregating per detector.
df

In [None]:
# Total "total_count" per detector location.
# Only "total_count" is aggregated: summing every column (as a bare .sum()
# does) fails on pandas >= 2.0 for the datetime/period columns and wastes work
# on the string columns, while only this column was kept afterwards anyway.
vol_per_detector = df.groupby("location")["total_count"].sum().reset_index()
vol_per_detector.head()

In [None]:
# Alphabetical detector order for the x axis.
vol_per_detector = vol_per_detector.sort_values(by="location")

# One bar per detector, coloured by location; the x tick labels are blanked and
# the legend placed outside the axes identifies the detectors instead.
ax = sns.catplot(
    data=vol_per_detector,
    kind="bar",
    x="location",
    y="total_count",
    hue="location",
    dodge=False,
    aspect=2,
)
ax.set(xlabel="Detector Location", ylabel="Vehicle Count (log)", xticklabels="", yscale="log")
plt.legend(loc='upper left', bbox_to_anchor=(1.02, 1), borderaxespad=0)

plt.savefig("./plots/000_total_readings_detector.eps", bbox_inches="tight", format="eps")
plt.savefig("./plots/000_total_readings_detector.png", bbox_inches="tight")

# Vehicle count per detector, per day of week
- Too many locations; still don't know how to plot this

In [None]:
# Sum of every numeric column per (detector, weekday); the plot below uses
# "total_count".
# numeric_only=True is required on pandas >= 2.0, where a bare .sum() no longer
# skips the datetime/period columns and raises TypeError instead.
vol_per_detector_and_dow = (
    df.groupby(["location", "day_of_week", "day_of_week_name"])
    .sum(numeric_only=True)
    .reset_index()
)
vol_per_detector_and_dow.head()

In [None]:
# One point plot per weekday, one point per detector (exploratory; the figure
# is not saved, see the commented-out lines at the end of the cell).
ax = sns.catplot(
    data=vol_per_detector_and_dow,
    kind="point",
    x="location",
    y="total_count",
    col="day_of_week_name",
    hue="location",
    dodge=False,
    sharey=False,
)

ax.set_titles(col_template="{col_name}")
ax.set(xlabel="Detector Location", ylabel="Vehicle Count (log)", xticklabels="", yscale="log")
# plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)

# plt.savefig("./plots/000_readings_per_detector_per_dow.eps",format="eps", bbox_inches="tight")
# plt.savefig("./plots/000_readings_per_detector_per_dow.png", bbox_inches="tight")

# YONGE ST AT CHAPLIN CRES & DAVISVILLE AVE (PX 47) detector data per day of week and time

## Mean volume per day of week and time

In [None]:
# Rows of the single detector analysed in this section.
# .copy() gives an independent frame; adding columns to a bare boolean-mask
# slice of df raises SettingWithCopyWarning.
detector = df[df["location"] == "YONGE ST AT CHAPLIN CRES & DAVISVILLE AVE (PX 47)"].copy()
# Clock time of each reading as a string (used as the x axis) and its hour.
detector["time_of_day"] = detector["time_end"].dt.time.astype(str)
detector["hour"] = detector["time_end"].dt.hour
detector

In [None]:
# Mean "total_count" for each weekday / time-of-day slot of the selected detector.
detector_mean_keys = ["day_of_week", "day_of_week_name", "time_of_day", "location", "hour"]
detector_data_mean = (
    detector
    .groupby(detector_mean_keys)["total_count"]
    .mean()
    .reset_index()
)
detector_data_mean

In [None]:
x_labels = detector_data_mean["hour"].drop_duplicates().astype(int)

# One line per weekday: mean volume against time of day.
ax = sns.lineplot(
    data=detector_data_mean,
    x="time_of_day",
    y="total_count",
    hue="day_of_week_name",
)
ax.legend().set_title("Weekday")
ax.set(xlabel="Time of Day", ylabel="Volume (mean)", title="YONGE ST AT CHAPLIN CRES & DAVISVILLE AVE (PX 47)")

# Thin out the x axis: only every 10th tick label stays visible.
for idx, label in enumerate(ax.get_xticklabels()):
    label.set_visible(idx % 10 == 0)

plt.xticks(rotation=30)

plt.savefig("./plots/000_count_mean_per_dow.eps", bbox_inches="tight", format="eps")
plt.savefig("./plots/000_count_mean_per_dow.png", bbox_inches="tight")
plt.savefig("./plots/000_count_mean_per_dow.pdf", bbox_inches="tight")

## Median volume per day of week and time

In [None]:
# Median "total_count" for each weekday / time-of-day slot of the selected detector.
detector_median_keys = ["day_of_week", "day_of_week_name", "time_of_day", "location", "hour"]
detector_data_median = (
    detector
    .groupby(detector_median_keys)["total_count"]
    .median()
    .reset_index()
)
detector_data_median

In [None]:
x_labels = detector_data_median["hour"].drop_duplicates().astype(int)

# One line per weekday: median volume against time of day.
ax = sns.lineplot(
    data=detector_data_median,
    x="time_of_day",
    y="total_count",
    hue="day_of_week_name",
)
ax.legend().set_title("Weekday")
ax.set(xlabel="Time of Day", ylabel="Volume (median)", title="YONGE ST AT CHAPLIN CRES & DAVISVILLE AVE (PX 47)")

# Thin out the x axis: only every 10th tick label stays visible.
for idx, label in enumerate(ax.get_xticklabels()):
    label.set_visible(idx % 10 == 0)

plt.xticks(rotation=30)

plt.savefig("./plots/000_count_median_per_dow.eps", bbox_inches="tight", format="eps")
plt.savefig("./plots/000_count_median_per_dow.png", bbox_inches="tight")
plt.savefig("./plots/000_count_median_per_dow.pdf", bbox_inches="tight")

# All detectors volume mean per dow and time

In [None]:
# "time_of_day" was only ever added to the single-detector frame, not to df,
# so grouping df by it raises KeyError on a fresh run. Derive it here exactly
# as for the single detector, unless df already carries such a column.
if "time_of_day" in df.columns:
    all_detectors = df
else:
    all_detectors = df.assign(time_of_day=df["time_end"].dt.time.astype(str))

# Mean per (weekday, detector, time of day); the plot below uses "total_count".
# numeric_only=True is required on pandas >= 2.0, where mean() raises on the
# remaining non-numeric columns instead of dropping them.
detector_data = (
    all_detectors
    .groupby(["day_of_week", "day_of_week_name", "location", "time_of_day"])
    .mean(numeric_only=True)
    .reset_index()
)
detector_data.sample(5)

In [None]:
# Small multiples: one panel per detector, one line per weekday (mean count).
g = sns.FacetGrid(
    detector_data,
    col="location",
    col_wrap=8,
    hue="day_of_week_name",
    sharey=False,
)
g.set_titles(col_template="{col_name}", size=8)
g.map(sns.lineplot, "time_of_day", "total_count")
g.add_legend(title="Day of Week")
g.set(xlabel="", ylabel="Count (mean)")

# In every panel keep (and rotate) only every 10th x tick label.
for ax in g.axes.flat:
    for idx, label in enumerate(ax.get_xticklabels()):
        keep = idx % 10 == 0
        label.set_visible(keep)
        if keep:
            label.set_rotation(45)

plt.savefig("./plots/000_count_mean_per_dow_all_locs.eps", bbox_inches="tight", format="eps")
plt.savefig("./plots/000_count_mean_per_dow_all_locs.png", bbox_inches="tight")

# All detectors volume median per dow and time

In [None]:
# "time_of_day" was only ever added to the single-detector frame, not to df,
# so grouping df by it raises KeyError on a fresh run. Derive it here exactly
# as for the single detector, unless df already carries such a column.
if "time_of_day" in df.columns:
    all_detectors = df
else:
    all_detectors = df.assign(time_of_day=df["time_end"].dt.time.astype(str))

# Median per (weekday, detector, time of day); the plot below uses "total_count".
# numeric_only=True is required on pandas >= 2.0, where median() raises on the
# remaining non-numeric columns instead of dropping them.
detector_data = (
    all_detectors
    .groupby(["day_of_week", "day_of_week_name", "location", "time_of_day"])
    .median(numeric_only=True)
    .reset_index()
)
detector_data.sample(5)

In [None]:
# Small multiples: one panel per detector, one line per weekday (median count).
g = sns.FacetGrid(
    detector_data,
    col="location",
    col_wrap=6,
    hue="day_of_week_name",
    sharey=False,
)
g.set_titles(col_template="{col_name}", size=8)
g.map(sns.lineplot, "time_of_day", "total_count")
g.add_legend(title="Day of Week")
g.set(xlabel="", ylabel="Count (median)")

# In every panel keep (and rotate) only every 10th x tick label.
for ax in g.axes.flat:
    for idx, label in enumerate(ax.get_xticklabels()):
        keep = idx % 10 == 0
        label.set_visible(keep)
        if keep:
            label.set_rotation(45)

plt.savefig("./plots/000_count_median_per_dow_all_locs.eps", bbox_inches="tight", format="eps")
plt.savefig("./plots/000_count_median_per_dow_all_locs.png", bbox_inches="tight")