In [1]:
import polars as pl
import altair as alt
from datetime import datetime

In [24]:
# Read the gzipped CSV, parse the "Date" strings (MM/DD/YYYY) into real dates,
# order the rows chronologically, and drop the partial months at either end:
# keep 2015-07-01 (inclusive) up to 2023-03-01 (exclusive).
broadway = (
    pl.read_csv(
        "data/broadway.csv.gz",
        dtypes={"Day": pl.Categorical},
        try_parse_dates=True,
    )
    .with_columns(pl.col("Date").str.strptime(pl.Date, fmt="%m/%d/%Y"))
    .sort("Date")
    .filter(
        (pl.col("Date") >= datetime(2015, 7, 1))
        & (pl.col("Date") < datetime(2023, 3, 1))
    )
)
broadway

DateTime,Day,Date,Time,Total,Westbound,Eastbound
str,cat,date,time,i64,i64,i64
"""07/01/2015 12:…","""Wednesday""",2015-07-01,00:00:00,6,3,3
"""07/01/2015 12:…","""Wednesday""",2015-07-01,00:15:00,2,1,1
"""07/01/2015 12:…","""Wednesday""",2015-07-01,00:30:00,2,2,0
"""07/01/2015 12:…","""Wednesday""",2015-07-01,00:45:00,1,1,0
"""07/01/2015 01:…","""Wednesday""",2015-07-01,01:00:00,1,1,0
"""07/01/2015 01:…","""Wednesday""",2015-07-01,01:15:00,2,1,1
"""07/01/2015 01:…","""Wednesday""",2015-07-01,01:30:00,1,1,0
"""07/01/2015 01:…","""Wednesday""",2015-07-01,01:45:00,1,0,1
"""07/01/2015 02:…","""Wednesday""",2015-07-01,02:00:00,0,0,0
"""07/01/2015 02:…","""Wednesday""",2015-07-01,02:15:00,1,1,0


In [25]:
# Column sums for June 2016 only: overall count plus each direction.
is_june_2016 = (pl.col("Date").dt.month() == 6) & (pl.col("Date").dt.year() == 2016)
broadway.filter(is_june_2016).select(
    [pl.col(name).sum() for name in ("Total", "Westbound", "Eastbound")]
)

Total,Westbound,Eastbound
i64,i64,i64
45803,19895,25908


In [11]:
def by_month(df):
    """Sum the "Total" column over one-month windows of the "Date" column.

    Args:
        df: a polars frame with a "Date" column and a numeric "Total" column.
            NOTE(review): groupby_dynamic is documented as expecting the index
            column sorted ascending; the loading cell sorts by "Date", so
            callers should only pass filtered views of that frame.

    Returns:
        A frame with one row per monthly window, holding the window's "Date"
        and the summed "Total".
    """
    monthly_windows = df.groupby_dynamic("Date", every="1mo")
    summed_total = pl.col("Total").sum()
    return monthly_windows.agg(summed_total)

monthly = by_month(broadway)
monthly

Date,Total
date,i64
2015-07-01,48851
2015-08-01,47162
2015-09-01,50185
2015-10-01,43826
2015-11-01,35944
2015-12-01,25203
2016-01-01,17526
2016-02-01,18331
2016-03-01,27342
2016-04-01,32399


In [17]:
def linechart(df, title):
    """Build an Altair line chart of "Total" (quantitative) over "Date" (temporal).

    Args:
        df: a polars frame with "Date" and "Total" columns; it is converted
            with .to_pandas() before being handed to Altair.
        title: text used as the chart title.

    Returns:
        The configured Altair chart object.
    """
    frame = df.to_pandas()
    lines = alt.Chart(frame).mark_line()
    plotted = lines.encode(x="Date:T", y="Total:Q")
    return plotted.properties(title=title)

linechart(monthly, 'Monthly Broadway bike volume, all days of week')

In [18]:
# Values 1-5 are meant to select Monday-Friday. This assumes dt.weekday() uses
# ISO numbering (Monday = 1 ... Sunday = 7) - confirm for the installed polars version.
is_weekday = pl.col("Date").dt.weekday().is_in([1, 2, 3, 4, 5])
linechart(by_month(broadway.filter(is_weekday)), "Weekdays only")

In [19]:
# Values 6 and 7 are meant to select Saturday and Sunday. This assumes dt.weekday()
# uses ISO numbering (Monday = 1 ... Sunday = 7) - confirm for the installed polars version.
is_weekend = pl.col("Date").dt.weekday().is_in([6, 7])
linechart(by_month(broadway.filter(is_weekend)), "Weekends only")

## Conclusion

My hypothesis was that weekend counts on Broadway might be a good way to see trends that aren't affected by the pandemic and the resulting shift to working from home.

Unfortunately, there seems to be a lot of location-specific noise. For example, there is a massive summer weekend spike in 2019 (construction?), and 2015, when the counter was first installed, is higher (bad calibration?). New Bluebikes locations, construction, and shifts in the availability of protected bike lanes (PBLs) probably all affect the results, to the point where I don't think you can draw meaningful conclusions from a single location.