In [1]:
import polars as pl
import altair

In [13]:
# Read just the two columns this notebook uses from the station listing.
stations = pl.read_csv(
    "data/bluebike_stations_geolocated.csv",
    columns=["name", "City"],
)

# Keep only the stations whose City is Cambridge and show them.
cambridge_stations = stations.filter(pl.col("City") == "Cambridge")
display(cambridge_stations)

# NOTE: despite the "_ids" suffix, this set holds station *names* (the "name"
# column); the rides are later matched on their start/end station names.
cambridge_station_ids = set(cambridge_stations["name"])

name,City
str,str
"""Harvard Univer…","""Cambridge"""
"""Ames St at Mai…","""Cambridge"""
"""Kendall Street…","""Cambridge"""
"""MIT Pacific St…","""Cambridge"""
"""Central Square…","""Cambridge"""
"""One Broadway /…","""Cambridge"""
"""Harvard Square…","""Cambridge"""
"""CambridgeSide …","""Cambridge"""
"""Cambridge St -…","""Cambridge"""
"""MIT Stata Cent…","""Cambridge"""


In [14]:
# Load every ride file matching the glob, restricted to the columns used below.
rides = (
    pl.read_csv(
        "data/20*.csv",
        columns=[
            "starttime",
            "start station id",
            "end station id",
            "start station name",
            "end station name",
        ],
    )
    .with_columns(
        # Keep only the first 10 characters of the timestamp text (the
        # YYYY-MM-DD part) and parse them as a Date, non-strictly.
        pl.col("starttime")
        .str.slice(0, 10)
        .str.strptime(pl.Date, fmt="%Y-%m-%d", strict=False)
    )
    # Ordered by date; the later groupby_dynamic calls group on this column.
    .sort("starttime")
)
rides

starttime,start station id,end station id,start station name,end station name
date,i64,i64,str,str
2016-01-01,36,19,"""Boston Public …","""Buswell St. at…"
2016-01-01,36,19,"""Boston Public …","""Buswell St. at…"
2016-01-01,36,19,"""Boston Public …","""Buswell St. at…"
2016-01-01,36,19,"""Boston Public …","""Buswell St. at…"
2016-01-01,36,67,"""Boston Public …","""MIT at Mass Av…"
2016-01-01,110,88,"""Harvard Univer…","""Inman Square a…"
2016-01-01,107,176,"""Ames St at Mai…","""Lesley Univers…"
2016-01-01,141,90,"""Kendall Street…","""Lechmere Stati…"
2016-01-01,178,80,"""MIT Pacific St…","""MIT Stata Cent…"
2016-01-01,68,178,"""Central Square…","""MIT Pacific St…"


In [15]:
# A ride is kept when at least one of its endpoints (start or end station
# name) is in the set of Cambridge station names.
cambridge_rides = rides.filter(
    pl.col("start station name").is_in(cambridge_station_ids)
    | pl.col("end station name").is_in(cambridge_station_ids)
)
cambridge_rides

starttime,start station id,end station id,start station name,end station name
date,i64,i64,str,str
2016-01-01,36,67,"""Boston Public …","""MIT at Mass Av…"
2016-01-01,110,88,"""Harvard Univer…","""Inman Square a…"
2016-01-01,107,176,"""Ames St at Mai…","""Lesley Univers…"
2016-01-01,141,90,"""Kendall Street…","""Lechmere Stati…"
2016-01-01,178,80,"""MIT Pacific St…","""MIT Stata Cent…"
2016-01-01,68,178,"""Central Square…","""MIT Pacific St…"
2016-01-01,72,178,"""One Broadway /…","""MIT Pacific St…"
2016-01-01,68,177,"""Central Square…","""University Par…"
2016-01-01,68,177,"""Central Square…","""University Par…"
2016-01-01,68,88,"""Central Square…","""Inman Square a…"


In [20]:
def _count_rides_per_period(df, every):
    """Count the rows of `df` per time window of its "starttime" column.

    Args:
        df: frame with a "starttime" column to group on.
        every: window length string passed to `groupby_dynamic`
            (this notebook uses "1mo" and "1y").

    Returns:
        The result of `groupby_dynamic("starttime", every=every)` aggregated
        with `pl.count()`, i.e. one row per window with its row count.
    """
    return df.groupby_dynamic("starttime", every=every).agg(pl.count())


def by_month(df):
    """Return the number of rows of `df` per calendar-month window ("1mo")."""
    return _count_rides_per_period(df, every="1mo")


def by_year(df):
    """Return the number of rows of `df` per yearly window ("1y")."""
    return _count_rides_per_period(df, every="1y")

# Aggregate the Cambridge rides at both granularities.
monthly = by_month(cambridge_rides)
yearly = by_year(cambridge_rides)

# Sanity check: the grand total of the monthly counts and of the yearly counts
# are shown one after the other so they can be compared.
display(
    monthly.select([pl.col("count").sum()]),
    yearly.select([pl.col("count").sum()]),
)

# Show the monthly counts from 2022 onwards.
display(
    monthly.filter(pl.col("starttime").dt.year() >= 2022)
)

count
u32
7482990


count
u32
7482990


starttime,count
date,u32
2022-01-01,43555
2022-02-01,63646
2022-03-01,93223
2022-04-01,137045
2022-05-01,163078
2022-06-01,169680
2022-07-01,191386
2022-08-01,207982
2022-09-01,254331
2022-10-01,201566


In [17]:
def linechart(df, title, y_column="count"):
    """Build an Altair line chart of `y_column` against "starttime".

    Args:
        df: frame exposing `to_pandas()`; its "starttime" field is plotted on
            the x axis as a temporal value.
        title: chart title.
        y_column: name of the field plotted on the y axis as a quantitative
            value (defaults to "count").

    Returns:
        The Altair chart object with the title applied.
    """
    chart = altair.Chart(df.to_pandas())
    lines = chart.mark_line().encode(
        x="starttime:T",
        y=f"{y_column}:Q",
    )
    return lines.properties(title=title)

# Monthly series over the whole dataset.
display(
    linechart(monthly, title='Monthly Bluebikes rides starting/ending in Cambridge')
)

# Yearly series, restricted to years before 2023 to match the 2016-2022 title.
display(
    linechart(
        yearly.filter(pl.col("starttime").dt.year() < 2023),
        title='Yearly Bluebikes rides starting/ending in Cambridge (2016-2022)',
    )
)

In [23]:
# Split each monthly bucket's start date into separate Month and Year columns
# and carry the ride count along under a capitalised name.
monthly2 = monthly.select(
    [
        pl.col("starttime").dt.month().alias("Month"),
        pl.col("starttime").dt.year().alias("Year"),
        pl.col("count").alias("Count"),
    ]
)
monthly2

Month,Year,Count
u32,i32,u32
1,2016,12031
2,2016,14631
3,2016,26227
4,2016,44246
5,2016,57531
6,2016,70931
7,2016,73552
8,2016,74497
9,2016,76185
10,2016,65004


In [28]:
# Reshape to one row per Month and one column per Year, holding the ride Count.
#
# aggregate_function is passed explicitly instead of relying on pivot's
# implicit default (the original call emitted a warning in the cell output —
# presumably the deprecation of that implicit default; confirm against the
# installed polars version). monthly2 is derived from the per-month aggregate,
# so each (Month, Year) pair has a single row and "first" simply picks it.
month_per_year = monthly2.pivot(
    values="Count",
    index="Month",
    columns="Year",
    aggregate_function="first",
)
month_per_year

  month_per_year = monthly2.pivot(values="Count", index="Month", columns="Year")


Month,2016,2017,2018,2019,2020,2021,2022,2023
u32,u32,u32,u32,u32,u32,u32,u32,u32
1,12031,17181,23533,37108,65306,34562,43555,72857.0
2,14631,17221,35994,44724,71408,30185,63646,85216.0
3,26227,22636,35682,55133,53050,64627,93223,105441.0
4,44246,50419,52708,83450,16506,81994,137045,
5,57531,60602,86049,105137,41073,116425,163078,
6,70931,73351,95606,122512,66621,123875,169680,
7,73552,79994,115805,142630,92341,126275,191386,
8,74497,85348,112517,146264,103762,149732,207982,
9,76185,79270,115410,165361,117601,204170,254331,
10,65004,81685,97423,145857,100752,192592,201566,


In [43]:
# Short alias for the altair module, used consistently in this cell.
alt = altair

# Layer one line per year column of the pivoted frame (2019 through 2023):
# the repeated 'layer' field supplies both the y values and the colour datum.
(
    alt.Chart(month_per_year.to_pandas())
    .mark_line()
    .encode(
        x=alt.X("Month"),
        y=alt.Y(alt.repeat('layer'), type="quantitative").title("Monthly rides"),
        color=alt.ColorDatum(alt.repeat('layer'), type="ordinal"),
    )
    .repeat(layer=[str(year) for year in range(2019, 2024)])
    .properties(title="Monthly BlueBikes rides to/from Cambridge")
)
