In [2]:
import polars as pl
from dotenv import dotenv_values
import geopandas as gpd
# Parse the local `.env` file into `config`; the query cell reads `DATABASE_URI` from it.
config = dotenv_values(".env")

In [45]:
# Notebook-wide polars display options: up to 50 rows per printed table and
# up to 100 characters per string value.
cfg = pl.Config()
cfg.set_tbl_rows(50).set_fmt_str_lengths(100)

polars.config.Config

In [None]:
# Load the stop points and the zone boundary, tagging both layers as EPSG:4326
# so the spatial join compares geometries in the same CRS.
gdf = gpd.read_file(r"data/stops.geojson").set_crs(4326)
bounds = gpd.read_file(r"data/congestion_zone.geojson").set_crs(4326)

In [5]:
# Keep only the stops whose geometry intersects the zone boundary, then drop the
# bookkeeping column that the join adds.
# NOTE(review): this overwrites `gdf`, so re-running the cell joins the
# already-joined frame; re-run the load cell first.
gdf = (
    gdf
    .sjoin(bounds, how="inner", predicate="intersects")
    .drop(columns="index_right")
)
stop_ids = gdf["id"].tolist()

In [72]:
# Render the stop ids as a Postgres ARRAY literal for the `= ANY(...)` filter below.
# Every id goes through int() so only integer literals can reach the SQL text, and
# an empty list fails here with a clear message instead of as `ARRAY[]` in the database.
if not stop_ids:
    raise ValueError("stop_ids is empty; nothing to query")
stop_ids_sql = "ARRAY[" + ",".join(str(int(stop_id)) for stop_id in stop_ids) + "]"

# Stop arrivals for non-express trips at the selected stops, restricted to a
# time-of-day window. The original note on this window read
# "7am to 10am (converted to utc)".
# NOTE(review): 12:00-16:00 is a four-hour window, while 7am-10am is three hours;
# confirm the intended bounds.
# NOTE(review): the WHERE clause filters on `st` columns, so the LEFT JOIN keeps
# no unmatched trips and behaves like an inner join.
query = f"""
SELECT
	t.id,
	t.vehicle_id,
	t.route_id,
	t.direction,
	t.created_at,
	t.updated_at,
	st.arrival,
	st.stop_id
FROM
	trip t
LEFT JOIN stop_time st ON
	t.id = st.trip_id
WHERE
	t.express IS NULL
	AND st.arrival::time BETWEEN '12:00' AND '16:00'
	AND st.stop_id = ANY({stop_ids_sql})
"""

df: pl.DataFrame = pl.read_database_uri(query, config['DATABASE_URI'])

In [74]:
# Order rows by trip id, then by arrival time within each trip.
df = df.sort("id", "arrival")

In [23]:
# Disabled draft: adds a per-row `travel_time` column, the difference between
# consecutive `arrival` values within each `id`, and drops the rows where that
# difference is null.
# df = df.with_columns(
#     travel_time=pl.col("arrival").diff().over("id"),
# ).filter(pl.col("travel_time").is_not_null())

In [75]:
# One row per trip: elapsed time between the earliest and latest arrival among
# the selected stops, in whole minutes.
# min/max are used rather than first/last so the result does not depend on `df`
# having been sorted by an earlier cell.
dfg = (
    df.group_by("id", "route_id", "direction", "created_at")
    .agg(travel_time=pl.col("arrival").max() - pl.col("arrival").min())
    .with_columns(pl.col("travel_time").dt.total_minutes())
)

In [82]:
# Ten routes with the most rows in `dfg`, most frequent first.
dfg.get_column("route_id").value_counts(sort=True).head(10)

route_id,count
str,u32
"""M15+""",4253
"""M14D+""",3558
"""M42""",2564
"""M15""",2102
"""M101""",2090
"""M23+""",1927
"""M1""",1866
"""M4""",1853
"""M14A+""",1835
"""M31""",1751


In [87]:
# Travel time for route M1 against trip creation time, overlaid with a fitted
# regression line.
route_trips = dfg.filter(pl.col("route_id") == "M1")
chart = route_trips.plot.line(x="created_at", y="travel_time").properties(
    title="Travel time", width=800
)
trend = chart.transform_regression("created_at", "travel_time").mark_line()
chart + trend

In [19]:
# `df` as loaded by the query has no `travel_time` column, so derive it here:
# the gap since the previous arrival within the same trip, with rows ordered by
# arrival. Then keep route M15 at stop 401782 and express the gap in whole minutes.
# NOTE(review): whole minutes is coarse if gaps between adjacent stops are
# typically under a minute; consider `dt.total_seconds()`.
df_filtered = (
    df.sort("id", "arrival")
    .with_columns(travel_time=pl.col("arrival").diff().over("id"))
    .filter((pl.col("route_id") == "M15") & (pl.col("stop_id") == 401782))
    .with_columns(pl.col("travel_time").dt.total_minutes())
)

In [20]:
# Line chart of travel time against arrival time for the filtered stop.
chart = df_filtered.plot.line(x="arrival", y="travel_time")
chart = chart.properties(title="Travel time", width=800)
chart

In [64]:
# Goal: average travel time between pairs of stops.

# NOTE(review): the draft below is disabled. It calls `groupby` where the active
# cells in this notebook use `group_by`, and it references a `stop_id_1` column
# that the frame displayed by this cell does not have.
# df = df.groupby(["stop_id", "stop_id_1"]).agg(pl.col("travel_time").mean().alias("avg_travel_time"))
df

id,mta_id,vehicle_id,route_id,direction,created_at,updated_at,arrival,stop_id,travel_time
str,str,str,str,i32,"datetime[μs, UTC]","datetime[μs, UTC]","datetime[μs, UTC]",i32,duration[μs]
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:33:37 UTC,401779,
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:34:16 UTC,401780,39s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:34:43 UTC,401781,27s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:34:55 UTC,401782,12s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:35:52 UTC,404105,57s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:36:12 UTC,401784,20s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:36:20 UTC,401785,8s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:37:18 UTC,401786,58s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:39:22 UTC,401787,2m 4s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:40:33 UTC,403849,1m 11s


In [None]:
chart = df.plot.