In [2]:
import polars as pl
from dotenv import dotenv_values
import geopandas as gpd
import requests
from io import StringIO

# Key/value settings parsed from the local ".env" file (read later for DATABASE_URI).
config = dotenv_values(dotenv_path=".env")

In [3]:
# Display settings for polars output: up to 50 table rows, strings shown up to 100 characters.
cfg = pl.Config()
# The setters return the Config class, so they can be chained; the cell still
# ends on the value returned by set_fmt_str_lengths.
cfg.set_tbl_rows(50).set_fmt_str_lengths(100)

polars.config.Config

In [4]:
# Disabled: fetch the bus stop list from the trainstat.us API and parse the
# JSON response text with polars.
# r = requests.get("https://trainstat.us/api/v1/stops?route_type=bus").text
# pl.read_json(StringIO(r))

In [7]:
# Load the MTA bus route segment speeds CSV export into a polars DataFrame.
df = pl.read_csv(source="data/MTA_Bus_Route_Segment_Speeds__Beginning_2023_20250127.csv")

In [None]:
# Parse the "Timestamp" strings (12-hour clock with AM/PM) into datetimes.
# The expression keeps the column name, so it replaces "Timestamp" in place.
# NOTE: this only works while "Timestamp" is still a string column, i.e. once per load.
df = df.with_columns(
    pl.col("Timestamp").str.to_datetime(format="%m/%d/%Y %I:%M:%S %p")
)

In [12]:
# Display the segment-speed rows, newest timestamp first.
df.sort(by="Timestamp", descending=True)

Year,Month,Timestamp,Day of Week,Hour of Day,Route ID,Direction,Borough,Route Type,Stop Order,Timepoint Stop ID,Timepoint Stop Name,Timepoint Stop Latitude,Timepoint Stop Longitude,Next Timepoint Stop ID,Next Timepoint Stop Name,Next Timepoint Stop Latitude,Next Timepoint Stop Longitude,Road Distance,Average Travel Time,Average Road Speed,Bus Trip Count,Timepoint Stop Georeference,Next Timepoint Stop Georeference
i64,i64,datetime[μs],str,i64,str,str,str,str,i64,i64,str,f64,f64,i64,str,f64,f64,f64,f64,f64,i64,str,str
2024,12,2024-12-01 23:00:00,"""Friday""",23,"""M50""","""E""","""Manhattan""","""Local""",12,402171,"""E 50 ST/5 AV""",40.758431,-73.977104,402175,"""E 50 ST/2 AV""",40.754881,-73.9688,0.5,6.84168,4.384888,2,"""POINT (-73.977104 40.758431)""","""POINT (-73.9688 40.754881)"""
2024,12,2024-12-01 23:00:00,"""Friday""",23,"""Q15""","""N""","""Queens""","""Local""",7,501153,"""150 ST/NORTHERN BL""",40.765633,-73.815117,501166,"""150 ST/15 DR""",40.785497,-73.813593,1.38,8.038086,10.300956,7,"""POINT (-73.815117 40.765633)""","""POINT (-73.813593 40.785497)"""
2024,12,2024-12-01 23:00:00,"""Friday""",23,"""BX42""","""E""","""Bronx""","""Local""",31,102613,"""RANDALL AV/E TREMONT AV""",40.826154,-73.821999,104299,"""HARDING AV/HOSMER AV""",40.813235,-73.826535,2.021,9.981672,12.148265,10,"""POINT (-73.821999 40.826154)""","""POINT (-73.826535 40.813235)"""
2024,12,2024-12-01 23:00:00,"""Friday""",23,"""M4""","""N""","""Manhattan""","""Local""",51,903236,"""BROADWAY / W 135 ST""",40.820642,-73.954489,400613,"""BROADWAY/W 145 ST""",40.826791,-73.94997,0.501,4.201854,7.153986,9,"""POINT (-73.954489 40.820642)""","""POINT (-73.94997 40.826791)"""
2024,12,2024-12-01 23:00:00,"""Friday""",23,"""S61""","""W""","""Staten Island""","""Local""",41,203689,"""FOREST HILL RD/ROCKLAND AV""",40.589154,-74.145849,203197,"""PLATINUM AV/SEARS""",40.578047,-74.166551,2.345,8.361108,16.827909,6,"""POINT (-74.145849 40.589154)""","""POINT (-74.166551 40.578047)"""
2024,12,2024-12-01 23:00:00,"""Friday""",23,"""Q7""","""E""","""Queens""","""Local""",1,350000,"""PITKIN AV /EUCLID AV""",40.675497,-73.871784,553297,"""LIBERTY AV/95 ST""",40.680362,-73.84404,1.997,11.294442,10.60876,3,"""POINT (-73.871784 40.675497)""","""POINT (-73.84404 40.680362)"""
2024,12,2024-12-01 23:00:00,"""Friday""",23,"""B6""","""W""","""Brooklyn""","""Local""",1,306921,"""LIVONIA AV/ASHFORD ST""",40.666382,-73.883617,300590,"""COZINE AV /ASHFORD ST""",40.658352,-73.877775,0.724,5.087496,8.538582,20,"""POINT (-73.883617 40.666382)""","""POINT (-73.877775 40.658352)"""
2024,12,2024-12-01 23:00:00,"""Friday""",23,"""BXM1""","""N""","""Bronx""","""Express""",13,404250,"""BROADWAY/W 207 ST""",40.867705,-73.920997,984006,"""W 230 ST / BROADWAY""",40.877199,-73.906891,1.027,6.027084,10.223852,8,"""POINT (-73.920997 40.867705)""","""POINT (-73.906891 40.877199)"""
2024,12,2024-12-01 23:00:00,"""Friday""",23,"""Q25""","""N""","""Queens""","""Local""",20,550999,"""KISSENA BL/JEWEL AV""",40.732447,-73.814763,551051,"""MAIN ST/ROOSEVELT AV""",40.759377,-73.829892,2.206,14.541666,9.10212,8,"""POINT (-73.814763 40.732447)""","""POINT (-73.829892 40.759377)"""
2024,12,2024-12-01 23:00:00,"""Friday""",23,"""B3""","""E""","""Brooklyn""","""Local""",25,300208,"""AV U/NOSTRAND AV""",40.600372,-73.941954,300220,"""AV U/FLATBUSH AV""",40.610106,-73.921794,1.29,8.87292,8.723171,8,"""POINT (-73.941954 40.600372)""","""POINT (-73.921794 40.610106)"""


In [20]:
# Read the stops and the congestion-zone boundary, tagging both as EPSG:4326
# so they share a CRS for the spatial join.
gdf = gpd.read_file(r"data/stops.geojson").set_crs(4326)
bounds = gpd.read_file(r"data/congestion_zone.geojson").set_crs(4326)

In [21]:
# Keep only the stops that match the congestion-zone geometry (inner spatial join),
# then drop the join's bookkeeping column.
gdf = gdf.sjoin(bounds, how="inner")
gdf = gdf.drop(columns=["index_right"])

# Ids of the stops that survived the join.
stop_ids = gdf["id"].tolist()

In [27]:
# SQL `ARRAY[...]` literal listing the stop ids selected above.
# NOTE(review): this value is never referenced by the query below — confirm
# whether a stop_id filter was intended.
stop_ids_sql = "ARRAY[" + ",".join(str(stop_id) for stop_id in stop_ids) + "]"

# Trips whose `express` is NULL, on routes whose id starts with "M", created
# between 11:00 and 16:00, each LEFT JOINed to its stop_time rows (so a trip
# without stop times yields a row with NULL arrival/stop_id).
# Original note on the window: "7am to 10am (converted to utc)".
# NOTE(review): an 11:00-16:00 window is five hours wide, not three — confirm
# which bounds are intended and which time zone `created_at::time` is evaluated in.
query = """
SELECT
	t.id,
	t.vehicle_id,
	t.route_id,
	t.direction,
	t.created_at,
	t.updated_at,
	st.arrival,
	st.stop_id
FROM
	trip t
LEFT JOIN stop_time st ON
	t.id = st.trip_id
WHERE
	t.express IS NULL
	AND t.created_at::time BETWEEN '11:00' AND '16:00'
    AND t.route_id LIKE 'M%'
"""

# Run the query against the database named by DATABASE_URI in the .env config.
# Note: this rebinds `df`, replacing the CSV frame loaded earlier.
df: pl.DataFrame = pl.read_database_uri(query=query, uri=config["DATABASE_URI"])

In [29]:
# Order rows by trip id, then by arrival time within each trip.
df = df.sort("id", "arrival")

In [23]:
# Disabled: per-stop travel time as the difference between consecutive
# `arrival` values within each trip (`id`), dropping rows where that
# difference is null.
# df = df.with_columns(
#     travel_time=pl.col("arrival").diff().over("id"),
# ).filter(pl.col("travel_time").is_not_null())

In [30]:
# One row per trip with its overall duration: the span between the earliest and
# the latest recorded stop arrival.
# max()/min() are used instead of last()/first() so the result does not depend
# on `df` having been sorted by an earlier cell, and so null arrivals (possible
# because the query LEFT JOINs stop_time) are skipped instead of turning the
# whole trip's duration into null. Trips with no arrivals at all stay null.
dfg = (
    df.group_by("id", "route_id", "direction", "created_at")
    .agg(travel_time=pl.col("arrival").max() - pl.col("arrival").min())
    # Duration -> integer number of minutes.
    .with_columns(pl.col("travel_time").dt.total_minutes())
)

In [31]:
# Number of trips per route, most frequent first; show the top ten.
route_counts = dfg["route_id"].value_counts(sort=True)
route_counts.head(10)

route_id,count
str,u32
"""M101""",1894
"""M60+""",1731
"""M15""",1707
"""M86+""",1287
"""M14D+""",1158
"""M15+""",1028
"""M102""",1016
"""M125""",884
"""M96""",845
"""M14A+""",816


In [35]:
# Trip duration over trip creation time for route M14A+.
m14a_trips = dfg.filter(pl.col("route_id") == "M14A+")
chart = m14a_trips.plot.line(x="created_at", y="travel_time").properties(
    title="Travel time", width=800
)

# Overlay a regression line of travel_time on created_at (the chart library's default fit).
trend_line = chart.transform_regression("created_at", "travel_time").mark_line()
chart + trend_line

In [19]:
# Travel time into stop 401782 on route M15: the gap between each arrival and
# the previous arrival of the same trip.
# The gap is computed here because the only other cell that creates
# `travel_time` is commented out, so on a fresh kernel `df` has no such column.
# Rows are sorted by trip and arrival first so that diff() compares consecutive
# stops; a trip's first row has no predecessor and gets a null gap.
df_filtered = (
    df.sort("id", "arrival")
    .with_columns(travel_time=pl.col("arrival").diff().over("id"))
    .filter((pl.col("route_id") == "M15") & (pl.col("stop_id") == 401782))
    # Fractional minutes: stop-to-stop gaps are often under a minute, and
    # dt.total_minutes() would reduce them to whole minutes (mostly 0).
    .with_columns(travel_time=pl.col("travel_time").dt.total_seconds() / 60)
)

In [20]:
# Line chart of travel_time against arrival time for the filtered rows.
stop_line = df_filtered.plot.line(x="arrival", y="travel_time")
chart = stop_line.properties(title="Travel time", width=800)
chart

In [64]:
# Goal: get the average travel time between stops.

# Disabled draft of that aggregation, kept for reference.
# NOTE(review): it groups on a `stop_id_1` column that no visible cell creates,
# and it spells the method `groupby` while the rest of this notebook uses
# `group_by` — confirm both before re-enabling.
# df = df.groupby(["stop_id", "stop_id_1"]).agg(pl.col("travel_time").mean().alias("avg_travel_time"))
df

id,mta_id,vehicle_id,route_id,direction,created_at,updated_at,arrival,stop_id,travel_time
str,str,str,str,i32,"datetime[μs, UTC]","datetime[μs, UTC]","datetime[μs, UTC]",i32,duration[μs]
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:33:37 UTC,401779,
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:34:16 UTC,401780,39s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:34:43 UTC,401781,27s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:34:55 UTC,401782,12s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:35:52 UTC,404105,57s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:36:12 UTC,401784,20s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:36:20 UTC,401785,8s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:37:18 UTC,401786,58s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:39:22 UTC,401787,2m 4s
"""0193e298-6c0c-70c2-b7f3-5b7577…","""OH_D4-Weekday-SDon-012000_M15_…","""5978""","""M15""",1,2024-12-20 10:44:20.236035 UTC,2024-12-20 07:52:10.319158 UTC,2024-12-20 07:40:33 UTC,403849,1m 11s


In [None]:
chart = df.plot.