In [1]:
import os
from pathlib import Path

from typing import Optional
import altair as alt
import geopandas as gpd

# Turn off Altair's max-rows check; every chart below is built directly from
# the full `gdf` GeoDataFrame rather than from a pre-aggregated sample.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# Load the subway ridership data
#### (generated from the aggregates in `nyc_subway_ridership_data_processing.ipynb`)

In [2]:
# Daily ridership per station complex (file name covers 2024-01-01 to 2025-01-01).
# Later cells read these fields from it: station_complex_id, station_complex,
# transit_timestamp, month, total_daily_ridership, borough, and the geometry
# coordinates.
gdf = gpd.read_file(
    "../data/mta_subway_total_daily_ridership_by_station_2024-01-01_to_2025-01-01.geojson"
)

In [3]:
# Borough outlines used as the grey backdrop behind the station map.
# NOTE: this is fetched from GitHub (`main` branch) on every run, so the cell
# needs network access and the upstream file may change over time.
#
# No `driver=` keyword is passed: the format is detected from the file itself.
# Passing `driver="GeoJSON"` appears to be forwarded as an (unsupported) open
# option by the default read engine, which is what produced the warning this
# cell used to emit.
nyc_boroughs_gdf = gpd.read_file(
    "https://raw.githubusercontent.com/codeforgermany/click_that_hood/main/public/data/new-york-city-boroughs.geojson"
)

# Staten Island is left out of the backdrop (presumably because the ridership
# data has no stations there -- confirm). Bracket access is used for the
# "name" column so it can never be confused with a DataFrame attribute.
boroughs_chart = alt.Chart(
    nyc_boroughs_gdf[nyc_boroughs_gdf["name"] != "Staten Island"]
).mark_geoshape(
    fill="lightgray",
    stroke="black",
)

  return ogr_read(


In [4]:
def save_visualization(
    chart: "alt.TopLevelMixin",
    format: str = "html",
    filename: Optional[str] = None,
    output_dir: str = "../data/",
) -> None:
    """
    Save an Altair chart to ``<output_dir>/<filename>.<format>``.

    Parameters
    ----------
    chart
        Any top-level Altair chart (single, layered or concatenated). Only its
        ``save`` method is used.
    format
        Output format: ``"html"`` or ``"png"``.
    filename
        Base file name without extension. When empty or ``None`` the name
        ``"mta_visualization_2_point_0_inline"`` is used.
    output_dir
        Directory the file is written to; created if it does not exist.
        Defaults to ``"../data/"``.

    Raises
    ------
    ValueError
        If ``format`` is neither ``"html"`` nor ``"png"``.
    """
    # Fail fast: reject unsupported formats before touching the filesystem.
    if format not in ("html", "png"):
        raise ValueError("Unsupported format. Use 'html' or 'png'.")

    if not filename:
        filename = "mta_visualization_2_point_0_inline"

    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    filepath = target_dir / f"{filename}.{format}"

    if format == "html":
        # inline=True asks Altair to embed its JavaScript dependencies in the
        # HTML file instead of loading them from a CDN.
        chart.save(filepath, format=format, inline=True)
    else:
        # PNG: the format is taken from the file extension; scale_factor=2.0
        # doubles the output resolution.
        chart.save(filepath, scale_factor=2.0)


In [5]:
# Annual ridership per station complex; its min/max define the colour-scale
# domain of the station map.
#
# The column is selected explicitly before summing. The first positional
# argument of groupby `.sum()` is `numeric_only`, not a column name, so
# `.sum("total_daily_ridership")` only worked by accident: it summed every
# numeric column. The result now has exactly three columns:
# station_complex_id, station_complex and total_daily_ridership.
annual_ridership_by_station = (
    gdf.groupby(["station_complex_id", "station_complex"], as_index=False)[
        "total_daily_ridership"
    ].sum()
)

In [7]:
# Month selection, keyed on the `month` field and driven by clicks with
# toggling enabled. It is attached to the monthly bar chart and used as a
# filter by both trend charts.
month_selector = alt.selection_point(
    name="month_selector",
    fields=["month"],
    on="click",
    toggle=True,
    clear=False,
)

# Station selection, keyed on `station_complex_id`. The explicit name matters:
# the aggregated trend chart looks up the backing store by name
# ('station_selector_store') to detect whether anything is selected.
station_selector = alt.selection_point(
    name="station_selector",
    fields=["station_complex_id"],
    on="click",
    empty=False,
)

# Hover selection used only to thicken the outline of the mark under the pointer.
highlight = alt.selection_point(
    name="highlight",
    on="pointerover",
    empty=False,
)

# Outline width for the monthly bars: selected month > hovered bar > default.
stroke_width = (
    alt.when(month_selector)
    .then(alt.value(3, empty=False))
    .when(highlight)
    .then(alt.value(2.5))
    .otherwise(alt.value(1))
)

# Outline width for the station circles: selected station > hovered > default.
stroke_width_station = (
    alt.when(station_selector)
    .then(alt.value(3, empty=False))
    .when(highlight)
    .then(alt.value(2))
    .otherwise(alt.value(0.5))
)

# Map of station complexes, one circle per complex, coloured by the sum of
# daily ridership over the whole dataset. Clicking a circle drives
# `station_selector`.
station_map = (
    alt.Chart(gdf)
    .transform_calculate(
        # Pull plain lon/lat numbers out of each feature's geometry so they
        # can be used as groupby keys and position channels.
        lon="datum.geometry.coordinates[0]",
        lat="datum.geometry.coordinates[1]",
    )
    .transform_aggregate(
        total_daily_ridership="sum(total_daily_ridership)",
        # An aggregate keeps only the groupby fields and its outputs, so
        # `borough` has to be listed here or the Borough tooltip is empty.
        # Assumes each station complex maps to a single borough -- confirm.
        groupby=["station_complex_id", "station_complex", "borough", "lon", "lat"],
    )
    .mark_circle(stroke="black")
    .encode(
        # Use the dedicated geographic channel classes rather than X/Y.
        longitude=alt.Longitude("lon:Q"),
        latitude=alt.Latitude("lat:Q"),
        strokeWidth=stroke_width_station,
        color=alt.Color(
            "total_daily_ridership:Q",
            scale=alt.Scale(
                type="log",
                scheme="viridis",
                # Domain comes from the pandas-side annual totals.
                # NOTE(review): a log scale cannot represent 0 -- confirm no
                # station has zero annual ridership.
                domain=[
                    annual_ridership_by_station["total_daily_ridership"].min(),
                    annual_ridership_by_station["total_daily_ridership"].max(),
                ],
                reverse=True,
            ),
            title="Total Annual Ridership",
        ),
        size=alt.value(50),
        tooltip=[
            alt.Tooltip("station_complex_id:N", title="Station Complex ID"),
            alt.Tooltip("station_complex:N", title="Station Complex"),
            alt.Tooltip("total_daily_ridership:Q", title="Ridership"),
            alt.Tooltip("borough:N", title="Borough"),
        ],
    )
    .add_params(station_selector)
    .project("albersUsa")
    .properties(
        width=500,
        height=800,
        title=alt.Title(text="Map of MTA Subway Stations Colored by Total Annual Ridership"),
    )
)

# System-wide daily ridership line, restricted to the selected month(s).
# It is drawn only while no station is selected: the final filter keeps rows
# only when the station selection's backing store is empty.
agg_trend_chart = (
    alt.Chart(gdf)
    .transform_filter(month_selector)
    .transform_aggregate(
        groupby=["transit_timestamp"],
        total_daily_ridership="sum(total_daily_ridership)",
    )
    .transform_filter(
        alt.expr.length(alt.expr.data('station_selector_store')) == 0
    )
    .mark_line()
    .encode(
        x=alt.X("transit_timestamp:T", axis=alt.Axis(labelAngle=0), title="Day"),
        y=alt.Y("total_daily_ridership:Q", title="Total Daily Ridership"),
    )
    .add_params(station_selector, month_selector)
    .properties(height=350, width=800)
)

# Daily ridership line for the station selected on the map, restricted to the
# selected month(s). `station_selector` is created with empty=False, so this
# chart has no rows until a station is clicked.
#
# The lon/lat calculated fields were removed: nothing in this chart (filters,
# encodings) reads them, so they were computed for every row for no effect.
trend_chart = (
    alt.Chart(gdf)
    .transform_filter(month_selector)
    .transform_filter(station_selector)
    .mark_line()
    .encode(
        x=alt.X("transit_timestamp:T", title="Day", axis=alt.Axis(labelAngle=0)),
        y=alt.Y("total_daily_ridership:Q", title="Total Daily Ridership"),
        color=alt.Color("station_complex:N", title="Station Complex"),
    )
    .add_params(station_selector, month_selector)
    .properties(
        width=800,
        height=350,
        title=alt.Title(text="Total Daily MTA Subway Ridership in (2024) (aggregated or by station complex)"),
    )
)

# Month number -> display name. Single source of truth for both the x-axis
# sort order and the in-chart lookup expression below.
month_labels = {
    1: "January", 2: "February", 3: "March", 4: "April",
    5: "May", 6: "June", 7: "July", 8: "August",
    9: "September", 10: "October", 11: "November", 12: "December"
}

# Monthly ridership bar chart; clicking bars toggles `month_selector`, which
# filters both trend charts.
base = (
    alt.Chart(gdf)
    .transform_calculate(
        # Vega expression of the form {'1': 'January', ...}[datum.month + ''].
        # It is generated from `month_labels` instead of repeating the mapping
        # by hand; `+ ''` turns the numeric month into the string key.
        month_name="{"
        + ", ".join(f"'{number}': '{label}'" for number, label in month_labels.items())
        + "}[datum.month + '']"
    )
    .transform_aggregate(
        total_monthly_ridership="sum(total_daily_ridership)",
        groupby=["month", "month_name"],
    )
    .mark_bar(stroke="black")
    .encode(
        x=alt.X("month_name:N", title="Month", sort=list(month_labels.values())),
        y=alt.Y("total_monthly_ridership:Q", title="Total Monthly Ridership"),
        color=alt.condition(
            month_selector,
            alt.value("steelblue"),
            alt.value("lightblue"),
        ),
        strokeWidth=stroke_width,
        opacity=alt.condition(month_selector, alt.value(1), alt.value(0.65)),
    )
    .add_params(month_selector, highlight)
    .properties(
        width=800,
        height=350,
        title=alt.Title(text="Monthly Total Ridership"),
    )
)

# Right-hand column: the two trend lines layered on top of each other, with
# the monthly bar chart stacked underneath.
summary_chart = alt.vconcat(
    trend_chart + agg_trend_chart,
    base,
)

# Full dashboard: station map (over the borough backdrop) on the left, summary
# column on the right. labelLimit is raised for the legend labels.
final_chart = (
    ((boroughs_chart + station_map) | summary_chart)
    .configure_legend(labelLimit=640)
    .properties(
        title=alt.Title(
            text="New York MTA Subway Ridership has strong weekly cycles with other anomalies often coinciding with holidays.",
            anchor="start",
            align="left",
            fontSize=30,
        )
    )
)

In [8]:
# Export the finished dashboard as an HTML file.
save_visualization(
    final_chart,
    filename="mta_visualization_2_point_0_updated",
    format="html",
)