In [31]:
import pandas as pd # data wrangler library, dataframes are used to display and manipulate data
import seaborn as sns # data graphing library, built on top of matplotlib
import matplotlib.pyplot as plt # graphing library, used for titles and customization
import urllib.parse # library to parse URLs for querying
import folium # library to create interactive maps
import folium.plugins as plugins # plugins for folium, used for clustering map points
import geopandas as gpd # library to handle geospatial data

In [32]:
def encode_soql_query(query:str) -> str:
    # just in case there are any new lines in the query, replaces them with spaces
    while '\n' in query:
        query = query.replace('\n', ' ')
    
    # encodes the query to be URL friendly
    parsed_query = urllib.parse.quote(query)
    
    # return query with prefix needed for soql queries
    return '?$query=' + parsed_query

In [33]:
ace_violations_api = "https://data.ny.gov/resource/kh8p-hcbm.csv"

In [34]:
latest_violations_query = """
SELECT  * 
WHERE violation_status = 'VIOLATION ISSUED'
LIMIT 250
"""

encoded_query = encode_soql_query(latest_violations_query)
api_query = ace_violations_api + encoded_query

In [35]:
violations_df = pd.read_csv(api_query)
violations_df.head()

Unnamed: 0,violation_id,vehicle_id,first_occurrence,last_occurrence,violation_status,violation_type,bus_route_id,violation_latitude,violation_longitude,stop_id,stop_name,bus_stop_latitude,bus_stop_longitude,violation_georeference,bus_stop_georeference
0,488695180,29af8d864d6758cca3c301fabad56e5e27c2bd3688d50f...,2025-08-13T08:12:16.000,2025-08-13T08:18:52.000,VIOLATION ISSUED,MOBILE BUS LANE,BX35,40.84953,-73.934291,403783,WADSWORTH AV/W 181 ST,40.849579,-73.935065,POINT (-73.934291 40.84953),POINT (-73.935065 40.849579)
1,488623774,16888160f975233eacabe8ee726e9bbe5c5faaf72ea7c1...,2025-07-27T21:49:28.000,2025-07-27T22:03:27.000,VIOLATION ISSUED,MOBILE DOUBLE PARKED,BX12+,40.866168,-73.919078,402611,BROADWAY/ISHAM ST,40.868072,-73.919801,POINT (-73.919078 40.866168),POINT (-73.919801 40.868072)
2,488609748,29af8d864d6758cca3c301fabad56e5e27c2bd3688d50f...,2025-08-13T06:05:25.000,2025-08-13T06:12:10.000,VIOLATION ISSUED,MOBILE BUS LANE,BX35,40.849545,-73.9343,403783,WADSWORTH AV/W 181 ST,40.849579,-73.935065,POINT (-73.9343 40.849545),POINT (-73.935065 40.849579)
3,488529079,781d305d8791813421024b6bad6eeb3deccaed9963ae63...,2025-08-11T17:52:43.000,2025-08-11T18:01:09.000,VIOLATION ISSUED,MOBILE BUS LANE,M42,40.757759,-73.990841,401851,W 42 ST/8 AV,40.757575,-73.990464,POINT (-73.99084123526166 40.757758827753094),POINT (-73.990464 40.757575)
4,488522954,8bf36cd143b168030b031f780d8450ed426af44d6449f9...,2025-08-13T08:33:40.000,2025-08-13T09:13:54.000,VIOLATION ISSUED,MOBILE BUS LANE,BX5,40.822709,-73.869633,100280,STORY AV/METCALF AV,40.822549,-73.870532,POINT (-73.86963253251953 40.822709461355764),POINT (-73.870532 40.822549)


In [36]:
geometry = gpd.points_from_xy(violations_df.bus_stop_longitude, violations_df.bus_stop_latitude)
geo_df = gpd.GeoDataFrame(
    violations_df, geometry=geometry
)
geo_df.head()

Unnamed: 0,violation_id,vehicle_id,first_occurrence,last_occurrence,violation_status,violation_type,bus_route_id,violation_latitude,violation_longitude,stop_id,stop_name,bus_stop_latitude,bus_stop_longitude,violation_georeference,bus_stop_georeference,geometry
0,488695180,29af8d864d6758cca3c301fabad56e5e27c2bd3688d50f...,2025-08-13T08:12:16.000,2025-08-13T08:18:52.000,VIOLATION ISSUED,MOBILE BUS LANE,BX35,40.84953,-73.934291,403783,WADSWORTH AV/W 181 ST,40.849579,-73.935065,POINT (-73.934291 40.84953),POINT (-73.935065 40.849579),POINT (-73.93506 40.84958)
1,488623774,16888160f975233eacabe8ee726e9bbe5c5faaf72ea7c1...,2025-07-27T21:49:28.000,2025-07-27T22:03:27.000,VIOLATION ISSUED,MOBILE DOUBLE PARKED,BX12+,40.866168,-73.919078,402611,BROADWAY/ISHAM ST,40.868072,-73.919801,POINT (-73.919078 40.866168),POINT (-73.919801 40.868072),POINT (-73.9198 40.86807)
2,488609748,29af8d864d6758cca3c301fabad56e5e27c2bd3688d50f...,2025-08-13T06:05:25.000,2025-08-13T06:12:10.000,VIOLATION ISSUED,MOBILE BUS LANE,BX35,40.849545,-73.9343,403783,WADSWORTH AV/W 181 ST,40.849579,-73.935065,POINT (-73.9343 40.849545),POINT (-73.935065 40.849579),POINT (-73.93506 40.84958)
3,488529079,781d305d8791813421024b6bad6eeb3deccaed9963ae63...,2025-08-11T17:52:43.000,2025-08-11T18:01:09.000,VIOLATION ISSUED,MOBILE BUS LANE,M42,40.757759,-73.990841,401851,W 42 ST/8 AV,40.757575,-73.990464,POINT (-73.99084123526166 40.757758827753094),POINT (-73.990464 40.757575),POINT (-73.99046 40.75758)
4,488522954,8bf36cd143b168030b031f780d8450ed426af44d6449f9...,2025-08-13T08:33:40.000,2025-08-13T09:13:54.000,VIOLATION ISSUED,MOBILE BUS LANE,BX5,40.822709,-73.869633,100280,STORY AV/METCALF AV,40.822549,-73.870532,POINT (-73.86963253251953 40.822709461355764),POINT (-73.870532 40.822549),POINT (-73.87053 40.82255)


In [37]:
map = folium.Map(location=[40.730610, -73.935242], tiles="CartoDB Positron", zoom_start=11)

In [38]:
map


In [39]:
# unique colors
unique_stops = geo_df.bus_route_id.nunique()
color_palette = sns.color_palette("hls", unique_stops).as_hex()
unique_values = geo_df.bus_route_id.unique()
color_map = dict(zip(unique_values, color_palette))

In [40]:
geo_df_list = [[point.xy[1][0], point.xy[0][0]] for point in geo_df.geometry]

# Iterate through list and add a marker for each violation
i = 0
for coordinates in geo_df_list:
    # Place the markers with the popup labels and data
    map.add_child(
        folium.Marker(
            location=coordinates,
            popup=f"""
            Route: {geo_df.bus_route_id[i]} <br>
            Stop Name: {geo_df.stop_name[i]} <br>
            Violation Type: {geo_df.violation_type[i]} <br>
            """,
            icon=folium.Icon(color='white', icon_color = color_map[geo_df.bus_route_id[i]], icon="info-sign"),
        )
    )
    i = i + 1

In [41]:
map

In [42]:
from shapely.geometry import LineString
from pathlib import Path

In [43]:
FOLDER = Path("../data/gtfs_subway")
print(f"Loading GTFS data from: {FOLDER.resolve()}")

shapes_path = FOLDER / "shapes.txt"
stops_path = FOLDER / "stops.txt"
routes_path = FOLDER / "routes.txt"
trips_path = FOLDER / "trips.txt"

shapes = pd.read_csv(shapes_path, dtype=str, low_memory=False)
stops = pd.read_csv(stops_path, dtype=str, low_memory=False)
routes = pd.read_csv(routes_path, dtype=str, low_memory=False)
trips = pd.read_csv(trips_path, dtype=str, low_memory=False)
print("Successfully loaded GTFS text files.")

feed_name = "subway"
for df in [shapes, stops, routes, trips]:
    df["feed_name"] = feed_name

for col in ["shape_pt_lat", "shape_pt_lon"]:
    shapes[col] = shapes[col].astype(float)
shapes["shape_pt_sequence"] = shapes["shape_pt_sequence"].astype(int)
stops["stop_lat"] = stops["stop_lat"].astype(float)
stops["stop_lon"] = stops["stop_lon"].astype(float)

shapes["shape_uid"] = shapes["feed_name"] + "_" + shapes["shape_id"]
trips["shape_uid"] = trips["feed_name"] + "_" + trips["shape_id"]

shape_to_route = trips.merge(
    routes, on=["route_id", "feed_name"]
).drop_duplicates("shape_uid")

lines = (
    shapes.sort_values(["shape_uid", "shape_pt_sequence"])
    .groupby("shape_uid")[["shape_pt_lon", "shape_pt_lat"]]
    .apply(lambda df: LineString(df.to_numpy()))
    .to_frame("geometry")
    .reset_index()
)

routes_gdf = gpd.GeoDataFrame(lines, geometry="geometry", crs="EPSG:4326").merge(
    shape_to_route, on="shape_uid", how="left"
)

stops_gdf = gpd.GeoDataFrame(
    stops,
    geometry=gpd.points_from_xy(stops["stop_lon"], stops["stop_lat"]),
    crs="EPSG:4326"
)

folium.map.CustomPane("routes", z_index=400).add_to(map)
folium.map.CustomPane("stops", z_index=650).add_to(map)

folium.GeoJson(
    routes_gdf,
    name="Subway Routes",
    style_function=lambda feature: {
        'color': f"#{feature['properties'].get('route_color', '000000')}",
        'weight': 3,
        'opacity': 0.7,
    },
    tooltip=folium.GeoJsonTooltip(fields=["route_short_name", "route_long_name"]),
    pane="routes"
).add_to(map)

folium.LayerControl(collapsed=False).add_to(map)

Loading GTFS data from: C:\Users\drodr\coding\MTA-MHC-Datatahon\data\gtfs_subway
Successfully loaded GTFS text files.


<folium.map.LayerControl at 0x1c0a42d8910>

In [44]:
map

In [45]:
map.save('../data/cleaned/latest_violations_map.html')