In [1]:
%pip install seaborn
%pip install folium
import pandas as pd # data wrangler library, dataframes are used to display and manipulate data
import seaborn as sns # data graphing library, built on top of matplotlib
import matplotlib.pyplot as plt # graphing library, used for titles and customization
import urllib.parse # library to parse URLs for querying
import seaborn as sns # data graphing library, built on top of matplotlib
import folium # library to create interactive maps
import folium.plugins as plugins # plugins for folium, used for clustering map points
import geopandas as gpd # library to handle geospatial data



In [4]:
def encode_soql_query(query:str) -> str:
    """Turn a SoQL statement into a URL query-string suffix.

    Newline characters in ``query`` are replaced with spaces, the result is
    percent-encoded with ``urllib.parse.quote`` and prefixed with ``?$query=``
    so it can be appended directly to a resource URL.

    Args:
        query: The SoQL statement, possibly spanning several lines.

    Returns:
        The string ``'?$query='`` followed by the percent-encoded statement.
    """
    # str.replace substitutes every occurrence in one call
    single_line = query.replace('\n', ' ')
    return '?$query=' + urllib.parse.quote(single_line)

In [2]:
# CSV endpoint on data.ny.gov that the encoded SoQL query string is appended to
# (presumably the ACE violations dataset, going by the variable name — confirm).
ace_violations_api = "https://data.ny.gov/resource/kh8p-hcbm.csv"

In [11]:
# SoQL statement: all columns of at most 250 rows whose violation_status is
# 'EXEMPT - BUS/PARATRANSIT'.
violations_exempt_bus_query =  """
SELECT  *
WHERE violation_status = 'EXEMPT - BUS/PARATRANSIT'
LIMIT 250
"""
# Percent-encode the statement and append it to the endpoint to get the full request URL.
encoded_query = encode_soql_query(violations_exempt_bus_query)
api_query = ace_violations_api + encoded_query

In [12]:
# Read the CSV returned by the query URL straight into a DataFrame.
# pandas fetches the URL itself, so this cell needs network access.
bus_exemept_violations_df = pd.read_csv(api_query)


In [15]:
# One point per violation row, built from the bus-stop coordinates (x = longitude, y = latitude).
geometry = gpd.points_from_xy(bus_exemept_violations_df.bus_stop_longitude, bus_exemept_violations_df.bus_stop_latitude)
# Declare the CRS explicitly instead of leaving the frame without one: the
# points are longitude/latitude pairs, and the GTFS layers built later in this
# notebook are also tagged EPSG:4326, so all layers agree.
geo_df = gpd.GeoDataFrame(
    bus_exemept_violations_df, geometry=geometry, crs="EPSG:4326"
    )

In [None]:
# Base map that the later cells add markers and layers to; the start location
# is a point in New York City.
# NOTE: the name `map` shadows Python's builtin `map` for the rest of the notebook.
map = folium.Map(
    location=[40.730610, -73.935242],
    tiles="CartoDB Positron",
    zoom_start=11,
)

In [None]:
# Display the base map (a cell's last expression is rendered as its output).
map

In [None]:
# Distinct route ids present in the violations; this one array drives both the
# palette size and the lookup keys.
unique_values = geo_df.bus_route_id.unique()
# Size the palette from the key array itself. `nunique()` skips missing values
# by default while `unique()` keeps them, so counting separately could leave
# the palette one color short and zip() would then silently drop a key.
unique_stops = len(unique_values)
color_palette = sns.color_palette("hls", unique_stops).as_hex()
# route id -> hex color used for that route's marker icons
color_map = dict(zip(unique_values, color_palette))

In [None]:
# [latitude, longitude] pairs, the order folium expects (the points store x = lon, y = lat)
geo_df_list = [[point.xy[1][0], point.xy[0][0]] for point in geo_df.geometry]

# Add a marker for each violation. Coordinates and attribute columns are walked
# together by position, so the pairing does not rely on geo_df having a default
# 0..n-1 index (label lookups such as `column[i]` do) and no manual counter is needed.
# NOTE: re-running this cell adds the same markers to `map` again.
marker_rows = zip(
    geo_df_list,
    geo_df.bus_route_id,
    geo_df.stop_name,
    geo_df.violation_type,
)
for coordinates, route_id, stop_name, violation_type in marker_rows:
    # Place the markers with the popup labels and data
    map.add_child(
        folium.Marker(
            location=coordinates,
            popup=f"""
            Route: {route_id} <br>
            Stop Name: {stop_name} <br>
            Violation Type: {violation_type} <br>
            """,
            # icon glyph is tinted with the color assigned to this row's route
            icon=folium.Icon(color='white', icon_color=color_map[route_id], icon="info-sign"),
        )
    )

In [17]:
# Display the map with the violation markers. Only a cell's last expression is
# rendered, so a single reference is enough.
map

In [16]:
from shapely.geometry import LineString
from pathlib import Path

In [None]:
# Folder holding the GTFS subway feed text files, relative to the working directory.
FOLDER = Path("../data/gtfs_subway")
print(f"Loading GTFS data from: {FOLDER.resolve()}")

shapes_path = FOLDER / "shapes.txt"
stops_path = FOLDER / "stops.txt"
routes_path = FOLDER / "routes.txt"
trips_path = FOLDER / "trips.txt"

# Every column is read as text (dtype=str); the coordinate and sequence columns
# are cast to numbers explicitly below.
shapes = pd.read_csv(shapes_path, dtype=str, low_memory=False)
stops = pd.read_csv(stops_path, dtype=str, low_memory=False)
routes = pd.read_csv(routes_path, dtype=str, low_memory=False)
trips = pd.read_csv(trips_path, dtype=str, low_memory=False)
print("Successfully loaded GTFS text files.")

# Tag every table with the feed it came from; the tag is used to build the
# shape_uid keys and as a merge key below.
feed_name = "subway"
for df in [shapes, stops, routes, trips]:
    df["feed_name"] = feed_name

for col in ["shape_pt_lat", "shape_pt_lon"]:
    shapes[col] = shapes[col].astype(float)
shapes["shape_pt_sequence"] = shapes["shape_pt_sequence"].astype(int)
stops["stop_lat"] = stops["stop_lat"].astype(float)
stops["stop_lon"] = stops["stop_lon"].astype(float)

# "<feed>_<shape_id>" key shared by shapes and trips.
shapes["shape_uid"] = shapes["feed_name"] + "_" + shapes["shape_id"]
trips["shape_uid"] = trips["feed_name"] + "_" + trips["shape_id"]

# One row of trip + route attributes per shape (the first trip found for each shape_uid).
shape_to_route = trips.merge(
    routes, on=["route_id", "feed_name"]
).drop_duplicates("shape_uid")

# Build one LineString per shape from its points, ordered by sequence number
# and given as (lon, lat) pairs.
lines = (
    shapes.sort_values(["shape_uid", "shape_pt_sequence"])
    .groupby("shape_uid")[["shape_pt_lon", "shape_pt_lat"]]
    .apply(lambda pts: LineString(pts.to_numpy()))
    .to_frame("geometry")
    .reset_index()
)

# Left merge keeps every shape; shapes without a matching trip get missing
# route attributes.
routes_gdf = gpd.GeoDataFrame(lines, geometry="geometry", crs="EPSG:4326").merge(
    shape_to_route, on="shape_uid", how="left"
)

# NOTE(review): stops_gdf and the "stops" pane are created here, but no layer in
# this cell uses them — presumably consumed elsewhere; confirm.
stops_gdf = gpd.GeoDataFrame(
    stops,
    geometry=gpd.points_from_xy(stops["stop_lon"], stops["stop_lat"]),
    crs="EPSG:4326"
)


def _route_style(feature):
    """Return the folium line style for one route feature.

    The line color comes from the feature's ``route_color`` property, falling
    back to black (``000000``) when that property is absent, null or empty.
    """
    # `.get(key, default)` only uses the default when the key is absent. Here the
    # key is present on every feature, and rows with no route color end up with a
    # null value, which would produce the invalid color "#None" — hence `or`.
    route_color = feature['properties'].get('route_color') or '000000'
    return {
        'color': f"#{route_color}",
        'weight': 3,
        'opacity': 0.7,
    }


# NOTE: re-running this cell adds the panes, the route layer and the layer
# control to `map` again.
folium.map.CustomPane("routes", z_index=400).add_to(map)
folium.map.CustomPane("stops", z_index=650).add_to(map)

folium.GeoJson(
    routes_gdf,
    name="Subway Routes",
    style_function=_route_style,
    tooltip=folium.GeoJsonTooltip(fields=["route_short_name", "route_long_name"]),
    pane="routes"
).add_to(map)

folium.LayerControl(collapsed=False).add_to(map)

In [None]:
# Display the map again after the subway route layer and layer control were added.
map

In [None]:
# Write the finished map to a standalone HTML file. The target folder is
# created first so the save does not fail when '../data/cleaned' is missing
# (e.g. on a fresh checkout).
output_path = Path('../data/cleaned/bus_exempt_map.html')
output_path.parent.mkdir(parents=True, exist_ok=True)
map.save(str(output_path))