In [12]:
import sys

In [13]:
import numpy as np
import pandas as pd
import folium
import branca

In [8]:
df_station_coords = pd.read_csv(r"D:\capstone_F24\data\MTA_Subway_Stations_20241002.csv")

In [9]:
df_station_coords

Unnamed: 0,GTFS Stop ID,Station ID,Complex ID,Division,Line,Stop Name,Borough,CBD,Daytime Routes,Structure,GTFS Latitude,GTFS Longitude,North Direction Label,South Direction Label,ADA,ADA Northbound,ADA Southbound,ADA Notes,Georeference
0,R01,1,1,BMT,Astoria,Astoria-Ditmars Blvd,Q,False,N W,Elevated,40.775036,-73.912034,Last Stop,Manhattan,0,0,0,,POINT (-73.912034 40.775036)
1,R03,2,2,BMT,Astoria,Astoria Blvd,Q,False,N W,Elevated,40.770258,-73.917843,Astoria,Manhattan,1,1,1,,POINT (-73.917843 40.770258)
2,R04,3,3,BMT,Astoria,30 Av,Q,False,N W,Elevated,40.766779,-73.921479,Astoria,Manhattan,0,0,0,,POINT (-73.921479 40.766779)
3,R05,4,4,BMT,Astoria,Broadway,Q,False,N W,Elevated,40.761820,-73.925508,Astoria,Manhattan,0,0,0,,POINT (-73.925508 40.76182)
4,R06,5,5,BMT,Astoria,36 Av,Q,False,N W,Elevated,40.756804,-73.929575,Astoria,Manhattan,0,0,0,,POINT (-73.929575 40.756804)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
491,S15,517,517,SIR,Staten Island,Prince's Bay,SI,False,SIR,Open Cut,40.525507,-74.200064,Ferry,South Shore,0,0,0,,POINT (-74.200064 40.525507)
492,S14,518,518,SIR,Staten Island,Pleasant Plains,SI,False,SIR,Embankment,40.522410,-74.217847,Ferry,South Shore,0,0,0,,POINT (-74.217847 40.52241)
493,S13,519,519,SIR,Staten Island,Richmond Valley,SI,False,SIR,Open Cut,40.519631,-74.229141,Ferry,Tottenville,0,0,0,,POINT (-74.229141 40.519631)
494,S09,522,522,SIR,Staten Island,Tottenville,SI,False,SIR,At Grade,40.512764,-74.251961,Ferry,Last Stop,1,1,1,,POINT (-74.251961 40.512764)


In [10]:
df_time_series_metrics = pd.read_pickle(r"D:\capstone_F24\data\Cleaned_data\df_timeseries_metrics.pkl")

In [11]:
df_time_series_metrics

Unnamed: 0,station_complex_id,rmse,mae,smape
0,1,165.137938,165.137938,51.528270
1,10,249.379695,249.379695,55.338124
2,100,28.173102,28.173102,60.163773
3,101,141.345875,141.345875,77.205690
4,118,65.380958,65.380958,52.849263
...,...,...,...,...
356,95,138.559805,138.559805,81.402249
357,96,43.393786,43.393786,31.990164
358,97,292.582292,292.582292,149.725284
359,98,81.345115,81.345115,87.398412


In [20]:
df_time_series_metrics['station_complex_id'] = df_time_series_metrics['station_complex_id'].astype(str)
df_station_coords['Complex ID'] = df_station_coords['Complex ID'].astype(str)

# Merge the dataframes
merged_df = pd.merge(
    df_time_series_metrics,
    df_station_coords[['Complex ID', 'Stop Name', 'GTFS Latitude', 'GTFS Longitude']],
    left_on='station_complex_id',
    right_on='Complex ID',
    how='left'
)

# Drop the redundant 'Complex ID' column (if desired)
merged_df.drop(columns=['Complex ID'], inplace=True)
merged_df

Unnamed: 0,station_complex_id,rmse,mae,smape,Stop Name,GTFS Latitude,GTFS Longitude
0,1,165.137938,165.137938,51.528270,Astoria-Ditmars Blvd,40.775036,-73.912034
1,10,249.379695,249.379695,55.338124,49 St,40.759901,-73.984139
2,100,28.173102,28.173102,60.163773,Hewes St,40.706870,-73.953431
3,101,141.345875,141.345875,77.205690,Marcy Av,40.708359,-73.957757
4,118,65.380958,65.380958,52.849263,3 Av,40.732849,-73.986122
...,...,...,...,...,...,...,...
407,95,138.559805,138.559805,81.402249,Gates Av,40.689630,-73.922270
408,96,43.393786,43.393786,31.990164,Kosciuszko St,40.693342,-73.928814
409,97,292.582292,292.582292,149.725284,Myrtle Av,40.697207,-73.935657
410,98,81.345115,81.345115,87.398412,Flushing Av,40.700260,-73.941126


In [30]:
# Create base map
m = folium.Map(location=[40.73061, -73.935242], zoom_start=12)

# Normalize the error metrics
def normalize_metric(df, column):
    return (df[column] - df[column].min()) / (df[column].max() - df[column].min())

# Normalize MAE and SMAPE
merged_df['mae_normalized'] = normalize_metric(merged_df, 'mae')
merged_df['smape_normalized'] = normalize_metric(merged_df, 'smape')

# Create a function to plot the map
def create_error_map(metric='mae_normalized'):
    colormap = branca.colormap.LinearColormap(
        colors=['green', 'lightgreen', 'yellow', 'orange', 'red'],
        vmin=0, vmax=1
    )
    colormap.caption = f"{metric.upper()} (Normalized)"
    
    # Create the map
    m = folium.Map(location=[df_station_coords['GTFS Latitude'].mean(), 
                             df_station_coords['GTFS Longitude'].mean()], zoom_start=11)
    m.add_child(colormap)
    
    # Add stations to the map
    for _, row in merged_df.iterrows():  # Ensure you merge the station coordinates with metrics
        folium.CircleMarker(
            location=(row['GTFS Latitude'], row['GTFS Longitude']),
            radius=5,
            color=colormap(row[metric]),
            fill=True,
            fill_color=colormap(row[metric]),
            fill_opacity=0.7,
            popup=folium.Popup(
                f"Station: {row['Stop Name']}<br>{metric.upper()}: {row[metric]:.2f}", 
                parse_html=True
            ),
        ).add_to(m)
    return m

# Generate and save maps
mae_map = create_error_map(metric='mae_normalized')
smape_map = create_error_map(metric='smape_normalized')

# Save to files
mae_map.save("mae_error_map.html")
smape_map.save("smape_error_map.html")

In [31]:
mae_map

In [32]:
smape_map