# Explore Hunter and NYU

`datasets/MTA_Subway_Origin-Destination_2024_Hunter_Origin.csv`
* Full MTA Subway Origin-Destination 2024 Dataset queried for Hunter College Origin Station

`datasets/MTA_Subway_Origin-Destination_2024_NYU_Origin.csv`
* Full MTA Subway Origin-Destination 2024 Dataset queried for NYU Origin Station

In [1]:
import pandas as pd

hunter_origin_df = pd.read_csv("datasets/MTA_Subway_Origin-Destination_2024_Hunter_Origin.csv")
nyu_origin_df = pd.read_csv("datasets/MTA_Subway_Origin-Destination_2024_NYU_Origin.csv")

In [27]:
# Helper functions

import folium

def top_station_destinations(ridership_df, top_n=5):
    top_destinations = ridership_df.groupby("Destination Station Complex Name").size().reset_index(name='Count').sort_values('Count', ascending=False).head(top_n)
    return top_destinations

def origin_destination_visualizer(ridership_df, station_df, top_n = 5):

    # Get ridership data for the origin station and group by destination
    grouped_ridership_df = ridership_df.groupby("Destination Station Complex ID")

    # Get origin station information
    origin_station = ridership_df.iloc[0]
    origin_lat = origin_station["Origin Latitude"]
    origin_lon = origin_station["Origin Longitude"]
    origin_name = origin_station["Origin Station Complex Name"]

    # Create a Folium map centered at the origin station
    m = folium.Map(location=[origin_lat, origin_lon], zoom_start=13)

    # Add a marker for the origin station
    folium.Marker(
        location=[origin_lat, origin_lon],
        popup=f"Origin: {origin_name}",
        icon=folium.Icon(color='blue')
    ).add_to(m)

    # Add markers for the top destinations
    for dest_id, count in grouped_ridership_df.size().sort_values(ascending=False).head(top_n).items():
        # Get destination station information
        dest_df = station_df[station_df["Complex ID"] == dest_id]
        dest_lat = dest_df["Latitude"]
        dest_lon = dest_df["Longitude"]
        dest_name = dest_df["Stop Name"]
        
        # Add a marker for the destination station
        folium.Marker(
            location=[dest_lat, dest_lon],
            popup=f"Destination: {dest_name} (Ridership: {count})",
            icon=folium.Icon(color='red')
        ).add_to(m)

    return m

station_df = pd.read_csv("datasets/MTA_Subway_Stations_and_Complexes_20250225.csv")

## Hunter College Exploration and Visualization

In [32]:
hunter_origin_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 450547 entries, 0 to 450546
Data columns (total 16 columns):
 #   Column                            Non-Null Count   Dtype  
---  ------                            --------------   -----  
 0   Year                              450547 non-null  int64  
 1   Month                             450547 non-null  int64  
 2   Day of Week                       450547 non-null  object 
 3   Hour of Day                       450547 non-null  int64  
 4   Timestamp                         450547 non-null  object 
 5   Origin Station Complex ID         450547 non-null  int64  
 6   Origin Station Complex Name       450547 non-null  object 
 7   Origin Latitude                   450547 non-null  float64
 8   Origin Longitude                  450547 non-null  float64
 9   Destination Station Complex ID    450547 non-null  int64  
 10  Destination Station Complex Name  450547 non-null  object 
 11  Destination Latitude              450547 non-null  f

In [17]:
top_station_destinations(hunter_origin_df, 5)

Unnamed: 0,Destination Station Complex Name,Count
279,"Grand Central-42 St (S,4,5,6,7)",1946
18,"125 St (4,5,6)",1878
137,"86 St (4,5,6)",1872
319,"Lexington Av-53 St (E,M)/51 St (6)",1845
357,Parkchester (6),1842


In [33]:
origin_destination_visualizer(hunter_origin_df, station_df, 5)

  float(coord)
  if math.isnan(float(coord)):
  return [float(x) for x in coords]


## NYU Exploration and Visualization

In [29]:
nyu_origin_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 406202 entries, 0 to 406201
Data columns (total 16 columns):
 #   Column                            Non-Null Count   Dtype  
---  ------                            --------------   -----  
 0   Year                              406202 non-null  int64  
 1   Month                             406202 non-null  int64  
 2   Day of Week                       406202 non-null  object 
 3   Hour of Day                       406202 non-null  int64  
 4   Timestamp                         406202 non-null  object 
 5   Origin Station Complex ID         406202 non-null  int64  
 6   Origin Station Complex Name       406202 non-null  object 
 7   Origin Latitude                   406202 non-null  float64
 8   Origin Longitude                  406202 non-null  float64
 9   Destination Station Complex ID    406202 non-null  int64  
 10  Destination Station Complex Name  406202 non-null  object 
 11  Destination Latitude              406202 non-null  f

In [30]:
top_station_destinations(nyu_origin_df, 5)

Unnamed: 0,Destination Station Complex Name,Count
398,"Times Sq-42 St (N,Q,R,W,S,1,2,3,7)/42 St (A,C,E)",2148
85,"34 St-Herald Sq (B,D,F,M,N,Q,R,W)",2104
99,"49 St (N,R,W)",2037
26,"14 St-Union Sq (L,N,Q,R,W,4,5,6)",2006
88,"34 St-Penn Station (A,C,E)",1995


In [31]:
origin_destination_visualizer(nyu_origin_df, station_df, 5)

  float(coord)
  if math.isnan(float(coord)):
  return [float(x) for x in coords]
