# Explore Hunter Origin Station 2021-2024
`datasets/MTA_Subway_Origin-Destination_2021_Hunter_Origin.csv`

`datasets/MTA_Subway_Origin-Destination_2022_Hunter_Origin.csv`

`datasets/MTA_Subway_Origin-Destination_2023_Hunter_Origin.csv`

`datasets/MTA_Subway_Origin-Destination_2024_Hunter_Origin.csv`
* Full MTA Subway Origin-Destination 2021-2024 Dataset queried for Hunter College Origin Station

In 2021, most students were learning remotely because of the COVID-19 pandemic, and those who took the train were primarily essential workers. We can therefore hypothesize that the 2021 ridership data mostly reflects non-students. If certain destination stations show an increase after 2021, we can say with some confidence that those are student stops.

In [1]:
from utils import *
import pandas as pd

In [2]:
# Read the four yearly Hunter-origin extracts (2021-2024), one DataFrame per year.
# The paths are listed in chronological order and unpacked in that same order.
hunter_origin_2021, hunter_origin_2022, hunter_origin_2023, hunter_origin_2024 = map(
    pd.read_csv,
    [
        "datasets/MTA_Subway_Origin-Destination_2021_Hunter_Origin.csv",
        "datasets/MTA_Subway_Origin-Destination_2022_Hunter_Origin.csv",
        "datasets/MTA_Subway_Origin-Destination_2023_Hunter_Origin.csv",
        "datasets/MTA_Subway_Origin-Destination_2024_Hunter_Origin.csv",
    ],
)

In [3]:
# Stack the four yearly frames into a single 2021-2024 frame.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it, every
# yearly frame keeps its own default row labels, so the same label would appear
# once per year and any label-based lookup (.loc) on the combined frame would
# be ambiguous.
hunter_origin_total = pd.concat(
    [hunter_origin_2021, hunter_origin_2022, hunter_origin_2023, hunter_origin_2024],
    ignore_index=True,
)

In [4]:
# Five most frequent destination stations in the 2021 Hunter-origin data
# (2021 is the presumed mostly non-student baseline described in the intro).
# NOTE(review): "Count" looks like a number of dataset rows rather than riders;
# confirm against top_station_destinations in utils.
top_station_destinations(hunter_origin_2021, 5)

Unnamed: 0,Destination Station Complex Name,Count
357,Parkchester (6),1719
279,"Grand Central-42 St (S,4,5,6,7)",1706
80,3 Av-138 St (6),1687
18,"125 St (4,5,6)",1684
137,"86 St (4,5,6)",1682


In [5]:
# Five most frequent destination stations in the 2022 Hunter-origin data,
# for comparison against the 2021 ranking.
top_station_destinations(hunter_origin_2022, 5)

Unnamed: 0,Destination Station Complex Name,Count
18,"125 St (4,5,6)",1805
279,"Grand Central-42 St (S,4,5,6,7)",1757
137,"86 St (4,5,6)",1750
357,Parkchester (6),1737
12,116 St (6),1735


In [6]:
# Five most frequent destination stations in the 2023 Hunter-origin data.
top_station_destinations(hunter_origin_2023, 5)

Unnamed: 0,Destination Station Complex Name,Count
279,"Grand Central-42 St (S,4,5,6,7)",1806
357,Parkchester (6),1744
18,"125 St (4,5,6)",1737
319,"Lexington Av-53 St (E,M)/51 St (6)",1724
12,116 St (6),1714


In [7]:
# Five most frequent destination stations in the 2024 Hunter-origin data.
top_station_destinations(hunter_origin_2024, 5)

Unnamed: 0,Destination Station Complex Name,Count
279,"Grand Central-42 St (S,4,5,6,7)",1946
18,"125 St (4,5,6)",1878
137,"86 St (4,5,6)",1872
319,"Lexington Av-53 St (E,M)/51 St (6)",1845
357,Parkchester (6),1842


In [13]:
# Five most frequent destination stations across the combined 2021-2024 data.
top_station_destinations(hunter_origin_total, 5)

Unnamed: 0,Destination Station Complex Name,Count
279,"Grand Central-42 St (S,4,5,6,7)",7215
18,"125 St (4,5,6)",7104
357,Parkchester (6),7042
137,"86 St (4,5,6)",7006
12,116 St (6),6909


In [9]:
# Five least frequent destination stations across the combined 2021-2024 data.
bottom_station_destinations(hunter_origin_total, 5)

Unnamed: 0,Destination Station Complex Name,Count
396,Tompkinsville (SIR),1
182,"Beach 98 St (A,S)",380
175,"Beach 105 St (A,S)",480
195,"Broad Channel (A,S)",489
377,"Rockaway Park-Beach 116 St (A,S)",527


## Visualizing the data

In [10]:
# Station/complex reference table (file name carries the snapshot date 20250225).
station_df = pd.read_csv("datasets/MTA_Subway_Stations_and_Complexes_20250225.csv")
# Draw the origin-destination visualization using the helper from utils.
# NOTE(review): the third argument (5) presumably limits the plot to the top 5
# destinations, mirroring the calls above; confirm against the helper's signature.
origin_destination_visualizer(hunter_origin_total, station_df, 5)

  float(coord)
  if math.isnan(float(coord)):
  return [float(x) for x in coords]


## Destination Stations of Interest
* These stations should be heavily enforced and located in low-income neighborhoods
* `Livonia Avenue`, `Far Rockaway-Mott Avenue` and `Aqueduct Racetrack` in Ozone Park tallied between 50 and 60 tickets per 100,000 riders. By contrast, at about half of the city’s stations, police gave out fewer than five tickets per 100,000 riders between January and September 2023

* `Livonia Avenue:` ID 135
* `Far Rockaway-Mott Avenue:` ID 209
* `Aqueduct Racetrack:` ID 196

In [27]:
# Number of records per destination complex (ID + name) over all four years.
destination_keys = ["Destination Station Complex ID", "Destination Station Complex Name"]
hunter_origin_total_grouped = (
    hunter_origin_total
    .groupby(destination_keys)
    .size()
    .reset_index(name="Count")
)
hunter_origin_total_grouped

Unnamed: 0,Destination Station Complex ID,Destination Station Complex Name,Count
0,1,"Astoria-Ditmars Blvd (N,W)",5706
1,2,"Astoria Blvd (N,W)",5293
2,3,"30 Av (N,W)",5882
3,4,"Broadway (N,W)",5704
4,5,"36 Av (N,W)",4823
...,...,...,...
416,628,"Fulton St (A,C,J,Z,2,3,4,5)",6729
417,629,Lorimer St (L)/Metropolitan Av (G),5014
418,630,"Myrtle-Wyckoff Avs (L,M)",5647
419,635,"South Ferry (1)/Whitehall St (R,W)",5134


In [28]:
# Destination complex IDs of the stations of interest listed in the section above.
# Keeping them in one list means adding or removing a station is a one-line edit
# instead of another copy-pasted comparison.
station_ids_of_interest = [
    135,  # Livonia Av
    196,  # Aqueduct Racetrack
    209,  # Far Rockaway-Mott Av
]
# isin() builds the same boolean mask as OR-ing one equality test per ID.
stations_of_interest = hunter_origin_total_grouped[
    hunter_origin_total_grouped["Destination Station Complex ID"].isin(station_ids_of_interest)
]

In [29]:
# Show the grouped record counts for the three stations of interest.
stations_of_interest

Unnamed: 0,Destination Station Complex ID,Destination Station Complex Name,Count
107,135,Livonia Av (L),1928
155,196,Aqueduct Racetrack (A),695
168,209,Far Rockaway-Mott Av (A),2415


In [41]:
# Sanity check on the aggregation: take the raw rows whose destination is
# complex ID 209 (Far Rockaway-Mott Av) and count the non-null values per column.
# Each column's count should equal that station's "Count" in the grouped table.
is_far_rockaway = hunter_origin_total["Destination Station Complex ID"] == 209
hunter_origin_total[is_far_rockaway].count()

Year                                2415
Month                               2415
Day of Week                         2415
Hour of Day                         2415
Timestamp                           2415
Origin Station Complex ID           2415
Origin Station Complex Name         2415
Origin Latitude                     2415
Origin Longitude                    2415
Destination Station Complex ID      2415
Destination Station Complex Name    2415
Destination Latitude                2415
Destination Longitude               2415
Estimated Average Ridership         2415
Origin Point                        2415
Destination Point                   2415
dtype: int64