In [39]:
#Import the required packages
#Import package pandas for data analysis
import pandas as pd

# Import package numpy for numeric computing
import numpy as np

# Import package matplotlib for visualisation/plotting
import matplotlib.pyplot as plt

#For showing plots directly in the notebook run the command below
%matplotlib inline

# For saving multiple plots into a single pdf file
from matplotlib.backends.backend_pdf import PdfPages 

In [40]:
# Read the raw MTA subway entrances/exits export into a DataFrame
df = pd.read_csv(
    'MTA_Subway_Entrances_and_Exits__2024_20250524.csv',
    sep=',',
    skipinitialspace=True,
    keep_default_na=True,
)
# Upper limit on the number of rows pandas prints in full
pd.options.display.max_rows = 100
# Preview the first five rows
df.head(5)

Unnamed: 0,Division,Line,Borough,Stop Name,Complex ID,Constituent Station Name,Station ID,GTFS Stop ID,Daytime Routes,Entrance Type,Entry Allowed,Exit Allowed,Entrance Latitude,Entrance Longitude,entrance_georeference
0,BMT,4th Av,B,Atlantic Av-Barclays Ctr,617,Atlantic Av-Barclays Ctr,27,R31,2 3 4 5 B D N Q R,Stair,YES,YES,40.683905,-73.978879,POINT (-73.978879 40.683905)
1,BMT,4th Av,B,Atlantic Av-Barclays Ctr,617,Atlantic Av-Barclays Ctr,27,R31,2 3 4 5 B D N Q R,Elevator,YES,YES,40.683805,-73.978487,POINT (-73.978487 40.683805)
2,BMT,4th Av,B,Atlantic Av-Barclays Ctr,617,Atlantic Av-Barclays Ctr,27,R31,2 3 4 5 B D N Q R,Stair,YES,YES,40.683928,-73.978412,POINT (-73.978412 40.683928)
3,BMT,4th Av,B,Union St,28,Union St,28,R32,R,Stair,YES,YES,40.677154,-73.98343,POINT (-73.9834296 40.6771544)
4,BMT,4th Av,B,Union St,28,Union St,28,R32,R,Stair,YES,YES,40.677296,-73.983336,POINT (-73.9833364 40.6772958)


For the Mobility Score we need subway access information. Therefore, the following features are relevant:
- Stop Name
- Entry Allowed
- Entrance Type (for accessibility)
- Daytime Routes
- Entrance Latitude
- Entrance Longitude

In [41]:
# List the borough codes present in the data before filtering for Manhattan

df['Borough'].unique()


array(['B', 'Q', 'M', 'Bx', 'SI'], dtype=object)

In [42]:
# Keep only the rows whose borough code is 'M' (Manhattan)
df = df.loc[df['Borough'] == 'M']

In [43]:
# (rows, columns) of the frame after the borough filter
df.shape

(868, 15)

In [44]:
# Borough holds a single value after the filter, so the column is removed
df = df.drop(columns='Borough')

In [45]:
# Restrict the frame to the relevant feature columns
df = df.loc[:, [
    'Stop Name',
    'Entry Allowed',
    'Entrance Type',
    'Daytime Routes',
    'Exit Allowed',
    'Entrance Latitude',
    'Entrance Longitude',
]]

In [46]:
# Inspect the column dtypes (this cell only displays them; no conversion is performed)
df.dtypes

Stop Name              object
Entry Allowed          object
Entrance Type          object
Daytime Routes         object
Exit Allowed           object
Entrance Latitude     float64
Entrance Longitude    float64
dtype: object

In [47]:
# Preview the first rows of the reduced frame
df.head()

Unnamed: 0,Stop Name,Entry Allowed,Entrance Type,Daytime Routes,Exit Allowed,Entrance Latitude,Entrance Longitude
52,Roosevelt Island,YES,Station House,F,YES,40.759019,-73.953458
53,Lexington Av/63 St,YES,Easement - Street,F Q,YES,40.764968,-73.966679
54,Lexington Av/63 St,YES,Stair,F Q,YES,40.764738,-73.966553
55,Lexington Av/63 St,YES,Easement - Street,F Q,YES,40.764896,-73.966426
56,Lexington Av/63 St,YES,Stair,F Q,YES,40.764101,-73.965041


In [48]:
# Display every row that exactly repeats an earlier row
df.loc[df.duplicated()]

Unnamed: 0,Stop Name,Entry Allowed,Entrance Type,Daytime Routes,Exit Allowed,Entrance Latitude,Entrance Longitude


In [50]:
# Show rows whose stop name is the literal string 'False'
df[df['Stop Name']=='False']

Unnamed: 0,Stop Name,Entry Allowed,Entrance Type,Daytime Routes,Exit Allowed,Entrance Latitude,Entrance Longitude


In [52]:
# Number of missing values in each column
df.isna().sum()

Stop Name             0
Entry Allowed         0
Entrance Type         0
Daytime Routes        0
Exit Allowed          0
Entrance Latitude     0
Entrance Longitude    0
dtype: int64

In [35]:
# (rows, columns) of the frame after column selection
df.shape

(868, 7)

In [53]:
# Column dtypes of the cleaned frame
df.dtypes

Stop Name              object
Entry Allowed          object
Entrance Type          object
Daytime Routes         object
Exit Allowed           object
Entrance Latitude     float64
Entrance Longitude    float64
dtype: object

In [54]:
#Computing Nearby Access
import haversine

# Mean Earth radius in metres, used by the great-circle distance below
_EARTH_RADIUS_M = 6371008.8


def _haversine_m(lat, lon, lats, lons):
    """Return the haversine distance in metres from (lat, lon) to each (lats[i], lons[i])."""
    lat1 = np.radians(lat)
    lat2 = np.radians(lats)
    half_dlat = (lat2 - lat1) / 2.0
    half_dlon = (np.radians(lons) - np.radians(lon)) / 2.0
    a = np.sin(half_dlat) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(half_dlon) ** 2
    # Clip guards against rounding pushing `a` marginally outside [0, 1]
    return 2.0 * _EARTH_RADIUS_M * np.arcsin(np.sqrt(np.clip(a, 0.0, 1.0)))


def count_entrances_near(lat, lon, entrances, radius_m=500):
    """Summarise the subway entrances within ``radius_m`` metres of a point.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude of the query point, in decimal degrees.
    entrances : pandas.DataFrame
        One row per entrance. Must provide the entrance latitude, entrance
        longitude, daytime routes and entrance type columns. Column names are
        matched case-insensitively with spaces treated as underscores, so both
        ``"entrance_latitude"`` and ``"Entrance Latitude"`` are accepted.
    radius_m : float, default 500
        Search radius in metres (inclusive).

    Returns
    -------
    tuple of (int, int, int)
        ``(count, n_routes, ada_count)``: the number of entrances in range, the
        number of distinct whitespace-separated route labels among them, and
        how many of them have "elevator" in their entrance type.

    Raises
    ------
    KeyError
        If a required column is missing from a non-empty ``entrances`` frame.
    """
    # An empty frame has nothing in range; return before any column lookup
    if len(entrances) == 0:
        return 0, 0, 0

    # Map normalised names to the actual column labels so either spelling works
    by_normalised = {
        str(col).strip().lower().replace(" ", "_"): col for col in entrances.columns
    }
    lat_col = by_normalised["entrance_latitude"]
    lon_col = by_normalised["entrance_longitude"]
    routes_col = by_normalised["daytime_routes"]
    type_col = by_normalised["entrance_type"]

    # Vectorised distance to all entrances at once instead of a per-row loop
    distances = _haversine_m(
        lat,
        lon,
        entrances[lat_col].to_numpy(dtype=float),
        entrances[lon_col].to_numpy(dtype=float),
    )
    # NaN distances compare False and are therefore excluded
    nearby = entrances.loc[distances <= radius_m]

    unique_lines = set()
    # Missing route values are skipped rather than raising on .split()
    for routes in nearby[routes_col].dropna():
        unique_lines.update(str(routes).split())

    ada_count = sum("elevator" in str(kind).lower() for kind in nearby[type_col])

    return int(len(nearby)), len(unique_lines), int(ada_count)


In [None]:
#Get Subway Score


In [None]:
# Store the cleaned data set as CSV; index=False keeps the leftover
# row numbers of the raw file from being written as an unnamed first column
df.to_csv('subway_access_cleaned.csv', index=False)