In [1]:
#This notebook compares the accessibility of various metro systems

## The MTA

In [12]:
import pandas as pd
import numpy as np

#Load the MTA subway station list from the local CSV export.
#NOTE: this is an absolute path on the author's machine; point it at your own copy to re-run.
MTA_STATIONS_CSV = '/Users/zoeslemmons/Desktop/MTA-elevators/systems_data/MTA_Subway_Stations.csv'
mta_data = pd.read_csv(MTA_STATIONS_CSV)
mta_data.head()

Unnamed: 0,GTFS Stop ID,Station ID,Complex ID,Division,Line,Stop Name,Borough,CBD,Daytime Routes,Structure,GTFS Latitude,GTFS Longitude,North Direction Label,South Direction Label,ADA,ADA Northbound,ADA Southbound,ADA Notes,Georeference
0,127,317,611,IRT,Broadway - 7Av,Times Sq-42 St,M,True,1 2 3,Subway,40.75529,-73.987495,Uptown,Downtown,1,1,1,,POINT (-73.987495 40.75529)
1,S17,515,515,SIR,Staten Island,Annadale,SI,False,SIR,Open Cut,40.54046,-74.178217,Ferry,South Shore,0,0,0,,POINT (-74.178217 40.54046)
2,S01,139,627,BMT,Franklin Shuttle,Franklin Av,Bk,False,S,Elevated,40.680596,-73.955827,Last Stop,Prospect Park,1,1,1,,POINT (-73.955827 40.680596)
3,254,349,349,IRT,Eastern Pky,Junius St,Bk,False,3,Elevated,40.663515,-73.902447,Manhattan,New Lots,0,0,0,,POINT (-73.902447 40.663515)
4,M01,108,108,BMT,Myrtle Av,Middle Village-Metropolitan Av,Q,False,M,Elevated,40.711396,-73.889601,Inbound,Last Stop,1,1,1,,POINT (-73.889601 40.711396)


In [20]:
#Sanity check: tally the rows per division.
#Excluding the Staten Island Railway (SIR), the MTA counts 472 stations, so the other divisions should sum to that.
mta_data.Division.value_counts()

Division
IRT    178
IND    155
BMT    142
SIR     21
Name: count, dtype: int64

In [23]:
#The divisions above sum to 475 (178 + 155 + 142), three more than expected.
#Collect the distinct Station IDs to check whether some rows share an ID.
station_id_column = mta_data['Station ID']
ids = station_id_column.unique()

In [26]:
#Count the distinct Station IDs among non-SIR rows.
#Filtering on Division avoids hard-coding the SIR station count (21), so the
#check stays correct if the source file changes.
mta_data.loc[mta_data['Division'] != 'SIR', 'Station ID'].nunique()

472

In [28]:
#Bingo: 472 distinct IDs, so every station is present and a few IDs must repeat.
#value_counts() sorts by frequency, so any repeated IDs surface at the top.
station_id_counts = mta_data['Station ID'].value_counts()
station_id_counts

Station ID
151    2
167    2
461    2
55     1
147    1
      ..
313    1
511    1
197    1
376    1
348    1
Name: count, Length: 493, dtype: int64

In [30]:
#Station IDs 151, 167 and 461 each appear twice. Inspect the rows for 151 first.
mta_data.loc[mta_data['Station ID'] == 151]

Unnamed: 0,GTFS Stop ID,Station ID,Complex ID,Division,Line,Stop Name,Borough,CBD,Daytime Routes,Structure,GTFS Latitude,GTFS Longitude,North Direction Label,South Direction Label,ADA,ADA Northbound,ADA Southbound,ADA Notes,Georeference
231,A12,151,151,IND,8th Av - Fulton St,145 St,M,False,A C,Subway,40.824783,-73.944216,Uptown,Downtown,0,0,0,,POINT (-73.944216 40.824783)
343,D13,151,151,IND,Concourse,145 St,M,False,B D,Subway,40.824783,-73.944216,Uptown,Downtown,0,0,0,,POINT (-73.944216 40.824783)


In [31]:
#Same check for the rows sharing Station ID 167
mta_data.loc[mta_data['Station ID'] == 167]

Unnamed: 0,GTFS Stop ID,Station ID,Complex ID,Division,Line,Stop Name,Borough,CBD,Daytime Routes,Structure,GTFS Latitude,GTFS Longitude,North Direction Label,South Direction Label,ADA,ADA Northbound,ADA Southbound,ADA Notes,Georeference
218,A32,167,167,IND,8th Av - Fulton St,W 4 St-Wash Sq,M,True,A C E,Subway,40.732338,-74.000495,Uptown,Downtown,1,1,1,,POINT (-74.000495 40.732338)
321,D20,167,167,IND,6th Av - Culver,W 4 St-Wash Sq,M,True,B D F M,Subway,40.732338,-74.000495,Uptown,Downtown,1,1,1,,POINT (-74.000495 40.732338)


In [32]:
#And for the rows sharing Station ID 461
mta_data.loc[mta_data['Station ID'] == 461]

Unnamed: 0,GTFS Stop ID,Station ID,Complex ID,Division,Line,Stop Name,Borough,CBD,Daytime Routes,Structure,GTFS Latitude,GTFS Longitude,North Direction Label,South Direction Label,ADA,ADA Northbound,ADA Southbound,ADA Notes,Georeference
361,R09,461,461,BMT,Astoria,Queensboro Plaza,Q,False,N W,Elevated,40.750582,-73.940202,Astoria,Manhattan,1,1,1,,POINT (-73.940202 40.750582)
483,718,461,461,IRT,Flushing,Queensboro Plaza,Q,False,7,Elevated,40.750582,-73.940202,Outbound,Manhattan,1,1,1,,POINT (-73.940202 40.750582)


In [33]:
#Ok, these appear to be different train platforms that for some reason share the same Station ID. But for this analysis we'll count them as distinct stations.
#So the total station count for this analysis will be 475 instead of the MTA's official count of 472 stations.

In [35]:
#Drop the SIR rows for good so only subway stations remain.
is_subway = mta_data['Division'] != 'SIR'
mta_stations = mta_data[is_subway]
mta_stations.shape

(475, 19)

In [36]:
#Perfect: 475 rows, matching the station count settled on for this analysis.
#Next, find out how many stations are accessible. The ADA column looks like it could be a boolean,
#so check the column dtypes first.

mta_stations.info()

<class 'pandas.core.frame.DataFrame'>
Index: 475 entries, 0 to 495
Data columns (total 19 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   GTFS Stop ID           475 non-null    object 
 1   Station ID             475 non-null    int64  
 2   Complex ID             475 non-null    int64  
 3   Division               475 non-null    object 
 4   Line                   475 non-null    object 
 5   Stop Name              475 non-null    object 
 6   Borough                475 non-null    object 
 7   CBD                    475 non-null    bool   
 8   Daytime Routes         475 non-null    object 
 9   Structure              475 non-null    object 
 10  GTFS Latitude          475 non-null    float64
 11  GTFS Longitude         475 non-null    float64
 12  North Direction Label  475 non-null    object 
 13  South Direction Label  475 non-null    object 
 14  ADA                    475 non-null    int64  
 15  ADA Northbo

In [37]:
#ADA is stored as an integer rather than a boolean, so list the distinct values it takes.
ada_values = mta_stations['ADA']
ada_values.value_counts()

ADA
0    320
1    146
2      9
Name: count, dtype: int64

In [38]:
#0 and 1 look like False/True flags (not accessible / accessible), but what is 2?
#Pull the rows with ADA == 2 to find out.
mta_stations.loc[mta_stations['ADA'] == 2]

Unnamed: 0,GTFS Stop ID,Station ID,Complex ID,Division,Line,Stop Name,Borough,CBD,Daytime Routes,Structure,GTFS Latitude,GTFS Longitude,North Direction Label,South Direction Label,ADA,ADA Northbound,ADA Southbound,ADA Notes,Georeference
11,233,336,336,IRT,Eastern Pky,Hoyt St,Bk,False,2 3,Subway,40.690545,-73.985065,Manhattan,Outbound,2,0,1,Outbound only,POINT (-73.985065 40.690545)
98,633,404,404,IRT,Lexington Av,28 St,M,True,6,Subway,40.74307,-73.984264,Uptown,Downtown,2,0,1,Downtown only,POINT (-73.984264 40.74307)
129,R15,10,10,BMT,Broadway - Brighton,49 St,M,True,N R W,Subway,40.759901,-73.984139,Uptown,Downtown,2,1,0,Uptown only,POINT (-73.984139 40.759901)
148,G16,269,269,IND,Queens Blvd,Northern Blvd,Q,False,M R,Subway,40.752885,-73.906006,Outbound,Manhattan,2,1,0,Outbound only,POINT (-73.906006 40.752885)
198,L20,130,130,BMT,Canarsie,Wilson Av,Bk,False,L,Subway,40.688764,-73.904046,Manhattan,Outbound,2,1,0,Manhattan-bound only,POINT (-73.904046 40.688764)
224,A25,162,162,IND,8th Av - Fulton St,50 St,M,True,C E,Subway,40.762456,-73.985984,Uptown,Downtown,2,0,1,Downtown only,POINT (-73.985984 40.762456)
240,423,415,620,IRT,Eastern Pky,Borough Hall,Bk,False,4 5,Subway,40.692404,-73.990151,Manhattan,Outbound,2,1,0,Manhattan-bound only,POINT (-73.990151 40.692404)
391,F09,274,606,IND,Queens Blvd,Court Sq-23 St,Q,False,E M,Subway,40.747846,-73.946,Outbound,Manhattan,2,1,0,Manhattan-bound only,POINT (-73.946 40.747846)
438,626,397,397,IRT,Lexington Av,86 St,M,False,4 5 6,Subway,40.779492,-73.955589,Uptown,Downtown,2,1,0,Uptown local only,POINT (-73.955589 40.779492)


In [39]:
#Aha, looking at the ADA Notes column, it looks like 2 means that the station is accessible but only in one direction.
#I'll make an executive decision to include those as accessible for this analysis and explain that in the methodology

In [43]:
#Moving on, let's find out what percentage of stations are accessible

#Select rows whose ADA code is 1 (accessible) or 2 (accessible in one direction only).
#Use a single isin() filter; do not combine the two subsets with `+`:
#DataFrame addition aligns rows on the index and adds values element-wise, so with
#disjoint indexes it yields a frame full of NaN instead of the stacked station rows.
ada_stations = mta_stations[mta_stations['ADA'].isin([1, 2])]
len(ada_stations)

155

In [45]:
#Turn the accessible-station count into a share of all stations, then scale to a percentage.
accessible_share = len(ada_stations) / len(mta_stations)
percent_ada = accessible_share * 100
percent_ada

32.631578947368425