# Preprocessing

In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from shapely.geometry import LineString
import folium

from shapely.geometry import Point

## Subway Entrances

In [20]:
# load data set from csv file
df_entries = pd.read_csv('data/DOITT_SUBWAY_ENTRANCE_01_13SEPT2010.csv')

In [21]:
df_entries

Unnamed: 0,OBJECTID,URL,NAME,the_geom,LINE
0,1734,http://web.mta.info/nyct/service/,Birchall Ave & Sagamore St at NW corner,POINT (-73.86835600032798 40.84916900104506),2-5
1,1735,http://web.mta.info/nyct/service/,Birchall Ave & Sagamore St at NE corner,POINT (-73.86821300022677 40.84912800131844),2-5
2,1736,http://web.mta.info/nyct/service/,Morris Park Ave & 180th St at NW corner,POINT (-73.87349900050798 40.84122300105249),2-5
3,1737,http://web.mta.info/nyct/service/,Morris Park Ave & 180th St at NW corner,POINT (-73.8728919997833 40.84145300067447),2-5
4,1738,http://web.mta.info/nyct/service/,Boston Rd & 178th St at SW corner,POINT (-73.87962300013866 40.84081500075867),2-5
...,...,...,...,...,...
1923,1928,http://web.mta.info/nyct/service/,2nd Ave & bet 83rd & 84th St,POINT (-73.95281423699555 40.77618770946792),Q
1924,1929,http://web.mta.info/nyct/service/,2nd Ave & 96th St at SW corner,POINT (-73.94747299691045 40.784098884955554),Q
1925,1930,http://web.mta.info/nyct/service/,2nd Ave & 95th St at NW corner,POINT (-73.94761889814153 40.783916088309326),Q
1926,1931,http://web.mta.info/nyct/service/,2nd Ave & 94th St at NE corner,POINT (-73.94772938539093 40.78292438898596),Q


In [22]:
# Convert the_geom column to Point objects
df_entries['geometry'] = df_entries['the_geom'].apply(lambda x: Point(float(x.split()[1][1:]), float(x.split()[2][:-1])))


# Extract the latitude and longitude values from the Point objects
df_entries['latitude'] = df_entries['geometry'].apply(lambda x: x.y)
df_entries['longitude'] = df_entries['geometry'].apply(lambda x: x.x)

# MAKE ALL columns lowercase
df_entries.columns = map(str.lower, df_entries.columns)

# select only objectid, name, latitude, longitude, line, and url columns
df_entries = df_entries[['objectid', 'name', 'latitude', 'longitude', 'line', 'url']]

In [23]:
df_entries

Unnamed: 0,objectid,name,latitude,longitude,line,url
0,1734,Birchall Ave & Sagamore St at NW corner,40.849169,-73.868356,2-5,http://web.mta.info/nyct/service/
1,1735,Birchall Ave & Sagamore St at NE corner,40.849128,-73.868213,2-5,http://web.mta.info/nyct/service/
2,1736,Morris Park Ave & 180th St at NW corner,40.841223,-73.873499,2-5,http://web.mta.info/nyct/service/
3,1737,Morris Park Ave & 180th St at NW corner,40.841453,-73.872892,2-5,http://web.mta.info/nyct/service/
4,1738,Boston Rd & 178th St at SW corner,40.840815,-73.879623,2-5,http://web.mta.info/nyct/service/
...,...,...,...,...,...,...
1923,1928,2nd Ave & bet 83rd & 84th St,40.776188,-73.952814,Q,http://web.mta.info/nyct/service/
1924,1929,2nd Ave & 96th St at SW corner,40.784099,-73.947473,Q,http://web.mta.info/nyct/service/
1925,1930,2nd Ave & 95th St at NW corner,40.783916,-73.947619,Q,http://web.mta.info/nyct/service/
1926,1931,2nd Ave & 94th St at NE corner,40.782924,-73.947729,Q,http://web.mta.info/nyct/service/


In [24]:
# export df_entries to csv file with name 'subway_entrances.csv'
df_entries.to_csv('data/subway_entrances_cleaned.csv', index=False)

In [25]:
df_entries.dtypes

objectid       int64
name          object
latitude     float64
longitude    float64
line          object
url           object
dtype: object

## Pedestrian Ramp Locations

In [36]:
# load data/raw/Pedestrian_Ramp_Locations.csv into df_ramps
df_ramps = pd.read_csv('data/raw/Pedestrian_Ramp_Locations.csv')

df_ramps.head()

Unnamed: 0,the_geom,CornerID,RampID,Ramp_OnStreet,GeoCyclora,Borough,StName1,StName2,CURB_REVEAL,RAMP_RUNNING_SLOPE_TOTAL,...,LND_CROSS_SLOPE,COUNTER_SLOPE,RAMP_WIDTH,RAMP_RIGHT_FLARE,RAMP_LEFT_FLARE,RAMP_LENGTH,RAMP_CROSS_SLOPE,PONDING,OBSTACLES_RAMP,OBSTACLES_LANDING
0,POINT (-73.89718748103968 40.83794631113792),1370422,340375,Crotona Ave,10/19/2019,2,CROTONA AVENUE,,999.0,5.5,...,2.2,4.9,60.0,8.8,60.0,3.3,4.3,No,,
1,POINT (-73.84141724237688 40.6976600503253),1041147,20066,86 AVENUE,08/26/2018,4,107 STREET,86 AVENUE,0.3,8.6,...,-1.7,-2.8,50.1,11.1,56.9,-15.9,-0.4,No,,
2,POINT (-73.95311099495017 40.628617251244854),1130090,2924,EAST 24 STREET,04/22/2018,3,AVENUE I,EAST 24 STREET,0.6,8.1,...,0.5,-7.9,54.4,7.6,51.9,-10.3,-1.4,No,,
3,POINT (-74.15032282138301 40.56883236113826),1152698,9863,ARTHUR KILL ROAD,03/15/2018,5,NEWVALE AVENUE,ARTHUR KILL ROAD,0.5,11.1,...,-0.1,3.0,25.9,17.6,44.0,-23.0,-6.5,No,,
4,POINT (-73.90543170653538 40.81543555191576),1004993,869,EAST 152 STREET,03/25/2018,2,EAST 152 STREET,TINTON AVENUE,0.5,0.6,...,-1.1,-3.4,49.0,6.8,60.4,-6.8,-1.6,No,,


In [37]:
# Convert the_geom column to Point objects
df_ramps['geometry'] = df_ramps['the_geom'].apply(lambda x: Point(float(x.split()[1][1:]), float(x.split()[2][:-1])))


# Extract the latitude and longitude values from the Point objects
df_ramps['latitude'] = df_ramps['geometry'].apply(lambda x: x.y)
df_ramps['longitude'] = df_ramps['geometry'].apply(lambda x: x.x)

# MAKE ALL columns lowercase
df_ramps.columns = map(str.lower, df_ramps.columns)

df_ramps.head()


Unnamed: 0,the_geom,cornerid,rampid,ramp_onstreet,geocyclora,borough,stname1,stname2,curb_reveal,ramp_running_slope_total,...,ramp_right_flare,ramp_left_flare,ramp_length,ramp_cross_slope,ponding,obstacles_ramp,obstacles_landing,geometry,latitude,longitude
0,POINT (-73.89718748103968 40.83794631113792),1370422,340375,Crotona Ave,10/19/2019,2,CROTONA AVENUE,,999.0,5.5,...,8.8,60.0,3.3,4.3,No,,,POINT (-73.89718748103968 40.83794631113792),40.837946,-73.897187
1,POINT (-73.84141724237688 40.6976600503253),1041147,20066,86 AVENUE,08/26/2018,4,107 STREET,86 AVENUE,0.3,8.6,...,11.1,56.9,-15.9,-0.4,No,,,POINT (-73.84141724237688 40.6976600503253),40.69766,-73.841417
2,POINT (-73.95311099495017 40.628617251244854),1130090,2924,EAST 24 STREET,04/22/2018,3,AVENUE I,EAST 24 STREET,0.6,8.1,...,7.6,51.9,-10.3,-1.4,No,,,POINT (-73.95311099495017 40.628617251244854),40.628617,-73.953111
3,POINT (-74.15032282138301 40.56883236113826),1152698,9863,ARTHUR KILL ROAD,03/15/2018,5,NEWVALE AVENUE,ARTHUR KILL ROAD,0.5,11.1,...,17.6,44.0,-23.0,-6.5,No,,,POINT (-74.15032282138301 40.56883236113826),40.568832,-74.150323
4,POINT (-73.90543170653538 40.81543555191576),1004993,869,EAST 152 STREET,03/25/2018,2,EAST 152 STREET,TINTON AVENUE,0.5,0.6,...,6.8,60.4,-6.8,-1.6,No,,,POINT (-73.90543170653538 40.81543555191576),40.815436,-73.905432


In [38]:
# select only rampid, latitude, longitude, borough columns
# df_ramps = df_ramps[['rampid', 'latitude', 'longitude', 'borough']]
# df_ramps.head()

### Get the name of the borought for each ramp

In [39]:
# Create a GeoDataFrame from the df_ramps DataFrame
geometry_ramps = [Point(xy) for xy in zip(df_ramps['longitude'], df_ramps['latitude'])]
ramps_gdf = gpd.GeoDataFrame(df_ramps, geometry=geometry_ramps)

# Perform a spatial join to get the borough for each ramp
ramps_with_borough = gpd.sjoin(ramps_gdf, boroughs_gdf, op='within', how='left')

# Now the ramps_with_borough GeoDataFrame contains a 'boro_name' column with the borough information


  if (await self.run_code(code, result,  async_=asy)):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: GEOGCS["WGS84(DD)",DATUM["WGS84",SPHEROID["WGS84", ...

  ramps_with_borough = gpd.sjoin(ramps_gdf, boroughs_gdf, op='within', how='left')


In [40]:
df_ramps = ramps_with_borough

In [41]:
df_ramps

Unnamed: 0,the_geom,cornerid,rampid,ramp_onstreet,geocyclora,borough,stname1,stname2,curb_reveal,ramp_running_slope_total,...,obstacles_ramp,obstacles_landing,geometry,latitude,longitude,index_right,boro_code,boro_name,shape_area,shape_leng
0,POINT (-73.89718748103968 40.83794631113792),1370422,340375,Crotona Ave,10/19/2019,2,CROTONA AVENUE,,999.0,5.5,...,,,POINT (-73.89719 40.83795),40.837946,-73.897187,1.0,2.0,Bronx,1.187175e+09,463179.772813
1,POINT (-73.84141724237688 40.6976600503253),1041147,20066,86 AVENUE,08/26/2018,4,107 STREET,86 AVENUE,0.3,8.6,...,,,POINT (-73.84142 40.69766),40.697660,-73.841417,4.0,4.0,Queens,3.041419e+09,888199.731579
2,POINT (-73.95311099495017 40.628617251244854),1130090,2924,EAST 24 STREET,04/22/2018,3,AVENUE I,EAST 24 STREET,0.6,8.1,...,,,POINT (-73.95311 40.62862),40.628617,-73.953111,3.0,3.0,Brooklyn,1.934143e+09,728197.541089
3,POINT (-74.15032282138301 40.56883236113826),1152698,9863,ARTHUR KILL ROAD,03/15/2018,5,NEWVALE AVENUE,ARTHUR KILL ROAD,0.5,11.1,...,,,POINT (-74.15032 40.56883),40.568832,-74.150323,0.0,5.0,Staten Island,1.623621e+09,325917.353702
4,POINT (-73.90543170653538 40.81543555191576),1004993,869,EAST 152 STREET,03/25/2018,2,EAST 152 STREET,TINTON AVENUE,0.5,0.6,...,,,POINT (-73.90543 40.81544),40.815436,-73.905432,1.0,2.0,Bronx,1.187175e+09,463179.772813
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
217674,POINT (-73.94425547934422 40.64701843757484),1120852,331956,TILDEN AVENUE,08/23/2018,3,TILDEN AVENUE,EAST 35 STREET,0.4,4.1,...,,,POINT (-73.94426 40.64702),40.647018,-73.944255,3.0,3.0,Brooklyn,1.934143e+09,728197.541089
217675,POINT (-73.86754972365252 40.68504205975545),1114724,314127,GRANT AVENUE,10/09/2018,3,GRANT AVENUE,FULTON STREET,0.6,7.8,...,,,POINT (-73.86755 40.68504),40.685042,-73.867550,3.0,3.0,Brooklyn,1.934143e+09,728197.541089
217676,POINT (-74.01137938635985 40.72509321627661),1174199,308262,WEST STREET,10/10/2018,1,WEST STREET,CANAL STREET,999.0,999.0,...,,,POINT (-74.01138 40.72509),40.725093,-74.011379,2.0,1.0,Manhattan,6.365208e+08,357564.316391
217677,POINT (-73.93121415620382 40.6606724644776),1118811,335025,UTICA AVENUE,04/12/2018,3,UTICA AVENUE,RUTLAND ROAD,1.3,5.9,...,,,POINT (-73.93121 40.66067),40.660672,-73.931214,3.0,3.0,Brooklyn,1.934143e+09,728197.541089


In [43]:
# number of rows per boro_name
df_ramps['boro_name'].value_counts()

Queens           79970
Brooklyn         61369
Bronx            29301
Manhattan        23626
Staten Island    23326
Name: boro_name, dtype: int64

In [None]:
# exlude rows with boro_name = 'Staten Island'
df_ramps = df_ramps[df_ramps['boro_name'] != 'Staten Island']

In [44]:
# export df_ramps to csv file with name 'ramps_cleaned.csv' in data/cleaned folder
df_ramps.to_csv('data/cleaned/ramps_cleaned.csv', index=False)

In [45]:
df_ramps.columns

Index(['the_geom', 'cornerid', 'rampid', 'ramp_onstreet', 'geocyclora',
       'borough', 'stname1', 'stname2', 'curb_reveal',
       'ramp_running_slope_total', 'dws_conditions', 'gutter_slope',
       'lnd_width', 'lnd_length', 'lnd_cross_slope', 'counter_slope',
       'ramp_width', 'ramp_right_flare', 'ramp_left_flare', 'ramp_length',
       'ramp_cross_slope', 'ponding', 'obstacles_ramp', 'obstacles_landing',
       'geometry', 'latitude', 'longitude', 'index_right', 'boro_code',
       'boro_name', 'shape_area', 'shape_leng'],
      dtype='object')

## Subway Stations

In [3]:
# load data/raw/DOITT_SUBWAY_STATION_01_13SEPT2010.csv in df_stations
df_stations = pd.read_csv('data/raw/DOITT_SUBWAY_STATION_01_13SEPT2010.csv')
df_stations.head()

Unnamed: 0,URL,OBJECTID,NAME,the_geom,LINE,NOTES
0,http://web.mta.info/nyct/service/,1,Astor Pl,POINT (-73.99106999861966 40.73005400028978),4-6-6 Express,"4 nights, 6-all times, 6 Express-weekdays AM s..."
1,http://web.mta.info/nyct/service/,2,Canal St,POINT (-74.00019299927328 40.71880300107709),4-6-6 Express,"4 nights, 6-all times, 6 Express-weekdays AM s..."
2,http://web.mta.info/nyct/service/,3,50th St,POINT (-73.98384899986625 40.76172799961419),1-2,"1-all times, 2-nights"
3,http://web.mta.info/nyct/service/,4,Bergen St,POINT (-73.97499915116808 40.68086213682956),2-3-4,"4-nights, 3-all other times, 2-all times"
4,http://web.mta.info/nyct/service/,5,Pennsylvania Ave,POINT (-73.89488591154061 40.66471445143568),3-4,"4-nights, 3-all other times"


In [4]:
# Convert the_geom column to Point objects
df_stations['geometry'] = df_stations['the_geom'].apply(lambda x: Point(float(x.split()[1][1:]), float(x.split()[2][:-1])))


# Extract the latitude and longitude values from the Point objects
df_stations['latitude'] = df_stations['geometry'].apply(lambda x: x.y)
df_stations['longitude'] = df_stations['geometry'].apply(lambda x: x.x)

# MAKE ALL columns lowercase
df_stations.columns = map(str.lower, df_stations.columns)

df_stations.head()

Unnamed: 0,url,objectid,name,the_geom,line,notes,geometry,latitude,longitude
0,http://web.mta.info/nyct/service/,1,Astor Pl,POINT (-73.99106999861966 40.73005400028978),4-6-6 Express,"4 nights, 6-all times, 6 Express-weekdays AM s...",POINT (-73.99106999861966 40.73005400028978),40.730054,-73.99107
1,http://web.mta.info/nyct/service/,2,Canal St,POINT (-74.00019299927328 40.71880300107709),4-6-6 Express,"4 nights, 6-all times, 6 Express-weekdays AM s...",POINT (-74.00019299927328 40.71880300107709),40.718803,-74.000193
2,http://web.mta.info/nyct/service/,3,50th St,POINT (-73.98384899986625 40.76172799961419),1-2,"1-all times, 2-nights",POINT (-73.98384899986625 40.76172799961419),40.761728,-73.983849
3,http://web.mta.info/nyct/service/,4,Bergen St,POINT (-73.97499915116808 40.68086213682956),2-3-4,"4-nights, 3-all other times, 2-all times",POINT (-73.97499915116808 40.68086213682956),40.680862,-73.974999
4,http://web.mta.info/nyct/service/,5,Pennsylvania Ave,POINT (-73.89488591154061 40.66471445143568),3-4,"4-nights, 3-all other times",POINT (-73.89488591154061 40.66471445143568),40.664714,-73.894886


In [5]:
# get number of distinct values per column
df_stations.nunique()

url            1
objectid     473
name         355
the_geom     473
line          66
notes        105
geometry     473
latitude     473
longitude    473
dtype: int64

In [6]:
# number of stations per name
df_stations.groupby('name').size()

name
103rd St                   3
103rd St - Corona Plaza    1
104th St                   1
104th-102nd Sts            1
110th St                   1
                          ..
Woodlawn                   1
Woodside - 61st St         1
World Trade Center         1
York St                    1
Zerega Ave                 1
Length: 355, dtype: int64

In [7]:
# number of non null values per column
df_stations.count()

url          473
objectid     473
name         473
the_geom     473
line         473
notes        473
geometry     473
latitude     473
longitude    473
dtype: int64

### Get the name of the borough per station

In [13]:
import geopandas as gpd
from shapely.geometry import Point

# Load the NYC borough boundaries shapefile
boroughs_gdf = gpd.read_file('data/raw/Borough Boundaries/geo_export_2df8fb35-1788-43b3-b328-b2a358ff06e6.shp')

# Create a GeoDataFrame from the df_stations DataFrame
geometry = [Point(xy) for xy in zip(df_stations['longitude'], df_stations['latitude'])]
stations_gdf = gpd.GeoDataFrame(df_stations, geometry=geometry)

# Perform a spatial join to get the borough for each station
stations_with_borough = gpd.sjoin(stations_gdf, boroughs_gdf, op='within', how='left')

# Now the stations_with_borough GeoDataFrame contains a 'boro_name' column with the borough information


  if (await self.run_code(code, result,  async_=asy)):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: GEOGCS["WGS84(DD)",DATUM["WGS84",SPHEROID["WGS84", ...

  stations_with_borough = gpd.sjoin(stations_gdf, boroughs_gdf, op='within', how='left')


In [15]:
df_stations = stations_with_borough

In [16]:
# get the number of non null values in each column
df_stations.count()

url            473
objectid       473
name           473
the_geom       473
line           473
notes          473
geometry       473
latitude       473
longitude      473
index_right    473
boro_code      473
boro_name      473
shape_area     473
shape_leng     473
dtype: int64

In [17]:
# export as csv file with name 'stations_cleaned.csv' in data/cleaned folder
df_stations.to_csv('data/cleaned/stations_cleaned.csv', index=False)

In [13]:
df_stations.shape

(473, 9)

# Subway ridership per station

In [18]:
# read data from excel file called data/raw/subway_ridership_by_station.xlsx from the sheet called 'Annual Total' into a dataframe discard first row
df_ridership = pd.read_excel('data/raw/subway_ridership_by_station.xlsx', sheet_name='Annual Total', skiprows=1)
df_ridership.head()

Unnamed: 0,Station (alphabetical by borough),*,Boro,2016,2017,2018,2019,2020,2021,2020-2021 Change,Unnamed: 10,2021 Rank
0,"138 St-Grand Concourse (4,5)",1.0,Bx,1070024.0,1036746.0,944598.0,1035878.0,371408.014,656866.0,285457.986,0.768583,323.0
1,"149 St-Grand Concourse (2,4,5)",,Bx,4381900.0,4255015.0,3972763.0,3931908.0,1815784.971,1832521.0,16736.029,0.009217,125.0
2,"161 St-Yankee Stadium (B,D,4)",,Bx,8784407.0,8596506.0,8392290.0,8254928.0,3221650.993,4077604.0,855953.007,0.265688,33.0
3,167 St (4),,Bx,3179087.0,2954228.0,2933140.0,2653237.0,1396286.968,1615072.0,218785.032,0.156691,143.0
4,"167 St (B,D)",2.0,Bx,3365748.0,3293451.0,2022919.0,2734530.0,1422149.009,1508270.0,86120.991,0.060557,164.0


In [19]:
# drop column *
df_ridership.drop(columns='*', inplace=True)

In [20]:
# rename column Station (alphabetical by borough) to Station
df_ridership.rename(columns={'Station (alphabetical by borough)': 'Station'}, inplace=True)

In [21]:
df_ridership.dtypes

Station              object
Boro                 object
2016                float64
2017                float64
2018                float64
2019                float64
2020                float64
2021                float64
2020-2021 Change    float64
Unnamed: 10         float64
2021 Rank           float64
dtype: object

In [22]:
# drop 2020-2021 Change, Unnamed: 10, 2021 Rank
df_ridership.drop(columns=['2020-2021 Change', 'Unnamed: 10', '2021 Rank'], inplace=True)
df_ridership.dtypes

Station     object
Boro        object
2016       float64
2017       float64
2018       float64
2019       float64
2020       float64
2021       float64
dtype: object

In [26]:
# get unique values in Boro column
df_ridership['Boro'].unique()

array(['Bx', 'B', 'M', 'Q'], dtype=object)

In [24]:
# count rows with null values per column
df_ridership.isnull().sum()

Station    2
Boro       9
2016       6
2017       3
2018       3
2019       4
2020       4
2021       4
dtype: int64

In [25]:
# drop all the rows with null values
df_ridership.dropna(inplace=True)

In [27]:
# Define the mapping of Boro values to borough names
boro_mapping = {
    'Bx': 'Bronx',
    'B': 'Brooklyn',
    'M': 'Manhattan',
    'Q': 'Queens',
}

# Create a new column boro_name by replacing the Boro values with their corresponding borough names
df_ridership['boro_name'] = df_ridership['Boro'].replace(boro_mapping)

In [28]:
# unique values in boro_name column
df_ridership['boro_name'].unique()

array(['Bronx', 'Brooklyn', 'Manhattan', 'Queens'], dtype=object)

In [32]:
df_ridership.dtypes

Station       object
Boro          object
2016         float64
2017         float64
2018         float64
2019         float64
2020         float64
2021         float64
boro_name     object
dtype: object

In [33]:
# creeate a column average_ridership by taking the mean of the 2016, 2017, 2018, 2019, and 2020 columns
df_ridership['average_ridership'] = df_ridership[[2016, 2017, 2018, 2019, 2020]].mean(axis=1)

In [34]:
df_ridership.head()

Unnamed: 0,Station,Boro,2016,2017,2018,2019,2020,2021,boro_name,average_ridership
0,"138 St-Grand Concourse (4,5)",Bx,1070024.0,1036746.0,944598.0,1035878.0,371408.014,656866.0,Bronx,891730.8
1,"149 St-Grand Concourse (2,4,5)",Bx,4381900.0,4255015.0,3972763.0,3931908.0,1815784.971,1832521.0,Bronx,3671474.0
2,"161 St-Yankee Stadium (B,D,4)",Bx,8784407.0,8596506.0,8392290.0,8254928.0,3221650.993,4077604.0,Bronx,7449956.0
3,167 St (4),Bx,3179087.0,2954228.0,2933140.0,2653237.0,1396286.968,1615072.0,Bronx,2623196.0
4,"167 St (B,D)",Bx,3365748.0,3293451.0,2022919.0,2734530.0,1422149.009,1508270.0,Bronx,2567759.0


In [35]:
# export to csv file with the name subway_ridership_by_station_cleaned.csv and put on the data_cleaned folder
df_ridership.to_csv('data/cleaned/subway_ridership_by_station_cleaned.csv', index=False)