In [204]:
import pandas as pd
import numpy as np
from shapely.geometry import Point, Polygon
import geopandas as gpd

In [2]:
# Subway entrance/exit CSV; source - https://goo.gl/8CuVQF
stops_filepath = 'NYC_Transit_Subway_Entrance_And_Exit_Data_API.csv'
stops = pd.read_csv(filepath_or_buffer=stops_filepath)

In [3]:
# Build a single 'Routes' string per row by concatenating Route1..Route11.
# Every column is cast to str first, so missing values appear as the text
# 'nan'; the '.0' cleanup presumably targets route columns that were read as
# floats (e.g. '1.0') - verify against the CSV dtypes.
route_columns = ['Route{}'.format(i) for i in range(1, 12)]
routes = stops[route_columns[0]].astype(str)
for route_column in route_columns[1:]:
    routes = routes + stops[route_column].astype(str)

# regex=False: match 'nan' and '.0' as literal text. Interpreted as a regex,
# '.0' means "any character followed by 0", and the default value of `regex`
# differs between pandas versions, so literal matching is requested explicitly.
stops['Routes'] = (routes
                   .str.replace('nan', '', regex=False)
                   .str.replace('.0', '', regex=False)
                   .str.strip())

In [4]:
# Keep only the division, station name, coordinates and combined routes columns.
stops = stops.loc[:, ['Division', 'Station Name', 'Station Latitude',
                      'Station Longitude', 'Routes']]

In [5]:
# One row per (division, station name, latitude, longitude) combination;
# each group keeps the 'Routes' value of its first row.
station_keys = ['Division', 'Station Name', 'Station Latitude', 'Station Longitude']
stations_grouped = stops.groupby(station_keys)
station_latlon = stations_grouped.agg({'Routes': lambda rows: rows.iloc[0]}).reset_index()

In [6]:
# Build one shapely Point per station row. Point takes (x, y), so longitude
# comes first and latitude second.
geometry = [Point(lon, lat)
            for lon, lat in zip(station_latlon['Station Longitude'],
                                station_latlon['Station Latitude'])]
# EPSG:4326 given as an authority string: the {'init': 'epsg:4326'} dict form
# is the deprecated PROJ "init" syntax and raises a FutureWarning on current
# geopandas/pyproj releases.
crs = 'EPSG:4326'
gdf = gpd.GeoDataFrame(station_latlon, crs=crs, geometry=geometry)

In [7]:
# Borough GeoJSON file; source - https://goo.gl/L8pQn7
borough_fp = 'boroughs.geojson'
boroughs = gpd.read_file(filename=borough_fp)

In [8]:
# Take the single cell at row position 3, column position 4 of `boroughs`.
# NOTE(review): presumably this is the Manhattan geometry - both positions are
# hard-coded, so confirm them against the row/column order of the GeoJSON.
manhattan = boroughs.iloc[3,4]

In [9]:
# Flag every station point that lies within the `manhattan` geometry.
station_points = gdf.geometry
gdf['mh_bool'] = station_points.within(manhattan)

In [10]:
# Dedupe stations once more (there seem to be multiple per station): for each
# (station name, routes) pair keep the first division, latitude and longitude,
# then switch to short snake_case column names.
in_manhattan = gdf[gdf['mh_bool']]
first_per_group = {column: (lambda values: values.iloc[0])
                   for column in ('Division', 'Station Latitude', 'Station Longitude')}
output_names = {
    'Station Name': 'station_name',
    'Routes': 'routes',
    'Division': 'division',
    'Station Latitude': 'lat',
    'Station Longitude': 'lon',
}
manhattan_stations = (in_manhattan
                      .groupby(['Station Name', 'Routes'])
                      .agg(first_per_group)
                      .reset_index()
                      .rename(columns=output_names))

In [11]:
# Write the deduplicated Manhattan stations to CSV, leaving out the index column.
manhattan_stations.to_csv('mh_station_coords.csv', index=False, encoding='utf-8')