In [1]:
import pandas as pd
import geopandas as gpd

from shapely.geometry import Point

## Filter with the Manhattan boundary

Skip this section if you already have the `building_manhattan.geojson` file; it only needs to be run once to create that file.

In [2]:
# Load the citywide building footprints.
# Source: https://data.cityofnewyork.us/Housing-Development/Building-Footprints/nqwf-w8eh
gdf_building = gpd.read_file(
    '../../data/raw/building/Building Footprints.geojson'
)

In [None]:
# Restrict the building footprints to those that intersect the Manhattan boundary.
gdf_manhattan = gpd.read_file('../../data/raw/boundary/Borough Boundaries.geojson')
gdf_manhattan = gdf_manhattan.loc[gdf_manhattan['boro_name'] == 'Manhattan']

# `predicate=` is the replacement for the deprecated `op=` keyword of sjoin
# (available since geopandas 0.10, the release that also added sjoin_nearest,
# which this notebook relies on further down).
gdf_building_manhattan = gpd.sjoin(
    gdf_building,
    gdf_manhattan,
    how='inner',
    predicate='intersects',
)

In [None]:
# Cache the Manhattan-only subset on disk so the filtering step above can be skipped later.
gdf_building_manhattan.to_file(
    '../../data/raw/building/building_manhattan.geojson',
    driver='GeoJSON',
)

## Calculate the nearest street

In [2]:
# Reload the cached Manhattan buildings and read the processed street network.
gdf_building_manhattan = gpd.read_file(
    '../../data/raw/building/building_manhattan.geojson'
)
gdf_streets = gpd.read_file(
    '../../data/processed/street_network.geojson'
)

# Remove columns that are not used downstream (index_right / boro_* presumably
# come from the borough-boundary join that produced the cached file).
gdf_building_manhattan = gdf_building_manhattan.drop(
    columns=[
        'shape_len',
        'index_right',
        'boro_code',
        'boro_name',
        'shape_area_right',
        'shape_leng',
    ]
)

In [3]:
# unify the coordinates systems
gdf_building_manhattan = gdf_building_manhattan.to_crs(2263)
gdf_streets.crs == gdf_building_manhattan.crs

True

In [4]:
# Attach the attributes of the nearest street segment to each building.
gdf_building_manhattan = gdf_building_manhattan.sjoin_nearest(
    gdf_streets[['OBJECTID', 'StreetWidth_Min', 'StreetWidth_Max', 'POSTED_SPEED', 'geometry']]
)

# The spatial join created duplicated building rows, so keep a single row per
# building key and discard the join's helper index column.
gdf_building_manhattan = (
    gdf_building_manhattan
    .drop_duplicates(subset=['base_bbl', 'mpluto_bbl', 'globalid'])
    .drop(columns='index_right')
)

In [5]:
# Preview the first rows after the street join.
gdf_building_manhattan.head(n=5)

Unnamed: 0,name,base_bbl,shape_area_left,heightroof,mpluto_bbl,cnstrct_yr,globalid,lststatype,feat_code,groundelev,geomsource,bin,lstmoddate,doitt_id,geometry,OBJECTID,StreetWidth_Min,StreetWidth_Max,POSTED_SPEED
0,,1021210037,0.0,59.722628,1021210037,1910,{A0E56BCC-A86B-4CEF-9A42-9B4ECD61743F},Constructed,2100,154,Photogramm,1062896,2017-08-22,708881,"MULTIPOLYGON (((1001211.761 244524.798, 100123...",100007,60.0,60.0,25
1,,1008870001,0.0,155.49,1008870001,1935,{0B06F7A8-1F83-44F3-97DD-AC524A6374CA},Constructed,2100,37,Photogramm,1018457,2017-08-22,584238,"MULTIPOLYGON (((989135.970 210787.603, 989181....",101045,32.0,32.0,25
2,,1013490035,0.0,167.21446831,1013490035,1931,{A091951D-C73E-4B77-9A61-716C666C5446},Constructed,2100,52,Photogramm,1039988,2017-08-22,66240,"MULTIPOLYGON (((994153.480 215659.003, 994165....",99804,60.0,60.0,25
3,,1010610006,0.0,28.57,1010610006,1910,{2BE9F674-67E7-4A49-8535-F636A16D8DA5},Constructed,2100,40,Photogramm,1026714,2017-08-22,23730,"MULTIPOLYGON (((986858.328 218070.430, 986839....",103068,32.0,32.0,25
4,,1003060013,0.0,73.13,1003060013,1900,{2403088E-77A6-4071-8B5E-D54AFBC720D9},Constructed,2100,39,Photogramm,1003956,2017-08-22,512541,"MULTIPOLYGON (((986180.396 200723.234, 986187....",92425,34.0,45.0,25


## Calculate the nearest subway station

In [6]:
# Load the MTA subway station table (plain CSV, no geometry yet).
df_station = pd.read_csv(
    '../../data/raw/mta_station/MTA_Subway_Stations.csv'
)

In [7]:
# Build station point geometries from the GTFS coordinates. Points are (x, y),
# i.e. (longitude, latitude); points_from_xy makes that order explicit and
# converts the whole column at once instead of building one Point per row.
points = gpd.points_from_xy(
    df_station['GTFS Longitude'],
    df_station['GTFS Latitude'],
)
gdf_station = gpd.GeoDataFrame(df_station, geometry=points, crs=4326)

# Reproject to EPSG:2263, the CRS the building layer was converted to.
gdf_station = gdf_station.to_crs(2263)

In [8]:
# Attach the stop ID of the nearest subway station and the distance to it.
gdf_building_manhattan = gdf_building_manhattan.sjoin_nearest(
    gdf_station[['GTFS Stop ID', 'geometry']],
    distance_col='distance_from_station(ft)',
)
gdf_building_manhattan = gdf_building_manhattan.drop(columns='index_right')

# The spatial join created duplicated building rows, so keep a single row per
# building key.
gdf_building_manhattan = gdf_building_manhattan.drop_duplicates(
    subset=['base_bbl', 'mpluto_bbl', 'globalid']
)

In [9]:
# Discard the identifier and bookkeeping columns that are not exported.
gdf_building_manhattan = gdf_building_manhattan.drop(
    columns=[
        'name',
        'mpluto_bbl',
        'OBJECTID',
        'globalid',
        'base_bbl',
        'shape_area_left',
        'cnstrct_yr',
        'lststatype',
        'feat_code',
        'groundelev',
        'geomsource',
        'bin',
        'lstmoddate',
        'doitt_id',
    ]
)

In [10]:
# Write the processed buildings as GeoJSON, reprojected to EPSG:4326 for the export.
gdf_building_manhattan.to_crs(epsg=4326).to_file(
    '../../data/processed/building.geojson',
    driver='GeoJSON',
)

In [15]:
# Display the resulting frame as the cell output.
# NOTE(review): the saved output below still lists columns (mpluto_bbl, globalid,
# OBJECTID) that the earlier drop cell removes, and the execution count jumps
# from 10 to 15 — the output looks stale; re-run from a fresh kernel to refresh it.
gdf_building_manhattan

Unnamed: 0,heightroof,mpluto_bbl,globalid,geometry,OBJECTID,StreetWidth_Min,StreetWidth_Max,POSTED_SPEED,GTFS Stop ID,distance_from_station(ft)
0,59.722628,1021210037,{A0E56BCC-A86B-4CEF-9A42-9B4ECD61743F},"MULTIPOLYGON (((1001211.761 244524.798, 100123...",100007,60.0,60.0,25,A10,705.429228
1,155.49,1008870001,{0B06F7A8-1F83-44F3-97DD-AC524A6374CA},"MULTIPOLYGON (((989135.970 210787.603, 989181....",101045,32.0,32.0,25,632,302.555369
2,167.21446831,1013490035,{A091951D-C73E-4B77-9A61-716C666C5446},"MULTIPOLYGON (((994153.480 215659.003, 994165....",99804,60.0,60.0,25,F11,1378.469618
3,28.57,1010610006,{2BE9F674-67E7-4A49-8535-F636A16D8DA5},"MULTIPOLYGON (((986858.328 218070.430, 986839....",103068,32.0,32.0,25,A25,1620.005364
4,73.13,1003060013,{2403088E-77A6-4071-8B5E-D54AFBC720D9},"MULTIPOLYGON (((986180.396 200723.234, 986187....",92425,34.0,45.0,25,D22,292.273497
...,...,...,...,...,...,...,...,...,...,...
45397,125.0,1016000020,{EF73AB4D-5006-4406-ADF9-2FB65FC7B2C8},"MULTIPOLYGON (((998830.388 231258.315, 998700....",123643,,,,226,534.072735
45398,338.0,1000920030,{5CDE3269-D9A3-4732-BBB0-AD92B46BDDF2},"MULTIPOLYGON (((982522.527 198275.305, 982453....",99112,20.0,22.0,25,M22,311.235059
45399,84.72,1000920032,{5D66351B-DDFF-43FB-A05D-E5A3E861CE1B},"MULTIPOLYGON (((982550.394 198238.001, 982484....",112560,22.0,24.0,25,M22,330.921614
45400,53.48,1017080003,{1B8DDD75-71EA-44FA-A2DD-213D6A98D390},"MULTIPOLYGON (((1002022.923 228852.265, 100200...",108290,70.0,70.0,25,622,2053.559959
