In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd

from tqdm import tqdm
tqdm.pandas(desc="Applying function")

## Filter with the Manhattan boundary

Please do not start this part if you have the `building_manhattan.geojson` file

In [2]:
# import dataset (from https://data.cityofnewyork.us/Housing-Development/Building-Footprints/nqwf-w8eh)
# currently we sampled entire dataset due to submit
gdf_building = gpd.read_file('../../data/raw/building/building_sample_raw.geojson', dtype={'mpluto_bbl':str})

In [3]:
# filtering buildings with the Manhattan boundary
gdf_manhattan = gpd.read_file('../../data/raw/boundary/Borough Boundaries.geojson')
gdf_manhattan = gdf_manhattan.loc[gdf_manhattan.loc[:,'boro_name'] == 'Manhattan']

gdf_building_manhattan = gpd.sjoin(gdf_building, gdf_manhattan, how='inner', op='intersects')
gdf_building_manhattan = gdf_building_manhattan.drop(['index_right', 'boro_code', 'boro_name', 'shape_leng'], axis=1)

  if await self.run_code(code, result, async_=asy):


## Filter by Land use

In [4]:
# import dataset (from https://www.nyc.gov/site/planning/data-maps/open-data/dwn-pluto-mappluto.page)
# currently we sampled entire dataset due to submit
gdf_pluto_manhattan = gpd.read_file('../../data/raw/landuse/pluto_sample.geojson', dtype={'BBL':str})

# currently not applied, filtering lots that has total commercial area more than 0
# gdf_pluto_manhattan_commercial = gdf_pluto_manhattan.loc[gdf_pluto_manhattan.loc[:,'ComArea']>0]
gdf_pluto_manhattan_commercial = gdf_pluto_manhattan.copy()
gdf_pluto_manhattan_commercial.loc[:,'BBL'] = gdf_pluto_manhattan_commercial.loc[:,'BBL'].astype('int64').astype(str)

 '1010160054']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  gdf_pluto_manhattan_commercial.loc[:,'BBL'] = gdf_pluto_manhattan_commercial.loc[:,'BBL'].astype('int64').astype(str)


In [5]:
# merge the building footprint and mappluto data. Office, residential and retail area will be included in the building footprint
gdf_building_manhattan_commercial = gdf_building_manhattan.merge(pd.DataFrame(gdf_pluto_manhattan_commercial.loc[:,['BBL',
                                                                                                                    'OfficeArea',
                                                                                                                    'RetailArea',
                                                                                                                    'ResArea']]), left_on='mpluto_bbl', right_on='BBL', how='inner').drop('BBL', axis=1)

In [6]:
gdf_building_manhattan_commercial = gdf_building_manhattan_commercial.drop_duplicates(subset=['bin','globalid'])

In [7]:
gdf_building_manhattan_commercial

Unnamed: 0,name,base_bbl,heightroof,mpluto_bbl,cnstrct_yr,globalid,lststatype,feat_code,groundelev,geomsource,bin,lstmoddate,doitt_id,shape_len,geometry,shape_area,OfficeArea,RetailArea,ResArea
0,,1006100054,55.89,1006100054,1890,{3AEADBE0-CC54-4D24-B238-035EACC2FCA7},Constructed,2100,20,Photogramm,1010689,2020-01-28,181286,0.0,"MULTIPOLYGON (((-74.00001 40.73447, -74.00008 ...",636520502.758,0,1050,8294
2,,1014390003,43.48,1014390003,1920,{16E3A900-8D55-4377-ABC9-3CFEAD5AABEA},Constructed,2100,57,Photogramm,1044690,2017-08-22,597314,,POINT (-73.96166 40.76386),636520502.758,0,2175,6525
3,,1011480001,164.86,1011480001,1925,{A569DF64-C060-4E22-A45B-C3AAF3994CF0},Constructed,2100,79,Photogramm,1030169,2017-08-22,322539,0.0,"MULTIPOLYGON (((-73.97925 40.78140, -73.97927 ...",636520502.758,0,5000,118840
4,,1019660033,57.41,1019660033,1901,{FF1D608A-A8F9-4BED-8330-01D6E785BBDE},Constructed,2100,24,Photogramm,1084099,2017-08-22,630630,0.0,"MULTIPOLYGON (((-73.95580 40.81363, -73.95579 ...",636520502.758,0,3500,15130
5,,1019880018,80.0,1019880018,2017,{F3EEBE92-794C-4F0B-914A-86A784D1532B},Constructed,2100,105,Other (Man,1089415,2019-12-05,1290723,0.0,"MULTIPOLYGON (((-73.95401 40.81887, -73.95389 ...",636520502.758,2962,0,16905
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6143,,1022380001,66.64,1022380001,1952,{229CCEE9-7A0D-4559-B302-429BC53E7ECE},Constructed,2100,34,Photogramm,1064950,2017-08-22,367013,0.0,"MULTIPOLYGON (((-73.92504 40.86719, -73.92501 ...",636520502.758,1000,0,61964
6144,,1002030010,77.46,1002030010,1890,{8CEF68E8-47D9-4D24-817F-E25BDE985A47},Constructed,2100,41,Photogramm,1077584,2017-08-22,845537,0.0,"MULTIPOLYGON (((-73.99633 40.71730, -73.99633 ...",636520502.758,0,590,22664
6145,,1015300028,47.07,1015300028,1920,{FA55A8C7-2668-42F4-A6CE-AA4142E24646},Constructed,2100,66,Photogramm,1048743,2017-08-22,602526,0.0,"MULTIPOLYGON (((-73.95249 40.77712, -73.95265 ...",636520502.758,0,2282,6482
6146,,1004820002,76.1,1004820002,1888,{EDB5257F-422A-46F8-AD16-1650AB007E29},Constructed,2100,28,Photogramm,1007207,2020-01-28,165675,0.0,"MULTIPOLYGON (((-73.99834 40.72165, -73.99839 ...",636520502.758,0,2075,9445


In [8]:
gdf_building_manhattan_commercial = gdf_building_manhattan_commercial.drop(['name', 'base_bbl', 'lststatype', 'feat_code', 'groundelev','lstmoddate', 'doitt_id','geomsource'], axis=1)

In [9]:
gdf_building_manhattan_commercial.to_file('../../data/processed/building/building_sample.geojson', driver='GeoJSON')