## Calculate the nearest subway station

In [5]:
import numpy as np
import pandas as pd
import geopandas as gpd

from tqdm import tqdm
# Enable `.progress_apply` on pandas objects (used by the IDW cells below);
# `desc` is the label shown in front of each progress bar.
tqdm.pandas(desc="Applying function")

from shapely.geometry import Point

In [6]:
# Read the processed building layer into a GeoDataFrame.
gdf_building = gpd.read_file(
    '../../data/processed/building/building_manhattan_commercial_street.geojson'
)


In [7]:
# Read the processed 2023 subway ridership layer into a GeoDataFrame.
gdf_subway_ridership = gpd.read_file(
    '../../data/processed/traffic/2023_subway_ridership.geojson'
)


In [9]:
# Reproject the stations to EPSG:2263 so the nearest-station distance computed
# below comes out in that CRS's units (the distance column is labelled in feet).
# NOTE(review): gdf_building is not reprojected here; presumably it is already
# stored in EPSG:2263 -- confirm.
gdf_subway_ridership = gdf_subway_ridership.to_crs(epsg=2263)

In [10]:
# Attach the attributes of the nearest subway station to every building and
# record the building-to-station distance in 'distance_from_station(ft)'.
# The nearest join can emit more than one row per building, so only the first
# row per (bin, mpluto_bbl, globalid) key is kept.
gdf_building = (
    gdf_building
    .sjoin_nearest(gdf_subway_ridership, distance_col='distance_from_station(ft)')
    .drop(columns='index_right')
    .drop_duplicates(subset=['bin', 'mpluto_bbl', 'globalid'])
)

In [12]:
# Preview the first rows after the station join.
gdf_building.head(n=5)

Unnamed: 0,heightroof,mpluto_bbl,cnstrct_yr,globalid,bin,OfficeArea,RetailArea,ResArea,OBJECTID,StreetWidth_Min,...,POSTED_SPEED,betweeness,geometry,station_complex_id,ridership_evening,ridership_late_night,ridership_midday,ridership_morning,ridership_night,distance_from_station(ft)
0,59.722628,1021210037,1910,{A0E56BCC-A86B-4CEF-9A42-9B4ECD61743F},1062896,0,2250,10344,100007,60.0,...,25,0.012116,"MULTIPOLYGON (((1001211.761 244524.798, 100123...",149,54.058545,22.605253,104.186125,104.287653,19.346238,705.651652
76,60.08900155,1021110006,1904,{B585F6C4-2EF9-4E6A-B4FF-9AED827BD2E6},1062631,0,1500,7800,106805,60.0,...,25,0.010886,"MULTIPOLYGON (((1001321.856 244431.087, 100132...",149,54.058545,22.605253,104.186125,104.287653,19.346238,689.064718
128,58.51687336,1021210051,1900,{FB98F3AC-C161-41D7-9477-EBF46687D23E},1062903,0,0,10000,105281,30.0,...,25,0.022044,"MULTIPOLYGON (((1001100.355 244282.364, 100108...",149,54.058545,22.605253,104.186125,104.287653,19.346238,470.652445
149,29.24217924,1021210056,1920,{00BC7F46-F3AA-4136-8905-F3C36AD18692},1062908,0,2000,0,105250,30.0,...,25,0.008088,"MULTIPOLYGON (((1000940.729 244197.656, 100093...",149,54.058545,22.605253,104.186125,104.287653,19.346238,319.230087
301,68.12359913,1021180043,1916,{95AB7455-3C25-4B68-9C92-2890103102EB},1062799,0,1300,14258,116745,30.0,...,25,0.004009,"MULTIPOLYGON (((1000408.128 243279.307, 100041...",149,54.058545,22.605253,104.186125,104.287653,19.346238,661.720732


## Mapping AADT 2019 ~ 2021

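The estimated AADT (annual average daily traffic) for each building is the inverse-distance-weighted average of the AADT values of all AADT features for that year, using weights $w_i = 1 / (d_i + 1)$, where $d_i$ is the distance from the building to feature $i$ in the units of the projected CRS (EPSG:2263).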

In [13]:
# Load each year's AADT layer, reproject it to EPSG:2263, and discard rows that
# have no AADT value. The 2020 and 2021 files store the value under a
# year-specific column name, which is renamed to the common 'AADT'.
gdf_aadt_2019 = (
    gpd.read_file('../../data/raw/aadt/aadt_2019.geojson')
    .to_crs(2263)
    .dropna(subset=['AADT'])
)

gdf_aadt_2020 = (
    gpd.read_file('../../data/raw/aadt/aadt_2020.geojson')
    .to_crs(2263)
    .dropna(subset=['MAT_ALH_2020_csv_AADT'])
    .rename(columns={'MAT_ALH_2020_csv_AADT': 'AADT'})
)

gdf_aadt_2021 = (
    gpd.read_file('../../data/raw/aadt/aadt_2021.geojson')
    .to_crs(2263)
    .dropna(subset=['MAT_ALH_2021_csv_AADT'])
    .rename(columns={'MAT_ALH_2021_csv_AADT': 'AADT'})
)

In [14]:
def calculate_idw_aadt_average(building, gdf_aadt, distance_offset=1):
    """Return the inverse-distance-weighted mean AADT around one building.

    Every row of ``gdf_aadt`` contributes to the average with weight
    ``1 / (distance + distance_offset)``, where ``distance`` is the value
    returned by ``gdf_aadt.distance(building)`` (in the units of the layer's
    CRS).

    Parameters
    ----------
    building : geometry
        Geometry handed to ``gdf_aadt.distance``.
    gdf_aadt : GeoDataFrame
        Layer providing an ``'AADT'`` column and a ``distance`` method.
    distance_offset : float, default 1
        Constant added to every distance before inverting it, so that a
        feature at distance 0 still receives a finite weight. Keep it
        positive for that guarantee to hold.

    Returns
    -------
    float
        The weighted average, or NaN when ``gdf_aadt`` has no rows.
    """
    # np.average raises ZeroDivisionError when the weights are empty; report
    # "no data" as NaN instead so a column-wise apply can complete.
    if len(gdf_aadt) == 0:
        return np.nan

    distances = gdf_aadt.distance(building)
    weights = 1 / (distances + distance_offset)

    return np.average(gdf_aadt['AADT'], weights=weights)

In [15]:
# Inverse-distance-weighted 2019 AADT for every building geometry.
gdf_building.loc[:, 'idw_aadt_2019'] = gdf_building['geometry'].progress_apply(
    lambda footprint: calculate_idw_aadt_average(footprint, gdf_aadt_2019)
)

Applying function:   0%|          | 0/21123 [00:00<?, ?it/s]

Applying function: 100%|██████████| 21123/21123 [05:20<00:00, 65.82it/s] 


In [16]:
# Inverse-distance-weighted 2020 AADT for every building geometry.
gdf_building.loc[:, 'idw_aadt_2020'] = gdf_building['geometry'].progress_apply(
    lambda footprint: calculate_idw_aadt_average(footprint, gdf_aadt_2020)
)

Applying function: 100%|██████████| 21123/21123 [02:00<00:00, 175.59it/s]


In [17]:
# Inverse-distance-weighted 2021 AADT for every building geometry.
gdf_building.loc[:, 'idw_aadt_2021'] = gdf_building['geometry'].progress_apply(
    lambda footprint: calculate_idw_aadt_average(footprint, gdf_aadt_2021)
)

Applying function: 100%|██████████| 21123/21123 [02:02<00:00, 173.06it/s]


In [18]:
# Preview the first rows with the three idw_aadt_* columns added.
gdf_building.head(n=5)

Unnamed: 0,heightroof,mpluto_bbl,cnstrct_yr,globalid,bin,OfficeArea,RetailArea,ResArea,OBJECTID,StreetWidth_Min,...,station_complex_id,ridership_evening,ridership_late_night,ridership_midday,ridership_morning,ridership_night,distance_from_station(ft),idw_aadt_2019,idw_aadt_2020,idw_aadt_2021
0,59.722628,1021210037,1910,{A0E56BCC-A86B-4CEF-9A42-9B4ECD61743F},1062896,0,2250,10344,100007,60.0,...,149,54.058545,22.605253,104.186125,104.287653,19.346238,705.651652,13651.722938,15362.929888,17083.134233
76,60.08900155,1021110006,1904,{B585F6C4-2EF9-4E6A-B4FF-9AED827BD2E6},1062631,0,1500,7800,106805,60.0,...,149,54.058545,22.605253,104.186125,104.287653,19.346238,689.064718,13731.279224,15647.567223,17392.770481
128,58.51687336,1021210051,1900,{FB98F3AC-C161-41D7-9477-EBF46687D23E},1062903,0,0,10000,105281,30.0,...,149,54.058545,22.605253,104.186125,104.287653,19.346238,470.652445,13519.132539,14916.654943,16589.235098
149,29.24217924,1021210056,1920,{00BC7F46-F3AA-4136-8905-F3C36AD18692},1062908,0,2000,0,105250,30.0,...,149,54.058545,22.605253,104.186125,104.287653,19.346238,319.230087,12179.309481,12851.405406,14258.076357
301,68.12359913,1021180043,1916,{95AB7455-3C25-4B68-9C92-2890103102EB},1062799,0,1300,14258,116745,30.0,...,149,54.058545,22.605253,104.186125,104.287653,19.346238,661.720732,14385.750089,16279.141498,18138.409553


In [19]:
# Give the source columns snake_case names; the result is a new frame and
# gdf_building itself is left as it was.
gdf_building_renamed = gdf_building.rename(
    columns={
        'heightroof': 'height',
        'OfficeArea': 'office_area',
        'RetailArea': 'retail_area',
        'ResArea': 'residential_area',
        'StreetWidth_Min': 'street_width_min',
        'StreetWidth_Max': 'street_width_max',
        'POSTED_SPEED': 'posted_speed',
    }
)

## Mapping ATVC 2018 ~ 2020

In [20]:
# Load the vehicle-count layer, reproject it to EPSG:2263, and keep only the
# rows whose 'Yr' is earlier than 2022.
gdf_atvc = (
    gpd.read_file('../../data/raw/traffic_count/vehicle count.geojson')
    .to_crs(2263)
    .loc[lambda frame: frame['Yr'] < 2022]
)

In [21]:
def calculate_idw_atvc_average(building, gdf_atvc, year, distance_offset=1):
    """Return the inverse-distance-weighted mean vehicle count for one year.

    Only rows of ``gdf_atvc`` whose ``'Yr'`` equals ``year`` take part. Each of
    them contributes its ``'median'`` value with weight
    ``1 / (distance + distance_offset)``, where ``distance`` is the value
    returned by the layer's ``distance(building)`` (in the units of its CRS).

    Parameters
    ----------
    building : geometry
        Geometry handed to the layer's ``distance`` method.
    gdf_atvc : GeoDataFrame
        Layer providing ``'Yr'`` and ``'median'`` columns.
    year : int
        Value of ``'Yr'`` to select.
    distance_offset : float, default 1
        Constant added to every distance before inverting it, so that a
        feature at distance 0 still receives a finite weight. Keep it
        positive for that guarantee to hold.

    Returns
    -------
    float
        The weighted average, or NaN when no row matches ``year``.

    Notes
    -----
    The year filter is evaluated on every call.
    """
    counts_for_year = gdf_atvc.loc[gdf_atvc['Yr'] == year]

    # np.average raises ZeroDivisionError when the weights are empty; report a
    # year without observations as NaN instead.
    if len(counts_for_year) == 0:
        return np.nan

    distances = counts_for_year.distance(building)
    weights = 1 / (distances + distance_offset)

    return np.average(counts_for_year['median'], weights=weights)

In [22]:
# Inverse-distance-weighted 2018 vehicle count for every building geometry.
gdf_building_renamed.loc[:, 'idw_atvc_2018'] = gdf_building_renamed['geometry'].progress_apply(
    lambda footprint: calculate_idw_atvc_average(footprint, gdf_atvc, 2018)
)

Applying function: 100%|██████████| 21123/21123 [00:36<00:00, 579.04it/s]


In [23]:
# Inverse-distance-weighted 2019 vehicle count for every building geometry.
gdf_building_renamed.loc[:, 'idw_atvc_2019'] = gdf_building_renamed['geometry'].progress_apply(
    lambda footprint: calculate_idw_atvc_average(footprint, gdf_atvc, 2019)
)

Applying function: 100%|██████████| 21123/21123 [00:44<00:00, 470.78it/s]


In [24]:
# Inverse-distance-weighted 2020 vehicle count for every building geometry.
gdf_building_renamed.loc[:, 'idw_atvc_2020'] = gdf_building_renamed['geometry'].progress_apply(
    lambda footprint: calculate_idw_atvc_average(footprint, gdf_atvc, 2020)
)

Applying function: 100%|██████████| 21123/21123 [00:24<00:00, 871.08it/s] 


In [29]:
# Write the attribute table, without the geometry column or the index, to CSV.
(
    gdf_building_renamed
    .drop(columns='geometry')
    .to_csv('../../data/processed/building/building_240324_1726.csv', index=False)
)

In [30]:
# Write the full layer, geometry included, as GeoJSON.
gdf_building_renamed.to_file(
    '../../data/processed/building/building_manhattan_commercial_vt.geojson',
    driver='GeoJSON',
)

In [31]:
# Show the final table; the rendered output elides the middle rows and columns.
gdf_building_renamed

Unnamed: 0,height,mpluto_bbl,cnstrct_yr,globalid,bin,office_area,retail_area,residential_area,OBJECTID,street_width_min,...,ridership_midday,ridership_morning,ridership_night,distance_from_station(ft),idw_aadt_2019,idw_aadt_2020,idw_aadt_2021,idw_atvc_2018,idw_atvc_2019,idw_atvc_2020
0,59.722628,1021210037,1910,{A0E56BCC-A86B-4CEF-9A42-9B4ECD61743F},1062896,0,2250,10344,100007,60.0,...,104.186125,104.287653,19.346238,705.651652,13651.722938,15362.929888,17083.134233,110.061311,140.829357,332.410589
76,60.08900155,1021110006,1904,{B585F6C4-2EF9-4E6A-B4FF-9AED827BD2E6},1062631,0,1500,7800,106805,60.0,...,104.186125,104.287653,19.346238,689.064718,13731.279224,15647.567223,17392.770481,111.320436,143.367502,332.935169
128,58.51687336,1021210051,1900,{FB98F3AC-C161-41D7-9477-EBF46687D23E},1062903,0,0,10000,105281,30.0,...,104.186125,104.287653,19.346238,470.652445,13519.132539,14916.654943,16589.235098,110.682936,142.131034,328.194496
149,29.24217924,1021210056,1920,{00BC7F46-F3AA-4136-8905-F3C36AD18692},1062908,0,2000,0,105250,30.0,...,104.186125,104.287653,19.346238,319.230087,12179.309481,12851.405406,14258.076357,110.494014,142.546043,325.820664
301,68.12359913,1021180043,1916,{95AB7455-3C25-4B68-9C92-2890103102EB},1062799,0,1300,14258,116745,30.0,...,104.186125,104.287653,19.346238,661.720732,14385.750089,16279.141498,18138.409553,103.508549,144.263362,311.721667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8750,443.16,1000210001,1913,{3132397C-BB45-4776-BB37-7F473F482A5D},1000808,633379,15361,0,97286,5.0,...,147.077932,71.685322,40.327193,41.775053,9170.827823,8814.729210,9848.986714,89.726629,150.104876,360.403070
8951,402.05,1000200009,1983,{41A67EE5-56E9-4A54-8D5B-C6F690BEED6E},1000804,364483,3832,0,112549,30.0,...,147.077932,71.685322,40.327193,168.938190,10065.608960,10014.972462,11162.886474,94.655245,146.524023,359.554874
11444,379.41,1000200001,1931,{8E8CB43B-24D6-439E-A768-AA350F645EEC},1000802,312250,3900,0,112930,15.0,...,147.077932,71.685322,40.327193,333.470248,10231.526899,10324.750407,11506.268296,87.441905,139.617030,358.932692
18955,50.0,1000197501,1810,{641D894B-C433-4096-931D-DFDE209D8556},1000796,0,7542,138018,92093,32.0,...,147.077932,71.685322,40.327193,53.790295,11211.305428,11283.125574,12564.821672,96.122324,151.304396,357.264371
