## Calculate the nearest subway station

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd

from tqdm import tqdm
tqdm.pandas(desc="Applying function")

from shapely.geometry import Point

In [2]:
# Load the building layer that every later cell joins against.
# NOTE(review): the coordinates shown in the preview look like EPSG:2263 (feet),
# matching the .to_crs(2263) applied to the other layers — confirm the file's CRS.
gdf_building = gpd.read_file(
    '../../data/processed/building/building_manhattan_commercial_street.geojson'
)


In [3]:
# Preview the building layer; the rendered table is truncated to the first and last rows.
gdf_building

Unnamed: 0,heightroof,mpluto_bbl,cnstrct_yr,globalid,bin,OfficeArea,RetailArea,ResArea,OBJECTID,StreetWidth_Min,StreetWidth_Max,POSTED_SPEED,betweeness,geometry
0,59.722628,1021210037,1910,{A0E56BCC-A86B-4CEF-9A42-9B4ECD61743F},1062896,0,2250,10344,100007,60.0,60.0,25,0.012116,"MULTIPOLYGON (((1001211.761 244524.798, 100123..."
1,155.49,1008870001,1935,{0B06F7A8-1F83-44F3-97DD-AC524A6374CA},1018457,50018,4000,0,101045,32.0,32.0,25,0.027505,"MULTIPOLYGON (((989135.970 210787.603, 989181...."
2,167.21446831,1013490035,1931,{A091951D-C73E-4B77-9A61-716C666C5446},1039988,2890,0,70110,99804,60.0,60.0,25,0.012333,"MULTIPOLYGON (((994153.480 215659.003, 994165...."
3,28.57,1010610006,1910,{2BE9F674-67E7-4A49-8535-F636A16D8DA5},1026714,0,0,0,103068,32.0,32.0,25,0.022920,"MULTIPOLYGON (((986858.328 218070.430, 986839...."
4,73.13,1003060013,1900,{2403088E-77A6-4071-8B5E-D54AFBC720D9},1003956,0,3800,0,92425,34.0,45.0,25,0.033656,"MULTIPOLYGON (((986180.396 200723.234, 986187...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37404,26.00608505,1003060022,2012,{BCEF4A7B-9A3B-45DE-BEBD-64809EAA3861},1077601,5000,5000,0,95865,40.0,40.0,25,0.020131,"MULTIPOLYGON (((986313.302 200636.585, 986311...."
37405,35.44835474,1016280057,1923,{B072371A-F903-41B1-B7AF-DA0BC4835B50},1051816,0,0,0,99663,30.0,30.0,25,0.039044,"MULTIPOLYGON (((998256.785 226794.479, 998271...."
37406,338.0,1000920030,2023,{5CDE3269-D9A3-4732-BBB0-AD92B46BDDF2},1091002,0,0,0,99112,20.0,22.0,25,0.010592,"MULTIPOLYGON (((982522.527 198275.305, 982453...."
37407,84.72,1000920032,1925,{5D66351B-DDFF-43FB-A05D-E5A3E861CE1B},1001278,23039,2000,0,112560,22.0,24.0,25,0.006406,"MULTIPOLYGON (((982550.394 198238.001, 982484...."


In [4]:
# Subway ridership layers for 2023 and 2022, both reprojected to EPSG:2263.
gdf_subway_ridership_2023 = (
    gpd.read_file('../../data/processed/traffic/2023_ridership.geojson')
    .to_crs(2263)
)
gdf_subway_ridership_2022 = (
    gpd.read_file('../../data/processed/traffic/2022_ridership.geojson')
    .to_crs(2263)
)


In [5]:
# Match every building to its nearest 2023 ridership station and record the
# distance in 'distance_from_station(ft)'. The nearest join can emit more than
# one row for the same building, so only the first row per building key
# (bin, mpluto_bbl, globalid) is kept.
gdf_building_ridership_2023 = (
    gdf_building
    .sjoin_nearest(gdf_subway_ridership_2023, distance_col='distance_from_station(ft)')
    .drop(columns='index_right')
    .drop_duplicates(subset=['bin', 'mpluto_bbl', 'globalid'])
)

In [6]:
# Match every building to its nearest 2022 ridership station and record the
# distance in 'distance_from_station(ft)'. The nearest join can emit more than
# one row for the same building, so only the first row per building key
# (bin, mpluto_bbl, globalid) is kept.
gdf_building_ridership_2022 = (
    gdf_building
    .sjoin_nearest(gdf_subway_ridership_2022, distance_col='distance_from_station(ft)')
    .drop(columns='index_right')
    .drop_duplicates(subset=['bin', 'mpluto_bbl', 'globalid'])
)

In [7]:
# Reduce both joined tables to the building id, the five ridership columns,
# and the distance to the matched station.
gdf_building_ridership_2023 = gdf_building_ridership_2023.loc[
    :,
    [
        'bin',
        'ridership_evening',
        'ridership_late_night',
        'ridership_midday',
        'ridership_morning',
        'ridership_night',
        'distance_from_station(ft)',
    ],
]
gdf_building_ridership_2022 = gdf_building_ridership_2022.loc[
    :,
    [
        'bin',
        'ridership_evening',
        'ridership_late_night',
        'ridership_midday',
        'ridership_morning',
        'ridership_night',
        'distance_from_station(ft)',
    ],
]

In [8]:
# Tag each table with its year so the rows stay distinguishable after stacking.
gdf_building_ridership_2023 = gdf_building_ridership_2023.assign(year=2023)
gdf_building_ridership_2022 = gdf_building_ridership_2022.assign(year=2022)

In [9]:
# Stack the 2023 and 2022 tables into one long table with a fresh 0..n-1 index.
gdf_building_ridership = pd.concat(
    [gdf_building_ridership_2023, gdf_building_ridership_2022],
    ignore_index=True,
)

In [11]:
# Persist the per-building ridership table (row index omitted).
gdf_building_ridership.to_csv(
    '../../data/processed/traffic/ridership_by_bin.csv',
    index=False,
)

## Mapping AADT 2019 ~ 2021

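The estimated AADT value for each building is the inverse-distance-weighted average of all AADT observations for that year, where each observation is weighted by 1 / (distance to the building + 1).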

In [13]:
# Load each year's AADT layer in EPSG:2263 and drop rows with no AADT value.
# The 2020 and 2021 files use a year-specific column name, which is renamed
# to 'AADT' after filtering so all three layers share one schema.
gdf_aadt_2019 = (
    gpd.read_file('../../data/raw/aadt/aadt_2019.geojson')
    .to_crs(2263)
    .loc[lambda layer: layer['AADT'].notna()]
)
gdf_aadt_2020 = (
    gpd.read_file('../../data/raw/aadt/aadt_2020.geojson')
    .to_crs(2263)
    .loc[lambda layer: layer['MAT_ALH_2020_csv_AADT'].notna()]
    .rename(columns={'MAT_ALH_2020_csv_AADT': 'AADT'})
)
gdf_aadt_2021 = (
    gpd.read_file('../../data/raw/aadt/aadt_2021.geojson')
    .to_crs(2263)
    .loc[lambda layer: layer['MAT_ALH_2021_csv_AADT'].notna()]
    .rename(columns={'MAT_ALH_2021_csv_AADT': 'AADT'})
)

In [14]:
def calculate_idw_aadt_average(building, gdf_aadt):
    """Estimate AADT at one building as an inverse-distance-weighted mean.

    Every row of ``gdf_aadt`` contributes its 'AADT' value with weight
    ``1 / (distance + 1)``, where ``distance`` is the distance from that row's
    geometry to ``building``.

    Parameters
    ----------
    building : geometry
        Geometry of a single building, passed to ``gdf_aadt.distance``.
        Presumably in the same CRS as ``gdf_aadt`` — verify against the caller.
    gdf_aadt : GeoDataFrame
        AADT observations; must provide ``.distance(...)`` and an 'AADT' column.

    Returns
    -------
    float
        The weighted average, or NaN when ``gdf_aadt`` has no rows.
    """
    # np.average raises ZeroDivisionError when the weights are empty; report
    # "no data" as NaN instead of aborting a long apply() run.
    if len(gdf_aadt) == 0:
        return np.nan

    distances = gdf_aadt.distance(building)
    # The +1 keeps the weight finite when an observation is at distance 0.
    weights = 1 / (distances + 1)

    return np.average(gdf_aadt['AADT'], weights=weights)

In [15]:
# Slow cell (the recorded run took about 7 minutes): one IDW estimate per building.
gdf_building.loc[:, 'idw_aadt_2019'] = gdf_building['geometry'].progress_apply(
    lambda footprint: calculate_idw_aadt_average(footprint, gdf_aadt_2019)
)

Applying function: 100%|██████████| 37409/37409 [06:50<00:00, 91.17it/s] 


In [16]:
# Slow cell (the recorded run took about 3 minutes): one IDW estimate per building.
gdf_building.loc[:, 'idw_aadt_2020'] = gdf_building['geometry'].progress_apply(
    lambda footprint: calculate_idw_aadt_average(footprint, gdf_aadt_2020)
)

Applying function: 100%|██████████| 37409/37409 [02:53<00:00, 215.60it/s]


In [17]:
# Slow cell (the recorded run took about 4 minutes): one IDW estimate per building.
gdf_building.loc[:, 'idw_aadt_2021'] = gdf_building['geometry'].progress_apply(
    lambda footprint: calculate_idw_aadt_average(footprint, gdf_aadt_2021)
)

Applying function: 100%|██████████| 37409/37409 [03:51<00:00, 161.33it/s]


In [18]:
# Spot-check the three idw_aadt_* columns added by the preceding cells.
gdf_building.head()

Unnamed: 0,heightroof,mpluto_bbl,cnstrct_yr,globalid,bin,OfficeArea,RetailArea,ResArea,OBJECTID,StreetWidth_Min,StreetWidth_Max,POSTED_SPEED,betweeness,geometry,idw_aadt_2019,idw_aadt_2020,idw_aadt_2021
0,59.722628,1021210037,1910,{A0E56BCC-A86B-4CEF-9A42-9B4ECD61743F},1062896,0,2250,10344,100007,60.0,60.0,25,0.012116,"MULTIPOLYGON (((1001211.761 244524.798, 100123...",13651.722938,15362.929888,17083.134233
1,155.49,1008870001,1935,{0B06F7A8-1F83-44F3-97DD-AC524A6374CA},1018457,50018,4000,0,101045,32.0,32.0,25,0.027505,"MULTIPOLYGON (((989135.970 210787.603, 989181....",10065.445841,10282.943569,11495.14408
2,167.21446831,1013490035,1931,{A091951D-C73E-4B77-9A61-716C666C5446},1039988,2890,0,70110,99804,60.0,60.0,25,0.012333,"MULTIPOLYGON (((994153.480 215659.003, 994165....",14437.629737,14445.844421,16299.638335
3,28.57,1010610006,1910,{2BE9F674-67E7-4A49-8535-F636A16D8DA5},1026714,0,0,0,103068,32.0,32.0,25,0.02292,"MULTIPOLYGON (((986858.328 218070.430, 986839....",10455.417903,10087.378012,11238.595524
4,73.13,1003060013,1900,{2403088E-77A6-4071-8B5E-D54AFBC720D9},1003956,0,3800,0,92425,34.0,45.0,25,0.033656,"MULTIPOLYGON (((986180.396 200723.234, 986187....",7632.299988,7433.877764,8351.143163


In [20]:
# Give the building attributes snake_case names; columns not listed keep their names.
gdf_building_renamed = gdf_building.rename(
    columns=dict(
        heightroof='height',
        OfficeArea='office_area',
        RetailArea='retail_area',
        ResArea='residential_area',
        StreetWidth_Min='street_width_min',
        StreetWidth_Max='street_width_max',
        POSTED_SPEED='posted_speed',
    )
)

## Mapping ATVC 2018 ~ 2020

In [33]:
# Load the vehicle-count layer in EPSG:2263 and keep only rows with Yr before 2022.
gdf_atvc = (
    gpd.read_file('../../data/raw/traffic_count/vehicle count.geojson')
    .to_crs(2263)
    .loc[lambda counts: counts['Yr'] < 2022]
)

In [34]:
def calculate_idw_atvc_average(building, gdf_atvc, year):
    """Estimate the vehicle count at one building for a given year.

    Rows of ``gdf_atvc`` whose 'Yr' equals ``year`` are averaged on their
    'median' column with weight ``1 / (distance + 1)``, where ``distance`` is
    the distance from the row's geometry to ``building``.

    Parameters
    ----------
    building : geometry
        Geometry of a single building, passed to ``.distance``.
        Presumably in the same CRS as ``gdf_atvc`` — verify against the caller.
    gdf_atvc : GeoDataFrame
        Vehicle-count observations with 'Yr' and 'median' columns.
    year : int
        Value compared against the 'Yr' column to select observations.

    Returns
    -------
    float
        The weighted average, or NaN when no row matches ``year``.
    """
    counts_for_year = gdf_atvc.loc[gdf_atvc.loc[:, 'Yr'] == year]

    # With no matching rows np.average raises ZeroDivisionError (empty
    # weights); report "no data" as NaN instead of aborting the apply() run.
    if len(counts_for_year) == 0:
        return np.nan

    distances = counts_for_year.distance(building)
    # The +1 keeps the weight finite when an observation is at distance 0.
    weights = 1 / (distances + 1)

    return np.average(counts_for_year['median'], weights=weights)

In [35]:
# IDW vehicle-count estimate per building from the 2018 observations.
gdf_building_renamed.loc[:, 'idw_atvc_2018'] = gdf_building_renamed['geometry'].progress_apply(
    lambda footprint: calculate_idw_atvc_average(footprint, gdf_atvc, 2018)
)

Applying function: 100%|██████████| 37409/37409 [00:44<00:00, 836.36it/s]


In [36]:
# IDW vehicle-count estimate per building from the 2019 observations.
gdf_building_renamed.loc[:, 'idw_atvc_2019'] = gdf_building_renamed['geometry'].progress_apply(
    lambda footprint: calculate_idw_atvc_average(footprint, gdf_atvc, 2019)
)

Applying function: 100%|██████████| 37409/37409 [00:50<00:00, 747.24it/s]


In [37]:
# IDW vehicle-count estimate per building from the 2020 observations.
gdf_building_renamed.loc[:, 'idw_atvc_2020'] = gdf_building_renamed['geometry'].progress_apply(
    lambda footprint: calculate_idw_atvc_average(footprint, gdf_atvc, 2020)
)

Applying function: 100%|██████████| 37409/37409 [00:32<00:00, 1168.34it/s]


In [22]:
# Tabular export: every attribute except the geometry column, row index omitted.
gdf_building_renamed.drop(columns='geometry').to_csv(
    '../../data/processed/building/building_240327_0626.csv',
    index=False,
)

In [20]:
# Spatial export: the renamed building layer, geometry included, as GeoJSON.
gdf_building_renamed.to_file(
    '../../data/processed/building/building_manhattan_commercial_vt.geojson',
    driver='GeoJSON',
)