## Calculate the nearest subway station

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd

from tqdm import tqdm
tqdm.pandas(desc="Applying function")

from shapely.geometry import Point

In [2]:
# Load the Manhattan commercial building footprints.
# NOTE(review): the nearest-station join and the distance calculations further down
# assume these geometries are already in EPSG:2263 — confirm against the file's CRS.
gdf_building = gpd.read_file('../../data/processed/building/building_manhattan_commercial.geojson')


In [3]:
# Load the MTA station table; its GTFS longitude/latitude columns are treated as EPSG:4326.
df_station = pd.read_csv('../../data/raw/mta_station/MTA_Subway_Stations.csv')

# Point geometries take x = longitude and y = latitude. points_from_xy makes that
# ordering explicit and casts the coordinates to float for us.
points = gpd.points_from_xy(x=df_station['GTFS Longitude'], y=df_station['GTFS Latitude'])
gdf_station = gpd.GeoDataFrame(df_station, geometry=points, crs=4326)

# Reproject to EPSG:2263 so the station layer is in the CRS used for the distance calculations.
gdf_station = gdf_station.to_crs(2263)

In [4]:
# Attach the nearest subway station (and the distance to it) to every building.
#
# This notebook saves its result to the same GeoJSON it loads, so on a re-run the input
# already carries the station columns. Drop them first so the join does not collide with
# stale copies of 'GTFS Stop ID' / 'distance_from_station(ft)'.
gdf_building = gdf_building.drop(
    columns=['GTFS Stop ID', 'distance_from_station(ft)'],
    errors='ignore',
)

gdf_building = (
    gdf_building
    .sjoin_nearest(
        gdf_station.loc[:, ['GTFS Stop ID', 'geometry']],
        distance_col='distance_from_station(ft)',
    )
    .drop(columns='index_right')
)

# sjoin_nearest emits one row per equally-near station, so a building that is tied
# between stations shows up more than once; keep a single row per building.
gdf_building = gdf_building.drop_duplicates(subset=['bin','mpluto_bbl','globalid'])

In [6]:
# Preview the first rows; the joined station ID and distance are the trailing columns.
gdf_building.head()

Unnamed: 0,heightroof,mpluto_bbl,cnstrct_yr,globalid,bin,OfficeArea,RetailArea,ResArea,OBJECTID,StreetWidth_Min,StreetWidth_Max,POSTED_SPEED,betweeness,geometry,GTFS Stop ID,distance_from_station(ft)
0,59.722628,1021210037,1910,{A0E56BCC-A86B-4CEF-9A42-9B4ECD61743F},1062896,0,2250,10344,100007,60.0,60.0,25,0.012116,"MULTIPOLYGON (((1001211.761 244524.798, 100123...",A10,705.429228
1,155.49,1008870001,1935,{0B06F7A8-1F83-44F3-97DD-AC524A6374CA},1018457,50018,4000,0,101045,32.0,32.0,25,0.027505,"MULTIPOLYGON (((989135.970 210787.603, 989181....",632,302.555369
2,167.21446831,1013490035,1931,{A091951D-C73E-4B77-9A61-716C666C5446},1039988,2890,0,70110,99804,60.0,60.0,25,0.012333,"MULTIPOLYGON (((994153.480 215659.003, 994165....",F11,1378.469618
3,28.57,1010610006,1910,{2BE9F674-67E7-4A49-8535-F636A16D8DA5},1026714,0,0,0,103068,32.0,32.0,25,0.02292,"MULTIPOLYGON (((986858.328 218070.430, 986839....",A25,1620.005364
4,73.13,1003060013,1900,{2403088E-77A6-4071-8B5E-D54AFBC720D9},1003956,0,3800,0,92425,34.0,45.0,25,0.033656,"MULTIPOLYGON (((986180.396 200723.234, 986187....",D22,292.273497


## Mapping AADT 2019 ~ 2021

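For each building, the estimated AADT (annual average daily traffic) is the inverse-distance-weighted average of the AADT values of all features in that year's AADT dataset: $\widehat{\mathrm{AADT}} = \sum_i w_i \, \mathrm{AADT}_i \,/\, \sum_i w_i$ with $w_i = 1/(d_i + 1)$, where $d_i$ is the distance from the building to feature $i$.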

In [7]:
def _load_aadt(path, aadt_column='AADT'):
    """Read one AADT layer and normalise it for the IDW calculation.

    The layer is reprojected to EPSG:2263, rows whose traffic count is missing
    are removed, and the count column is renamed to 'AADT' so every year
    exposes the same schema.

    Parameters
    ----------
    path : str
        Location of the GeoJSON file to read.
    aadt_column : str, default 'AADT'
        Name of the column holding the traffic count in this file.

    Returns
    -------
    geopandas.GeoDataFrame
        The filtered layer with the count stored under 'AADT'.
    """
    layer = gpd.read_file(path).to_crs(2263)
    has_count = layer[aadt_column].notna()
    return layer.loc[has_count].rename(columns={aadt_column: 'AADT'})


# The 2020 and 2021 exports name the count column differently from 2019.
gdf_aadt_2019 = _load_aadt('../../data/raw/aadt/aadt_2019.geojson')
gdf_aadt_2020 = _load_aadt('../../data/raw/aadt/aadt_2020.geojson', aadt_column='MAT_ALH_2020_csv_AADT')
gdf_aadt_2021 = _load_aadt('../../data/raw/aadt/aadt_2021.geojson', aadt_column='MAT_ALH_2021_csv_AADT')

In [8]:
def calculate_idw_aadt_average(building, gdf_aadt, power=1, distance_offset=1):
    """Return the inverse-distance-weighted average AADT around one building.

    Every row of ``gdf_aadt`` contributes its 'AADT' value with weight
    ``1 / (d + distance_offset) ** power``, where ``d`` is the distance from
    that row's geometry to ``building`` (in the units of the layer's CRS).

    Parameters
    ----------
    building : shapely geometry
        Geometry of the building to evaluate.
    gdf_aadt : geopandas.GeoDataFrame
        Traffic layer; must provide ``.distance(geometry)`` and an 'AADT' column.
    power : float, default 1
        Exponent applied to the shifted distance; larger values make nearby
        features dominate more strongly.
    distance_offset : float, default 1
        Constant added to each distance so a feature touching the building
        (distance 0) does not produce a division by zero. With an offset of 0,
        a zero distance yields an infinite weight.

    Returns
    -------
    float
        The weighted average of 'AADT'.

    Raises
    ------
    ZeroDivisionError
        Raised by ``np.average`` when the weights sum to zero (for example,
        when ``gdf_aadt`` has no rows).
    """
    distances = gdf_aadt.distance(building)
    weights = 1 / (distances + distance_offset) ** power

    return np.average(gdf_aadt['AADT'], weights=weights)

In [9]:
# IDW traffic estimate per building for 2019 (slow: the recorded run took ~11 minutes).
gdf_building.loc[:, 'idw_aadt_2019'] = gdf_building['geometry'].progress_apply(
    lambda footprint: calculate_idw_aadt_average(footprint, gdf_aadt_2019)
)

Applying function: 100%|██████████| 21123/21123 [10:44<00:00, 32.75it/s]


In [10]:
# IDW traffic estimate per building for 2020 (the recorded run took ~4.5 minutes).
gdf_building.loc[:, 'idw_aadt_2020'] = gdf_building['geometry'].progress_apply(
    lambda footprint: calculate_idw_aadt_average(footprint, gdf_aadt_2020)
)

Applying function: 100%|██████████| 21123/21123 [04:24<00:00, 79.90it/s] 


In [11]:
# IDW traffic estimate per building for 2021 (the recorded run took ~4.5 minutes).
gdf_building.loc[:, 'idw_aadt_2021'] = gdf_building['geometry'].progress_apply(
    lambda footprint: calculate_idw_aadt_average(footprint, gdf_aadt_2021)
)

Applying function: 100%|██████████| 21123/21123 [04:33<00:00, 77.20it/s] 


In [12]:
# Preview again to check that the three idw_aadt_* columns were added.
gdf_building.head()

Unnamed: 0,heightroof,mpluto_bbl,cnstrct_yr,globalid,bin,OfficeArea,RetailArea,ResArea,OBJECTID,StreetWidth_Min,StreetWidth_Max,POSTED_SPEED,betweeness,geometry,GTFS Stop ID,distance_from_station(ft),idw_aadt_2019,idw_aadt_2020,idw_aadt_2021
0,59.722628,1021210037,1910,{A0E56BCC-A86B-4CEF-9A42-9B4ECD61743F},1062896,0,2250,10344,100007,60.0,60.0,25,0.012116,"MULTIPOLYGON (((1001211.761 244524.798, 100123...",A10,705.429228,13651.722938,15362.929888,17083.134233
1,155.49,1008870001,1935,{0B06F7A8-1F83-44F3-97DD-AC524A6374CA},1018457,50018,4000,0,101045,32.0,32.0,25,0.027505,"MULTIPOLYGON (((989135.970 210787.603, 989181....",632,302.555369,10065.445841,10282.943569,11495.14408
2,167.21446831,1013490035,1931,{A091951D-C73E-4B77-9A61-716C666C5446},1039988,2890,0,70110,99804,60.0,60.0,25,0.012333,"MULTIPOLYGON (((994153.480 215659.003, 994165....",F11,1378.469618,14437.629737,14445.844421,16299.638335
3,28.57,1010610006,1910,{2BE9F674-67E7-4A49-8535-F636A16D8DA5},1026714,0,0,0,103068,32.0,32.0,25,0.02292,"MULTIPOLYGON (((986858.328 218070.430, 986839....",A25,1620.005364,10455.417903,10087.378012,11238.595524
4,73.13,1003060013,1900,{2403088E-77A6-4071-8B5E-D54AFBC720D9},1003956,0,3800,0,92425,34.0,45.0,25,0.033656,"MULTIPOLYGON (((986180.396 200723.234, 986187....",D22,292.273497,7632.299988,7433.877764,8351.143163


In [13]:
# (rows, columns) of the enriched building table.
gdf_building.shape

(21123, 19)

In [14]:
# Persist the enriched buildings as GeoJSON.
# NOTE(review): this is the same path the notebook reads at the top, so the input file
# is replaced by the enriched version — confirm that overwriting the source is intended.
gdf_building.to_file('../../data/processed/building/building_manhattan_commercial.geojson', driver='GeoJSON')