# Matching Buildings to Streets
The purpose of this notebook is to lay out a methodology for matching buildings in NYC to the street that they are on.

This effort is being undertaken as a part of the Street Search initiative. The ticket for this can be found [here](https://perchwell.atlassian.net/browse/PWEB-11865)

Author: Arnav Malhotra

In [5]:
import geopandas as gpd
import folium
import shapely
import pandas as pd
import json

from tqdm.auto import tqdm
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
def load_shape(x):
    try:
        return shapely.wkt.loads(x)
    except:
        return shapely.geometry.shape(json.loads(x))

def load_geos(df: pd.DataFrame) -> gpd.GeoDataFrame:
    preso_gdf = gpd.GeoDataFrame(df[(~pd.isna(df.geometry))])
    preso_gdf['geometry'] = [load_shape(x) for x in tqdm(preso_gdf['geometry'], total=len(preso_gdf))]    
    preso_gdf = preso_gdf.set_geometry('geometry')
    preso_gdf.set_crs(crs="EPSG:4326", inplace=True)
    return preso_gdf

def line_to_polygon(geo: shapely.LineString, street_width: float, feet_buffer=0) -> shapely.Polygon:
    return gpd.GeoSeries(shapely.buffer(geo, (street_width + feet_buffer)/364567.2, cap_style='flat'), crs="EPSG:4326")[0]

In [4]:
streets = gpd.read_file('data/Centerline_20240520')
streets.to_crs(crs="EPSG:4326", inplace=True)
streets['geometry'] = [line_to_polygon(x, y, 15) for x, y in tqdm(zip(streets['geometry'], streets['ST_WIDTH']), total=len(streets))]

  0%|          | 0/121906 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
buildings = pd.read_csv('data/nyc_all_geos.csv')
buildings.dropna(subset='geometry', inplace=True)
buildings = load_geos(buildings)

  0%|          | 0/695183 [00:00<?, ?it/s]

In [None]:
buildings_x_streets = buildings.sjoin(streets, how='left', predicate='intersects')
buildings_x_streets[['id', 'display_address', 'PHYSICALID', 'ST_LABEL']]

Unnamed: 0,id,display_address,PHYSICALID,ST_LABEL
0,1,11 Stone Street,79615.0,STONE ST
1,2,17 State Street,145498.0,STATE ST
1,2,17 State Street,82.0,PEARL ST
2,3,80 Wall Street,165.0,WALL ST
2,3,80 Wall Street,90.0,PEARL ST
...,...,...,...,...
698312,7259154,149-53 HAWTREE STREET,90682.0,HAWTREE CREEK RD
698317,17208277,491 Bedford Avenue,42854.0,16 AVE
698320,17208397,637 MADISON STREET,,
698322,17208410,149-49 Hawtree Street,33788.0,LINDEN BLVD


In [None]:
matches = buildings_x_streets[~pd.isna(buildings_x_streets["PHYSICALID"])]
no_matches = buildings_x_streets[pd.isna(buildings_x_streets["PHYSICALID"])]
print(f'number of buildings with street matches: {matches["id"].nunique()},\nnumber of buildings without street matches: {no_matches["id"].nunique()},\npercent of matches: {matches["id"].nunique()/len(buildings) * 100:.2f}%')

number of buildings with street matches: 686052,
number of buildings without street matches: 9131,
percent of matches: 98.69%


In [None]:
matched_list = set(matches['id'].values)
buildings['found_street'] = [True if x in matched_list else False for x in buildings['id']]

## Visualize Matches
The next section is visualizing the building to street matches.

In [None]:
building_style = lambda x: {
  'color' : 'green' if x['properties']['found_street'] else 'orange',
  'opacity' : 0.50,
  'weight' : 2,
}

In [None]:
buildings_sample = buildings.sample(10000, random_state=42)

In [None]:
buildings.head()

Unnamed: 0,id,display_address,geometry,found_street
0,1,11 Stone Street,"MULTIPOLYGON (((-74.01215 40.70409, -74.01231 ...",True
1,2,17 State Street,"MULTIPOLYGON (((-74.01355 40.70307, -74.01355 ...",True
2,3,80 Wall Street,"MULTIPOLYGON (((-74.00748 40.70565, -74.00752 ...",True
3,4,42 Water Street,"MULTIPOLYGON (((-74.01021 40.70365, -74.01007 ...",True
4,5,32 Pearl Street,"MULTIPOLYGON (((-74.01212 40.70307, -74.01220 ...",True


In [None]:
'''
m = folium.Map(location=[40.70, -73.94], zoom_start=10, tiles="CartoDB positron")

streets_viz = folium.GeoJson(streets[['PHYSICALID', 'ST_LABEL', 'geometry']],
                             highlight_function=lambda x: {"fillOpacity": 0.8},
                             zoom_on_click=True)#,tooltip=folium.GeoJsonTooltip(fields=['PHYSICALID', 'ST_LABEL']))
streets_viz.add_to(m)
m.keep_in_front(streets_viz)'''

'\nm = folium.Map(location=[40.70, -73.94], zoom_start=10, tiles="CartoDB positron")\n\nstreets_viz = folium.GeoJson(streets[[\'PHYSICALID\', \'ST_LABEL\', \'geometry\']],\n                             highlight_function=lambda x: {"fillOpacity": 0.8},\n                             zoom_on_click=True)#,tooltip=folium.GeoJsonTooltip(fields=[\'PHYSICALID\', \'ST_LABEL\']))\nstreets_viz.add_to(m)\nm.keep_in_front(streets_viz)'

In [None]:
'''buildings_viz = folium.GeoJson(buildings[buildings['found_street'] == False][['id', 'display_address', 'geometry', 'found_street']].reset_index(), 
                               style_function=building_style, highlight_function=lambda x: {"fillOpacity": 0.8}, 
                               tooltip=folium.GeoJsonTooltip(fields=['id', 'display_address']))
buildings_viz.add_to(m)
m.keep_in_front(buildings_viz)
m'''

'buildings_viz = folium.GeoJson(buildings[buildings[\'found_street\'] == False][[\'id\', \'display_address\', \'geometry\', \'found_street\']].reset_index(), \n                               style_function=building_style, highlight_function=lambda x: {"fillOpacity": 0.8}, \n                               tooltip=folium.GeoJsonTooltip(fields=[\'id\', \'display_address\']))\nbuildings_viz.add_to(m)\nm.keep_in_front(buildings_viz)\nm'

In [None]:
buildings.head()

Unnamed: 0,id,display_address,geometry,found_street
0,1,11 Stone Street,"MULTIPOLYGON (((-74.01215 40.70409, -74.01231 ...",True
1,2,17 State Street,"MULTIPOLYGON (((-74.01355 40.70307, -74.01355 ...",True
2,3,80 Wall Street,"MULTIPOLYGON (((-74.00748 40.70565, -74.00752 ...",True
3,4,42 Water Street,"MULTIPOLYGON (((-74.01021 40.70365, -74.01007 ...",True
4,5,32 Pearl Street,"MULTIPOLYGON (((-74.01212 40.70307, -74.01220 ...",True


In [None]:
type(buildings)

geopandas.geodataframe.GeoDataFrame

In [None]:
matches[matches['id'] == 252032]

Unnamed: 0,id,display_address,geometry,index_right,PHYSICALID,L_LOW_HN,L_HIGH_HN,R_LOW_HN,R_HIGH_HN,L_ZIP,...,PRE_DIRECT,PRE_TYPE,POST_TYPE,POST_DIREC,POST_MODIF,FULL_STREE,ST_NAME,BIKE_TRAFD,SEGMENT_TY,SHAPE_Leng
245485,252032,116 South Elliott Place,"MULTIPOLYGON (((-73.97548 40.68595, -73.97583 ...",56105.0,62747.0,91,127,88,132,11217,...,S,,PL,,,S ELLIOTT PL,ELLIOTT,,U,498.244272


In [None]:
buildings[buildings['id'] == 252032]

Unnamed: 0,id,display_address,geometry,found_street
245485,252032,116 South Elliott Place,"MULTIPOLYGON (((-73.97548 40.68595, -73.97583 ...",True


In [None]:
len(buildings[buildings['found_street'] == False].sjoin_nearest(streets, how='inner'))/len(buildings[buildings['found_street'] == False])

1.0

In [None]:
'''m = folium.Map(location=[40.70, -73.94], zoom_start=10, tiles="CartoDB positron")
for _, r in tqdm(streets.iterrows(), total=len(streets)):
    sim_geo = gpd.GeoSeries(r["geometry"])
    geo_j = sim_geo.to_json()
    geo_j = folium.GeoJson(data=geo_j, highlight_function=lambda x: {"fillOpacity": 0.8})
    folium.Tooltip(str(r['PHYSICALID']) + ': ' + r["ST_LABEL"]).add_to(geo_j)
    geo_j.add_to(m)'''

'm = folium.Map(location=[40.70, -73.94], zoom_start=10, tiles="CartoDB positron")\nfor _, r in tqdm(streets.iterrows(), total=len(streets)):\n    sim_geo = gpd.GeoSeries(r["geometry"])\n    geo_j = sim_geo.to_json()\n    geo_j = folium.GeoJson(data=geo_j, highlight_function=lambda x: {"fillOpacity": 0.8})\n    folium.Tooltip(str(r[\'PHYSICALID\']) + \': \' + r["ST_LABEL"]).add_to(geo_j)\n    geo_j.add_to(m)'

In [None]:
'''buildings_viz = folium.GeoJson(buildings_sample[['id', 'display_address', 'geometry', 'found_street', 'ST_LABEL']].reset_index(), 
                               style_function=building_style, highlight_function=lambda x: {"fillOpacity": 0.8}, zoom_on_click=True,
                               tooltip=folium.GeoJsonTooltip(fields=['id', 'display_address', 'ST_LABEL']))
buildings_viz.add_to(m)
m.keep_in_front(buildings_viz)'''

'buildings_viz = folium.GeoJson(buildings_sample[[\'id\', \'display_address\', \'geometry\', \'found_street\', \'ST_LABEL\']].reset_index(), \n                               style_function=building_style, highlight_function=lambda x: {"fillOpacity": 0.8}, zoom_on_click=True,\n                               tooltip=folium.GeoJsonTooltip(fields=[\'id\', \'display_address\', \'ST_LABEL\']))\nbuildings_viz.add_to(m)\nm.keep_in_front(buildings_viz)'

In [None]:
# m.save('data/unmatched_buildings.html')

## Go with option 2 because sjoin_nearest() causes too many false positives

1. check which street segments have fewer than normal buildings
2. increase those street segements' widths
3. rematch all buildings to capture corner buildings as well

In [None]:
buildings_x_streets.head()

Unnamed: 0,id,display_address,geometry,index_right,PHYSICALID,L_LOW_HN,L_HIGH_HN,R_LOW_HN,R_HIGH_HN,L_ZIP,...,PRE_DIRECT,PRE_TYPE,POST_TYPE,POST_DIREC,POST_MODIF,FULL_STREE,ST_NAME,BIKE_TRAFD,SEGMENT_TY,SHAPE_Leng
0,1,11 Stone Street,"MULTIPOLYGON (((-74.01215 40.70409, -74.01231 ...",70702.0,79615.0,1,19,2,24,10004,...,,,ST,,,STONE ST,STONE,,U,446.835827
1,2,17 State Street,"MULTIPOLYGON (((-74.01355 40.70307, -74.01355 ...",102310.0,145498.0,17,19,16,20,10004,...,,,ST,,,STATE ST,STATE,,U,323.530726
1,2,17 State Street,"MULTIPOLYGON (((-74.01355 40.70307, -74.01355 ...",52.0,82.0,1,23,2,24,10004,...,,,ST,,,PEARL ST,PEARL,TF,U,397.48266
2,3,80 Wall Street,"MULTIPOLYGON (((-74.00748 40.70565, -74.00752 ...",125.0,165.0,78,90,75,93,10005,...,,,ST,,,WALL ST,WALL,,U,203.319704
2,3,80 Wall Street,"MULTIPOLYGON (((-74.00748 40.70565, -74.00752 ...",60.0,90.0,157,169,154,168,10005,...,,,ST,,,PEARL ST,PEARL,TF,U,229.746461


In [None]:
buildings_x_streets['street_segment'] = buildings_x_streets['PHYSICALID'].astype(str) + ': ' + buildings_x_streets['ST_LABEL']

In [None]:
buildings_x_streets.groupby('street_segment')['id'].nunique().sort_values()

street_segment
167183.0: DRIVEWAY            1
63198.0: W 174 ST             1
38240.0: WASHINGTON AVE       1
167644.0: BROOKVILLE BLVD     1
167645.0: MERRICK BLVD        1
                             ..
48434.0: RUTLAND RD          92
44078.0: GREENE AVE          92
19960.0: FOCH BLVD           93
106336.0: ALLEY              95
75610.0: E 91 ST             98
Name: id, Length: 78905, dtype: int64

In [None]:
building_count_by_street = buildings_x_streets.groupby('PHYSICALID')['id'].nunique().sort_values()

In [None]:
len(building_count_by_street[building_count_by_street < 6])

36862

In [None]:
building_count_by_street.quantile(0.75)

15.0

In [None]:
streets_no_match = streets[~streets['PHYSICALID'].isin(buildings_x_streets['PHYSICALID'])]
len(streets_no_match)

42926

In [None]:
streets_no_match[streets_no_match['PHYSICALID'] == 47566]

Unnamed: 0,PHYSICALID,L_LOW_HN,L_HIGH_HN,R_LOW_HN,R_HIGH_HN,L_ZIP,R_ZIP,L_BLKFC_ID,R_BLKFC_ID,ST_LABEL,...,PRE_TYPE,POST_TYPE,POST_DIREC,POST_MODIF,FULL_STREE,ST_NAME,BIKE_TRAFD,SEGMENT_TY,SHAPE_Leng,geometry
42484,47566,1293,1315,1292,1316,11207,11207,2022603826,2022611140,BUSHWICK AVE,...,,AVE,,,BUSHWICK AVE,BUSHWICK,,U,260.081457,"POLYGON ((-73.91231 40.68636, -73.91247 40.686..."


In [None]:
len(building_count_by_street)

78905

In [None]:
len(buildings_x_streets[buildings_x_streets['PHYSICALID'] == 88215])

17

In [None]:
def choose_buffer(geo, st_width, ew0, ew1):
    if ew0:
        return line_to_polygon(geo, st_width, 40)
    elif ew1:
        return line_to_polygon(geo, st_width, 30)
    else:
        return line_to_polygon(geo, st_width, 15)

streets_low_matches = building_count_by_street[building_count_by_street <= 5].reset_index()

streets = gpd.read_file('data/Centerline_20240520')
streets.to_crs(crs="EPSG:4326", inplace=True)
streets['extra_width'] = streets['PHYSICALID'].isin(streets_no_match['PHYSICALID'])
streets['extra_width2'] = streets['PHYSICALID'].isin(streets_low_matches['PHYSICALID'])
streets['geometry'] = [choose_buffer(x, y, extra, extra1) for x, y, extra, extra1 in tqdm(zip(streets['geometry'], streets['ST_WIDTH'], streets['extra_width'], streets['extra_width2']), total=len(streets))]

  0%|          | 0/121906 [00:00<?, ?it/s]

In [None]:
buildings_x_streets = buildings.sjoin(streets, how='left', predicate='intersects')

matches = buildings_x_streets[~pd.isna(buildings_x_streets["PHYSICALID"])]
no_matches = buildings_x_streets[pd.isna(buildings_x_streets["PHYSICALID"])]
buildings_x_streets['street_segment'] = buildings_x_streets['PHYSICALID'].astype(str) + ': ' + buildings_x_streets['ST_LABEL']
matched_list = set(matches['id'].values)

buildings['found_street2'] = [True if x in matched_list else False for x in buildings['id']]
buildings_count_by_street = buildings_x_streets.groupby('PHYSICALID')['id'].nunique()
print(f'number of buildings with street matches: {matches["id"].nunique()},\nnumber of buildings without street matches: {no_matches["id"].nunique()},\npercent of matches: {matches["id"].nunique()/len(buildings) * 100:.2f}%')

number of buildings with street matches: 689847,
number of buildings without street matches: 5336,
percent of matches: 99.23%


In [None]:
'''streets = gpd.read_file('data/Centerline_20240520')
streets.to_crs(crs="EPSG:4326", inplace=True)
streets['extra_width'] = streets['PHYSICALID'].isin(streets_no_match['PHYSICALID'])
streets['geometry'] = [line_to_polygon(x, y, 40) if extra else x \
    for x, y, extra in tqdm(zip(streets['geometry'], streets['ST_WIDTH'], streets['extra_width']), total=len(streets))]'''

'streets = gpd.read_file(\'data/Centerline_20240520\')\nstreets.to_crs(crs="EPSG:4326", inplace=True)\nstreets[\'extra_width\'] = streets[\'PHYSICALID\'].isin(streets_no_match[\'PHYSICALID\'])\nstreets[\'geometry\'] = [line_to_polygon(x, y, 40) if extra else x     for x, y, extra in tqdm(zip(streets[\'geometry\'], streets[\'ST_WIDTH\'], streets[\'extra_width\']), total=len(streets))]'

In [None]:
streets = streets.merge(buildings_count_by_street.rename('building_count'), how='left', left_on='PHYSICALID', right_index=True)

In [None]:
m = folium.Map(location=[40.70, -73.94], zoom_start=10, tiles="CartoDB positron")
for _, r in tqdm(streets.iterrows(), total=len(streets)):
    sim_geo = gpd.GeoSeries(r["geometry"])
    geo_j = sim_geo.to_json()
    if r['extra_width']:
        geo_j = folium.GeoJson(data=geo_j, 
            highlight_function=lambda x: {"fillOpacity": 0.8},
            style_function= lambda x: {"color": 'red'})
    elif r['extra_width2']:
        geo_j = folium.GeoJson(data=geo_j, 
            highlight_function=lambda x: {"fillOpacity": 0.8},
            style_function= lambda x: {"color": 'yellow'})
    else:
        geo_j = folium.GeoJson(data=geo_j, 
            highlight_function=lambda x: {"fillOpacity": 0.8})
    
    folium.Tooltip(f"{r['PHYSICALID']}:  {r['ST_LABEL']}, buildings matched: {r['building_count']}").add_to(geo_j)
    geo_j.add_to(m)

  0%|          | 0/121906 [00:00<?, ?it/s]

In [None]:
buildings_sample = buildings[(buildings['id'].isin(buildings_x_streets[buildings_x_streets['found_street'] == False]['id'])) | (buildings['id'].isin(no_matches['id']))]
len(buildings_sample)

9131

In [None]:
building_style = lambda x: {
  'color' : 'green' if x['properties']['found_street2'] else 'orange',
  'opacity' : 0.50,
  'weight' : 2,
}

In [None]:
buildings_viz = folium.GeoJson(buildings_sample[['id', 'display_address', 'geometry', 'found_street2']].reset_index(), 
                               style_function=building_style, highlight_function=lambda x: {"fillOpacity": 0.8}, zoom_on_click=True,
                               tooltip=folium.GeoJsonTooltip(fields=['id', 'display_address']))
buildings_viz.add_to(m)
m.keep_in_front(buildings_viz)

In [None]:
m.save('data/unmatched_buildings_v3.html')

In [None]:
len(buildings_x_streets[buildings_x_streets['PHYSICALID'] == 88484])

8

In [None]:
len(matches)

906329

In [None]:
len(matches[matches['found_street'] == False])

4075

In [None]:
buildings_x_streets[buildings_x_streets['id']==181712][['id', 'street_segment']]

Unnamed: 0,id,street_segment
176839,181712,98236.0: 4 PL


In [None]:
len(buildings_x_streets[buildings_x_streets['PHYSICALID'] == 47566])

19

In [None]:
streets[streets['PHYSICALID'] == 47566]['extra_width']

42484    True
Name: extra_width, dtype: bool

## Checking Corners

In [None]:
buildings_by_street = buildings_x_streets.groupby('id')['ST_LABEL'].nunique()
buildings_with_street = buildings_x_streets.groupby('id')['ST_LABEL'].apply(list)
buildings_sample = buildings[buildings['id'].isin(buildings_by_street[buildings_by_street>1].reset_index()['id'])].merge(buildings_with_street, how='left', right_index=True, left_on='id').sample(50000, random_state=42)
len(buildings_by_street[buildings_by_street>1])

151203

In [None]:
m = folium.Map(location=[40.70, -73.94], zoom_start=10, tiles="CartoDB positron")
buildings_viz = folium.GeoJson(buildings_sample[['id', 'display_address', 'geometry', 'found_street2', 'ST_LABEL']].reset_index(), 
                               style_function=building_style, highlight_function=lambda x: {"fillOpacity": 0.8}, zoom_on_click=True,
                               tooltip=folium.GeoJsonTooltip(fields=['id', 'display_address', 'ST_LABEL']))
buildings_viz.add_to(m)
m.keep_in_front(buildings_viz)

In [None]:
m.save('data/corner_buildings_false_positive_check.html')

In [None]:
# TODO: remove outlier streets (e.g. ST_LABEL in ['ALLEY', 'DRIVEWAY', '%FERRY RTE', '%BRG%', 'PEDESTRIAN PATH', '%BIKE PTH', '%TUNL' ...])
# TODO: use sjoin_nearest() to identify the streets to increase their widths

## Increasing unmatched buildings' nearest street

In [None]:
streets = gpd.read_file('data/Centerline_20240520')
streets.to_crs(crs="EPSG:4326", inplace=True)
#streets['geometry'] = [line_to_polygon(x, y) for x, y in tqdm(zip(streets['geometry'], streets['ST_WIDTH']), total=len(streets))]

buildings_nearest_street = buildings[buildings['found_street'] == False].sjoin_nearest(streets, how='inner')

In [None]:
len(buildings_nearest_street['PHYSICALID'].unique())

2512

In [None]:
'''
def choose_buffer(geo, st_width, ew0=False, ew1=False):
    if ew0:
        return line_to_polygon(geo, st_width, 40)
    elif ew1:
        return line_to_polygon(geo, st_width, 30)
    else:
        return line_to_polygon(geo, st_width, 15)

#streets_low_matches = building_count_by_street[building_count_by_street <= 5].reset_index()
'''
streets = gpd.read_file('data/Centerline_20240520')
streets.to_crs(crs="EPSG:4326", inplace=True)
streets['extra_width'] = streets['PHYSICALID'].isin(buildings_nearest_street['PHYSICALID'].unique())
#streets['extra_width2'] = streets['PHYSICALID'].isin(streets_low_matches['PHYSICALID'])
streets['geometry'] = [line_to_polygon(x, y, 50) if extra else line_to_polygon(x, y, 15) for x, y, extra in tqdm(zip(streets['geometry'], streets['ST_WIDTH'], streets['extra_width']), total=len(streets))]

  0%|          | 0/121906 [00:00<?, ?it/s]

In [None]:
buildings_x_streets = buildings.sjoin(streets, how='left', predicate='intersects')

matches = buildings_x_streets[~pd.isna(buildings_x_streets["PHYSICALID"])]
no_matches = buildings_x_streets[pd.isna(buildings_x_streets["PHYSICALID"])]
buildings_x_streets['street_segment'] = buildings_x_streets['PHYSICALID'].astype(str) + ': ' + buildings_x_streets['ST_LABEL']
matched_list = set(matches['id'].values)

buildings['found_street2'] = [True if x in matched_list else False for x in buildings['id']]
buildings_count_by_street = buildings_x_streets.groupby('PHYSICALID')['id'].nunique()
print(f'number of buildings with street matches: {matches["id"].nunique()},\nnumber of buildings without street matches: {no_matches["id"].nunique()},\npercent of matches: {matches["id"].nunique()/len(buildings) * 100:.2f}%')

number of buildings with street matches: 693143,
number of buildings without street matches: 2040,
percent of matches: 99.71%


In [None]:
streets = streets.merge(buildings_count_by_street.rename('building_count'), how='left', left_on='PHYSICALID', right_index=True)

In [None]:
m = folium.Map(location=[40.70, -73.94], zoom_start=10, tiles="CartoDB positron")
for _, r in tqdm(streets.iterrows(), total=len(streets)):
    sim_geo = gpd.GeoSeries(r["geometry"])
    geo_j = sim_geo.to_json()
    if r['extra_width']:
        geo_j = folium.GeoJson(data=geo_j, 
            highlight_function=lambda x: {"fillOpacity": 0.8},
            style_function= lambda x: {"color": 'red'})
    else:
        geo_j = folium.GeoJson(data=geo_j, 
            highlight_function=lambda x: {"fillOpacity": 0.8})
    
    folium.Tooltip(f"{r['PHYSICALID']}:  {r['ST_LABEL']}, buildings matched: {r['building_count']}").add_to(geo_j)
    geo_j.add_to(m)

  0%|          | 0/121906 [00:00<?, ?it/s]

In [None]:
building_style = lambda x: {
  'color' : 'green' if x['properties']['found_street2'] else 'orange',
  'opacity' : 0.50,
  'weight' : 2,
}
buildings_sample = buildings[(buildings['id'].isin(buildings_x_streets[buildings_x_streets['found_street'] == False]['id'])) | (buildings['id'].isin(no_matches['id']))]

buildings_viz = folium.GeoJson(buildings_sample[['id', 'display_address', 'geometry', 'found_street2']].reset_index(), 
                               style_function=building_style, highlight_function=lambda x: {"fillOpacity": 0.8}, zoom_on_click=True,
                               tooltip=folium.GeoJsonTooltip(fields=['id', 'display_address']))
buildings_viz.add_to(m)
m.keep_in_front(buildings_viz)

In [None]:
m.save('data/unmatched_buildings_v4.html')

In [None]:
streets.head()

NameError: name 'streets' is not defined

In [24]:
display_streets = pd.read_csv('data/display_streets.csv')
buildings_x_streets = pd.read_csv('data/buildings_x_streets.csv')
display_streets.head()

Unnamed: 0.1,Unnamed: 0,ST_LABEL,borough,segment_ids,geometry,display_label
0,0,1 AVE,Bronx,"{64576, 187004, 64569, 64571, 64572, 64573, 64...",LINESTRING (-73.80724905083855 40.822362727790...,1st Avenue
1,1,1 AVE,Brooklyn,"{36755, 36756, 36757, 36758, 36759, 36760, 367...",MULTILINESTRING ((-74.01269574242941 40.656593...,1st Avenue
2,2,1 AVE,Manhattan,"{2571, 2572, 2574, 2577, 2580, 2581, 2582, 258...",MULTILINESTRING ((-73.98863843855345 40.722933...,1st Avenue
3,3,1 AVE,Staten,"{104392, 104393, 104394, 104395, 104397, 170255}",MULTILINESTRING ((-74.15256900219933 40.597967...,1st Avenue
4,4,1 AVE LOOP,Manhattan,"{103776, 103777, 103778, 192793, 103774, 103775}",LINESTRING (-73.98149890645011 40.732618053020...,1st Avenue Loop


In [25]:
buildings_x_streets.head()

Unnamed: 0.1,Unnamed: 0,id,display_address,PHYSICALID,ST_LABEL,borough
0,0,1,11 Stone Street,79615.0,STONE ST,Manhattan
1,1,2,17 State Street,145498.0,STATE ST,Manhattan
2,1,2,17 State Street,82.0,PEARL ST,Manhattan
3,2,3,80 Wall Street,165.0,WALL ST,Manhattan
4,2,3,80 Wall Street,90.0,PEARL ST,Manhattan


In [26]:
display_streets['match_key'] = display_streets['ST_LABEL'] + display_streets['borough']
buildings_x_streets['match_key'] = buildings_x_streets['ST_LABEL'] + buildings_x_streets['borough']
display_streets[~(display_streets['match_key'].isin(buildings_x_streets['match_key'])) & (display_streets['borough'] != 'Staten')]

Unnamed: 0.1,Unnamed: 0,ST_LABEL,borough,segment_ids,geometry,display_label,match_key
7,7,1 DR,Manhattan,"{136144, 136142, 136119}",LINESTRING (-73.92482570175383 40.789223991192...,1st Drive,1 DRManhattan
10,10,1 ST,Bronx,"{105113, 105114, 105115, 105116, 105117, 105118}",LINESTRING (-73.8804813266689 40.8114784131601...,1st Street,1 STBronx
376,376,2 DR,Manhattan,"{135649, 136106, 136124, 138603, 160427, 13608...",MULTILINESTRING ((-73.92571322143206 40.786266...,2nd Drive,2 DRManhattan
379,379,2 ST,Bronx,"{105120, 105121, 105122, 105123, 105124, 105119}",LINESTRING (-73.8809018063603 40.8130902390720...,2nd Street,2 STBronx
553,553,3 DR,Manhattan,"{136129, 136134, 136166, 136136, 136137, 13613...",MULTILINESTRING ((-73.92503469300016 40.787134...,3rd Drive,3 DRManhattan
...,...,...,...,...,...,...,...
9957,9957,WOOD RD,Bronx,"{105082, 105083, 121095}",LINESTRING (-73.86229201183579 40.835572451428...,Wood Road,WOOD RDBronx
9997,9997,WORTHEN ST,Bronx,"{98880, 98876, 98877, 98878, 98879}",LINESTRING (-73.8909890555788 40.8094851122316...,Worthen Street,WORTHEN STBronx
10018,10018,XAVIER WAY,Bronx,"{169584, 169571, 169573, 169574}",MULTILINESTRING ((-73.88139484609307 40.861049...,Xavier Way,XAVIER WAYBronx
10027,10027,YEATON RD,Manhattan,{26813},LINESTRING (-74.02113724442762 40.685138795802...,Yeaton Road,YEATON RDManhattan


In [27]:
no_match = display_streets[~(display_streets['match_key'].isin(buildings_x_streets['match_key'])) & (display_streets['borough'] != 'Staten')]

In [28]:
buildings_x_streets['ST_LABEL'] == 'DIVISION'

0         False
1         False
2         False
3         False
4         False
          ...  
832733    False
832734    False
832735    False
832736    False
832737    False
Name: ST_LABEL, Length: 832738, dtype: bool

In [29]:
display_streets = pd.read_csv('data/nyc_display_streets - display_streets.csv')

In [32]:
display_streets['match_key'] = display_streets['ST_LABEL'] + display_streets['borough']
display_streets[~display_streets['match_key'].isin(no_match['match_key'])].drop(columns=['match_key']).to_csv('data/display_streets.csv',index=False)