In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
# City to process. Both the data directory and the file names are derived from
# this value, so switching city only requires changing this one line.
city = "Providence"
data_dir = f"../data/{city}"

# Pre-filtered GeoJSON layers for the city, one GeoDataFrame per layer.
addresses = gpd.read_file(f"{data_dir}/filtered_{city}_addresses.geojson")
buildings = gpd.read_file(f"{data_dir}/filtered_{city}_buildings.geojson")
places = gpd.read_file(f"{data_dir}/filtered_{city}_places.geojson")
building_parts = gpd.read_file(f"{data_dir}/filtered_{city}_buildingparts.geojson")

In [4]:
# Pair each building part with its parent building: a part's `building_id`
# must equal a building's `id`. Columns present in both inputs are
# disambiguated with the "_part" / "_building" suffixes.
merged = building_parts.merge(
    buildings,
    how="inner",
    left_on="building_id",
    right_on="id",
    suffixes=("_part", "_building"),
)

# Source column -> output column, listed in output order. Entries that map a
# name to itself are kept as-is; they are listed only to fix the column order.
column_map = {
    "id_part": "part_id",
    "id_building": "building_id",
    # Attributes taken from the building record
    "subtype": "subtype",
    "class": "class",
    "names_building": "building_names",
    "level": "level",
    "height_building": "building_height",
    "num_floors_building": "building_num_floors",
    "geometry_building": "building_geometry",
    # Attributes taken from the building-part record
    "names_part": "part_names",
    "height_part": "part_height",
    "num_floors_part": "part_num_floors",
    "geometry_part": "part_geometry",
}
result = merged[list(column_map)].rename(columns=column_map)

# Parts whose id never made it into the joined table, i.e. parts without a
# matching building.
part_was_matched = building_parts["id"].isin(result["part_id"])
unassigned_building_parts = building_parts[~part_was_matched]
unassigned_building_parts.head(2)

Unnamed: 0,id,version,sources,names,height,is_underground,num_floors,facade_color,facade_material,min_floor,roof_material,roof_shape,building_id,geometry


In [5]:
import pandas as pd

# Attach places to the building/part rows of `result`.
#
# gpd.sjoin reports each match through `index_right`, which holds the index
# *labels* of the right-hand frame, and the merge further down joins on
# result's index. The two only agree when `result` has a clean 0..n-1 index,
# so normalise it first. drop=True avoids leaving an "index" helper column
# behind and keeps this cell safe to re-run.
result = result.reset_index(drop=True)

# building_geometry originates from `buildings`, so that frame's CRS is the
# CRS the places must be expressed in for the spatial joins.
places = places.to_crs(buildings.crs)

# Right-hand frames for the spatial joins. The active geometry column is named
# explicitly instead of relying on a column merely being called "geometry".
building_footprints = gpd.GeoDataFrame(
    result[['building_id', 'building_geometry']].rename(columns={'building_geometry': 'geometry'}),
    geometry='geometry',
)
part_footprints = gpd.GeoDataFrame(
    result[['part_id', 'part_geometry']].rename(columns={'part_geometry': 'geometry'}),
    geometry='geometry',
)

places_with_buildings = gpd.sjoin(
    places,
    building_footprints,
    how='inner',
    predicate='intersects'
)
places_with_parts = gpd.sjoin(
    places,
    part_footprints,
    how='inner',
    predicate='intersects'
)

# Place attributes to carry over, renamed with a "places_" prefix.
# `result_index` is the row of `result` that the place intersected.
place_columns = {
    'id': 'places_id',
    'names': 'places_names',
    'categories': 'places_categories',
    'confidence': 'places_confidence',
    'brand': 'places_brand',
    'index_right': 'result_index'
}
places_with_geometries = (
    pd.concat([places_with_buildings, places_with_parts])[list(place_columns)]
    .rename(columns=place_columns)
    # A place that hits both the building footprint and the part footprint of
    # the same row appears once per join; keep a single (place, row) pair so
    # the merge below does not emit duplicate rows.
    .drop_duplicates(subset=['places_id', 'result_index'])
)

# Left join keeps every building/part row, with empty place columns where
# nothing matched. A merge that pairs left_index with right_on does not hand
# back a clean 0..n-1 index, so reset it again for whatever runs next.
result = (
    result.merge(
        places_with_geometries,
        left_index=True,
        right_on='result_index',
        how='left'
    )
    .drop(columns=['result_index'])
    .reset_index(drop=True)
)

# Places that intersected neither a building nor a building part.
unassigned_places = places[~places.id.isin(places_with_geometries.places_id)]
unassigned_places.head(2)


Unnamed: 0,id,version,sources,names,categories,confidence,brand,addresses,geometry
0,08f2a3302cb06af00313014fbab163ba,0,"[ { ""property"": """", ""dataset"": ""meta"", ""record...","{ ""primary"": ""Pranzi Catering & Events"", ""comm...","{ ""primary"": ""caterer"", ""alternate"": [ ""event_...",0.948077,,"[ { ""freeform"": ""10 Rosario Dr"", ""locality"": ""...",POINT (-71.46175 41.80548)
1,08f2a3302cb33949031b885b3621f8db,0,"[ { ""property"": """", ""dataset"": ""meta"", ""record...","{ ""primary"": ""Rooter-Man Plumbers"", ""common"": ...","{ ""primary"": ""plumbing"", ""alternate"": [ ""home_...",0.459406,,"[ { ""freeform"": ""10 Rosario Dr"", ""locality"": ""...",POINT (-71.46159 41.80543)


In [6]:
# Attach address points to the building/part rows of `result`.
#
# gpd.sjoin reports each match through `index_right`, which holds the index
# *labels* of the right-hand frame, while the merge further down joins on
# result's index. If `result` arrives with anything other than a clean 0..n-1
# index (for example after an earlier index-on-column merge), labels and
# positions disagree and addresses get attached to the wrong rows. Normalise
# the index before joining.
#
# drop=True is also what removes the need for the old 'level_0' clean-up:
# a plain reset_index() stores the previous index in a new "index" column, or
# in "level_0" when "index" is already taken, and a later reset_index() then
# fails because "level_0" exists.
result = result.reset_index(drop=True)

# part_geometry originates from `building_parts`, so that frame's CRS is the
# CRS the addresses must be expressed in for the spatial joins.
addresses = addresses.to_crs(building_parts.crs)
print(addresses.columns)

# Right-hand frames for the spatial joins. The active geometry column is named
# explicitly instead of relying on a column merely being called "geometry".
building_footprints = gpd.GeoDataFrame(
    result[['building_id', 'building_geometry']].rename(columns={'building_geometry': 'geometry'}),
    geometry='geometry',
)
part_footprints = gpd.GeoDataFrame(
    result[['part_id', 'part_geometry']].rename(columns={'part_geometry': 'geometry'}),
    geometry='geometry',
)

addresses_with_buildings = gpd.sjoin(
    addresses,
    building_footprints,
    how='inner',
    predicate='intersects'
)
addresses_with_parts = gpd.sjoin(
    addresses,
    part_footprints,
    how='inner',
    predicate='intersects'
)

# Address attributes to carry over, renamed with an "address_" prefix where
# they do not have one yet. `result_index` is the row of `result` that the
# address intersected.
address_columns = {
    'id': 'address_id',
    'postcode': 'address_postcode',
    'street': 'address_street',
    'number': 'address_number',
    'unit': 'address_unit',
    'address_levels': 'address_levels',
    'index_right': 'result_index'
}
addresses_with_geometries = (
    pd.concat([addresses_with_buildings, addresses_with_parts])[list(address_columns)]
    .rename(columns=address_columns)
    # An address that hits both the building footprint and the part footprint
    # of the same row appears once per join; keep a single (address, row) pair
    # so the merge below does not emit duplicate rows.
    .drop_duplicates(subset=['address_id', 'result_index'])
)

# Left join keeps every row of `result`, with empty address columns where
# nothing matched; the index is reset afterwards so it is 0..n-1 again.
result = (
    result.merge(
        addresses_with_geometries,
        left_index=True,
        right_on='result_index',
        how='left'
    )
    .drop(columns=['result_index'])
    .reset_index(drop=True)
)

# Addresses that intersected neither a building nor a building part.
unassigned_addresses = addresses[~addresses.id.isin(addresses_with_geometries.address_id)]
unassigned_addresses.head(2)

Index(['id', 'country', 'postcode', 'street', 'number', 'unit',
       'address_levels', 'version', 'sources', 'geometry'],
      dtype='object')


Unnamed: 0,id,country,postcode,street,number,unit,address_levels,version,sources,geometry
0,08b2a3302c9a3fff057e20e0183c8e4a,US,2909,JACQUELINE Drive,32,,"[ { ""value"": ""RI"" }, { ""value"": null } ]",0,"[ { ""property"": """", ""dataset"": ""NAD"", ""record_...",POINT (-71.46718 41.81349)
1,08b2a3302c9a3fff05449269345ab32c,US,2909,JACQUELINE Drive,24,,"[ { ""value"": ""RI"" }, { ""value"": null } ]",0,"[ { ""property"": """", ""dataset"": ""NAD"", ""record_...",POINT (-71.46692 41.81351)


In [7]:
# Export the merged table and the three "unassigned" leftovers as GeoJSON.
# The output directory follows the city name, like the output file names do.
output_dir = f"../data/{city}"

# 'level_0' is a helper column that an index-preserving reset_index() can
# leave behind; it carries no data worth exporting.
result = result.drop(columns=['level_0'], errors='ignore')

# Store the part footprints as WKT text so that building_geometry is the only
# geometry column left in the exported table. The guard makes the cell safe to
# re-run after part_geometry has already been converted.
if 'part_geometry' in result.columns:
    result["part_wkt"] = result["part_geometry"].to_wkt()
    result = result.drop(columns=["part_geometry"])

# No column is literally named "geometry" at this point, so state explicitly
# which column to_file() should write as the feature geometry.
result = gpd.GeoDataFrame(result, geometry="building_geometry")
result.to_file(f"{output_dir}/{city}_merged.geojson", driver='GeoJSON')

# NOTE(review): unassigned_building_parts was empty in the recorded run;
# confirm that the installed geopandas / GeoJSON driver accepts empty frames.
leftovers = {
    "building_parts": unassigned_building_parts,
    "places": unassigned_places,
    "addresses": unassigned_addresses,
}
for layer_name, layer in leftovers.items():
    layer.to_file(f"{output_dir}/{city}_unassigned_{layer_name}.geojson", driver='GeoJSON')