This Python notebook takes a subset of points of interest (POIs) in downtown Santa Cruz and moves each one to the closest point on its nearest building. The results are saved to disk as CSV and Parquet files.

In [None]:
!pip install pandas --quiet
!pip install geopandas --quiet
!pip install shapely --quiet
!pip install duckdb --quiet
!pip install jupysql --quiet
!pip install duckdb-engine --quiet
!pip install folium matplotlib mapclassify --quiet

In [None]:
import pandas as pd
import geopandas as gpd
import duckdb
from shapely import wkt
from shapely import Point
from shapely.ops import nearest_points
import shapely

In [None]:
# (Re)load the `sql` IPython extension, which provides the %sql / %%sql magics
# used in the cells below (presumably supplied by the jupysql package installed above).
%reload_ext sql

In [None]:
# Open the DuckDB connection that every later %sql / %%sql cell runs against.
# No database path is given in the URL, so this is presumably an in-memory database.
%sql duckdb://

In [None]:
# Install and load the DuckDB extensions the queries below rely on:
#   spatial - geometry functions such as ST_AsText
#   httpfs  - reading parquet files from s3:// URLs
# Each extension is installed before it is loaded.
%sql INSTALL spatial;
%sql INSTALL httpfs;
%sql LOAD spatial;
%sql LOAD httpfs;
# Region matches the bucket name queried below (overturemaps-us-west-2)
%sql SET s3_region='us-west-2'

In [None]:
# Configure the SQL magic.
# autopandas: query results come back as pandas DataFrames; later cells index
# the results by column name and pass them to gpd.GeoDataFrame.
%config SqlMagic.autopandas = True
# feedback / displaycon: presumably suppress status messages and the connection
# string in cell output - confirm against the jupysql configuration docs.
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

In [None]:
%%sql places <<
SELECT
    id,
    names.primary as name,
    -- Return the geometry as WKT text; the next cell parses it with shapely's wkt.loads
    ST_AsText(geometry) as geometry
FROM
    -- Overture Maps places data (release 2025-04-23.0), read directly from S3
    read_parquet('s3://overturemaps-us-west-2/release/2025-04-23.0/theme=places/type=place/*')
WHERE
    -- Keep rows whose bbox minimum corner (xmin, ymin) lies inside this window;
    -- values are presumably lon/lat degrees, matching the EPSG:4326 CRS applied later
    bbox.xmin BETWEEN -122.040349 AND -121.980759 AND
    bbox.ymin BETWEEN 36.954478 AND 36.989323

In [None]:
# Parse the WKT strings returned by the query into shapely geometries, then wrap
# the table in a GeoDataFrame (EPSG:4326) so each place carries a real geometry
# object instead of text
place_shapes = places['geometry'].map(wkt.loads)
places = gpd.GeoDataFrame(places, geometry=place_shapes, crs="EPSG:4326")

In [None]:
%%sql buildings <<
SELECT
    -- Return the footprint as WKT text; the next cell parses it with shapely's wkt.loads
    ST_AsText(geometry) as geometry
FROM
    -- Overture Maps buildings data (release 2025-04-23.0), read directly from S3
    read_parquet('s3://overturemaps-us-west-2/release/2025-04-23.0/theme=buildings/type=building/*')
WHERE
    -- Window is slightly larger on every side than the one used for places,
    -- presumably so places near the edge still have candidate buildings.
    -- Only the bbox minimum corner (xmin, ymin) is tested.
    bbox.xmin BETWEEN -122.042 AND -121.978 AND
    bbox.ymin BETWEEN 36.952 AND 36.991

In [None]:
# Parse the WKT strings returned by the query into shapely geometries, then wrap
# the table in a GeoDataFrame (EPSG:4326) so the building footprints can be used
# directly as geometry objects
building_shapes = buildings['geometry'].map(wkt.loads)
buildings = gpd.GeoDataFrame(buildings, geometry=building_shapes, crs="EPSG:4326")

In [None]:
# Snap every place to the closest point on its nearest building footprint.
#
# `places_updated` is a copy of `places` whose geometry column holds the snapped
# locations; `places` itself is left untouched.
#
# A spatial index (STRtree) finds the nearest building for all places in one
# call, replacing a nested Python loop over every (place, building) pair.
# Geometries are handled positionally, so the result does not depend on the
# DataFrames having a default 0..n-1 index.
#
# NOTE(review): distances are measured in the coordinates of the data
# (EPSG:4326 degrees), so "nearest" is approximate; reproject both frames to a
# metric CRS first if exact nearest-building selection matters.
places_updated = places.copy()

building_geoms = list(buildings.geometry)
place_geoms = list(places.geometry)

# With no buildings (or no places) there is nothing to snap to, so the copy is
# left unchanged.
if building_geoms and place_geoms:
    building_tree = shapely.STRtree(building_geoms)
    # One index into `building_geoms` per place, in the same order as `place_geoms`
    nearest_building_idx = building_tree.nearest(place_geoms)

    # nearest_points(a, b)[0] is the point on `a` (the building) closest to `b`;
    # a place already inside a footprint keeps its own location
    snapped_geoms = [
        nearest_points(building_geoms[building_pos], place_geom)[0]
        for building_pos, place_geom in zip(nearest_building_idx, place_geoms)
    ]

    places_updated['geometry'] = gpd.GeoSeries(
        snapped_geoms,
        index=places_updated.index,
        crs=places_updated.crs,
    )

In [None]:
# Draw everything on one interactive map: building footprints as the base layer,
# original place locations in red, snapped locations in green
m = buildings.explore()
for point_layer, marker_color in ((places, "red"), (places_updated, "green")):
    point_layer.explore(m=m, color=marker_color)

# Last expression of the cell, so the map is rendered as the cell output
m

In [None]:
# Attach the snapped locations to the original table as a second geometry
# column, then write the combined table to disk in both formats
places['geometry_updated'] = places_updated['geometry']

places.to_parquet('santa_cruz_places_building_snap.parquet')
places.to_csv('santa_cruz_places_building_snap.csv')