In [None]:
import os
from urllib.parse import quote_plus

import geopandas as gpd
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
from sqlalchemy import inspect
from sqlalchemy import text

In [None]:
# Connection settings for the PostgreSQL server on localhost.
# The password is read from the SQL_PASSWORD environment variable (None when
# the variable is unset) so that it is never written into the notebook.
host, port = "localhost", "5432"
database = "spatial"
user = "postgres"
password = os.environ.get("SQL_PASSWORD")

In [None]:
# Build the SQLAlchemy database URL.
# The password is percent-encoded so characters such as '@', ':' or '/' cannot
# be misread as URL delimiters. When SQL_PASSWORD is unset (password is None)
# the password part is left out instead of sending the literal text "None".
connection_string = (
    f"postgresql://{user}"
    + ("" if password is None else f":{quote_plus(password)}")
    + f"@{host}:{port}/{database}"
)

In [None]:
# Create the SQLAlchemy engine from the connection URL; the read_postgis calls
# in the later cells pass this engine as `con`.
engine = create_engine(connection_string)
# Build an inspector on the engine and list table names. The call is the
# cell's last expression, so its result is shown as the cell output.
# NOTE(review): no schema argument is passed while the queries below read from
# the ch09 schema - presumably this lists only the default schema; confirm
# whether schema="ch09" was intended.
insp = inspect(engine)
insp.get_table_names()

## Streets in MA

In [None]:
# Read every row of ch09.road into a GeoDataFrame.
text_road = """ SELECT * FROM ch09.road"""
road = gpd.read_postgis(sql=text_road, con=engine)

# Draw the road layer in grey on a 12 x 10 inch figure.
fig = plt.figure(figsize=(12, 10))
ax = fig.subplots()
road.plot(color='grey', ax=ax)

## Parcel

In [None]:
# Load both layers: the roads and the land parcels.
text_road = """ SELECT * FROM ch09.road"""
road = gpd.read_postgis(sql=text_road, con=engine)

text_land = """ SELECT * FROM ch09.land"""
land = gpd.read_postgis(sql=text_land, con=engine)

# Extent of the parcel layer, used to zoom the map in on the parcels.
xmin, ymin, xmax, ymax = land.total_bounds

fig = plt.figure(figsize=(12, 10))
ax = fig.subplots()

# Pad the parcel extent by `buffer` (in the layer's coordinate units) on every
# side. The limits are fixed before anything is drawn on the axes.
buffer = 20000
ax.set_xlim(left=xmin - buffer, right=xmax + buffer)
ax.set_ylim(bottom=ymin - buffer, top=ymax + buffer)

# Parcels in blue first, then the roads in grey on the same axes.
land.plot(color='blue', ax=ax)
road.plot(color='grey', ax=ax)

## Snap parcels to the road: closest point on each parcel to its nearest road

In [None]:
# For every parcel, look at the roads within a distance of 20 (ST_DWithin) and
# keep only the nearest one: DISTINCT ON (land.pid) combined with the ORDER BY
# on ST_Distance leaves one row per parcel, the one with the smallest distance.
# ST_ClosestPoint(land.geom, road.geom) is the point on the parcel geometry
# that lies closest to that road; it is returned as the `geom` column.
text_snap_point = """SELECT DISTINCT ON (land.pid)
land.addr_num || ' ' || full_str AS parcel,
road.road_name AS road,
ST_ClosestPoint(land.geom,road.geom) As geom

FROM ch09.land AS land INNER JOIN ch09.road AS road
ON ST_DWithin(land.geom,road.geom,20.0)
ORDER BY land.pid, ST_Distance(land.geom,road.geom);"""

snap_point = gpd.read_postgis(text_snap_point, con=engine)

# Quick look at the first rows (parcel address, road name, snapped point).
print(snap_point.head(n = 5))

text_land = """ SELECT * FROM ch09.land"""
land = gpd.read_postgis(text_land, con=engine)
fig, ax = plt.subplots(figsize=(12, 10))

# Draw the parcels first and the snapped points last. Layers are painted in
# the order they are added, so plotting the filled black parcels after the
# points would cover the red markers this cell is meant to show.
land.plot(ax=ax, color = 'black')
snap_point.plot(ax=ax, color = 'red')

## Clustering: k-means

In [None]:
# One query labels every parcel twice using PostGIS window functions:
#   kcluster - ST_ClusterKMeans asked for 4 clusters
#   dcluster - ST_ClusterDBSCAN called with 15 and 2 as its two numeric arguments
# The parcel id, geometry and a readable address are returned alongside.
text_cluster = """SELECT land.pid, land.geom,
land.addr_num || ' ' || full_str AS address,
ST_ClusterKMeans(land.geom, 4) OVER() AS kcluster,
ST_ClusterDBSCAN(land.geom, 15, 2) OVER() AS dcluster
FROM ch09.land AS land;"""

cluster = gpd.read_postgis(sql=text_cluster, con=engine)

# Colour each parcel by its k-means label (kcluster column).
fig = plt.figure(figsize=(10, 8))
ax = fig.subplots()
cluster.plot(ax=ax, column="kcluster", cmap="tab10", alpha=0.75, edgecolor="none")

## Clustering: DBSCAN

In [None]:
# Plot the clusters based on the dcluster column (the DBSCAN labels).
# ST_ClusterDBSCAN leaves the label NULL for parcels it treats as noise, and
# geopandas does not draw rows with a missing value unless missing_kwds is
# given, so those parcels are shown in light grey instead of vanishing.
fig, ax = plt.subplots(figsize=(10, 8))
cluster.plot(
    column="dcluster",
    cmap="tab10",
    edgecolor="none",
    alpha=0.75,
    ax=ax,
    missing_kwds={"color": "lightgrey"},
)