In [None]:
import os
from dotenv import load_dotenv
import pandas as pd
import geopandas as gpd
from sqlalchemy import create_engine

# Load the connection string from the local .env file and build the SQLAlchemy
# engine that the pd.read_sql calls in this notebook share.
load_dotenv(dotenv_path=".env")
db = os.getenv("db_url")
if not db:
    # Without this guard the failure surfaces inside create_engine as an
    # argument error that never mentions which setting is missing.
    raise RuntimeError(
        "Environment variable 'db_url' is not set; define it in .env before running this notebook."
    )
engine = create_engine(db)

In [None]:
# Every city is read from its own `public.<table>` with the same column list.
# Keeping the list and the table names in one place means the ten queries
# cannot drift apart when a column is added or renamed.
CITY_COLUMNS = "ur20, area, isoperi, solidity, rectan, pop_den, geom"
CITY_TABLES = (
    "nyc", "la", "chicago", "bay", "dallas",
    "dc", "houston", "boston", "atlanta", "seattle",
)


def _load_city(table: str) -> pd.DataFrame:
    """Return the shared column set of ``public.<table>`` as a DataFrame.

    ``table`` is interpolated directly into the SQL text, so it must only ever
    come from the hard-coded ``CITY_TABLES`` tuple, never from external input.
    """
    return pd.read_sql(f"SELECT {CITY_COLUMNS} FROM public.{table}", con=engine)


# Unpack into the per-city names that the following cells refer to; the
# generator is consumed in CITY_TABLES order, so names and tables line up.
nyc, la, chicago, bay, dallas, dc, houston, boston, atlanta, seattle = (
    _load_city(table) for table in CITY_TABLES
)

In [None]:
# Tag every row of each per-city frame with a short city label (added in place
# as a new 'city' column).
for label, frame in (
    ("nyc", nyc),
    ("la", la),
    ("chicago", chicago),
    ("bay", bay),
    ("dallas", dallas),
    ("dc", dc),
    ("houston", houston),
    ("boston", boston),
    ("atlanta", atlanta),
    ("seattle", seattle),
):
    frame["city"] = label

In [None]:
# Stack the ten city frames into a single table and replace the per-frame row
# labels with one fresh 0..N-1 index.
city_frames = [nyc, la, chicago, bay, dallas, dc, houston, boston, atlanta, seattle]
combined = pd.concat(city_frames)
combined = combined.reset_index(drop=True)

In [None]:
import pandas as pd
import numpy as np
import torch
from torch_geometric.data import Data
from shapely import wkb, wkt
from shapely.geometry import Polygon, MultiPolygon,LinearRing

def parse_geom(g):
    """Decode a hex-encoded WKB string into a shapely geometry.

    Args:
        g: Hexadecimal text representation of a WKB geometry.

    Returns:
        The geometry object produced by ``shapely.wkb.loads``.
    """
    raw_wkb = bytes.fromhex(g)
    geometry = wkb.loads(raw_wkb)
    return geometry

def _turning_angle(p_prev, p, p_next):
    v1 = p_prev - p
    v2 = p_next - p
    n1 = np.linalg.norm(v1) + 1e-12
    n2 = np.linalg.norm(v2) + 1e-12
    cosang = np.clip(np.dot(v1, v2) / (n1 * n2), -1.0, 1.0)
    return np.arccos(cosang)

def polygon_to_graph(poly_hex: str, y_value=None):
    """Convert a hex-encoded WKB polygon into a ring-shaped PyG graph.

    One node is created per exterior vertex (the repeated closing vertex is
    dropped) and consecutive vertices are connected in both directions. For a
    MultiPolygon only the part with the largest area is used. Interior rings
    are not turned into nodes.

    Node features, in column order:
        * vertex coordinates minus the mean of the exterior vertices
          (two columns for 2-D geometries)
        * unsigned angle between the two edges meeting at the vertex (radians)
        * half the length of the edge to the previous vertex
        * half the length of the edge to the next vertex
        * polygon area (same value on every node)
        * number of vertices (same value on every node)

    Args:
        poly_hex: Hexadecimal text of the geometry's WKB. ``None`` or NaN is
            treated as a missing geometry.
        y_value: Optional target value; when given it is stored as a
            one-element float32 tensor in ``data.y``.

    Returns:
        A ``torch_geometric.data.Data`` with float32 ``x`` and an int64
        ``edge_index`` of shape ``(2, 2 * n)``, or ``None`` when the geometry
        is missing, empty, or has fewer than three exterior vertices.
    """
    # A NULL geometry reaches us from pandas as None or NaN; report it like any
    # other unusable geometry instead of failing inside bytes.fromhex.
    if poly_hex is None or (isinstance(poly_hex, float) and np.isnan(poly_hex)):
        return None

    poly = parse_geom(poly_hex)
    if poly.is_empty:
        # Also protects the max() below, which raises on an empty MultiPolygon.
        return None

    if isinstance(poly, MultiPolygon):
        poly = max(poly.geoms, key=lambda part: part.area)

    # Stay in float64 until the feature matrix is assembled. Casting the raw
    # coordinates to float32 first (about 7 significant digits) quantises large
    # lon/lat or projected values before the mean is subtracted, which corrupts
    # the centred coordinates, edge lengths and angles derived from them.
    coords = np.array(poly.exterior.coords[:-1], dtype=np.float64)
    n = coords.shape[0]
    if n < 3:
        return None

    centered = coords - coords.mean(axis=0, keepdims=True)

    idx = np.arange(n)
    prev_idx = (idx - 1) % n
    next_idx = (idx + 1) % n

    angles = np.array(
        [_turning_angle(centered[prev_idx[i]], centered[i], centered[next_idx[i]]) for i in range(n)],
        dtype=np.float64,
    )

    edge_to_prev = np.linalg.norm(centered - centered[prev_idx], axis=1)
    edge_to_next = np.linalg.norm(centered[next_idx] - centered, axis=1)

    # Graph-level scalars are broadcast onto every node as constant columns.
    area_col = np.full((n, 1), poly.area, dtype=np.float64)
    count_col = np.full((n, 1), n, dtype=np.float64)

    features = np.concatenate(
        [
            centered,
            angles[:, None],
            (0.5 * edge_to_prev)[:, None],
            (0.5 * edge_to_next)[:, None],
            area_col,
            count_col,
        ],
        axis=1,
    ).astype(np.float32)  # single down-cast, after all arithmetic is done
    x = torch.from_numpy(features)

    # Ring connectivity: i -> i+1 followed by the reverse edges i+1 -> i.
    src = torch.arange(n, dtype=torch.long)
    dst = torch.from_numpy(next_idx.astype(np.int64))
    edge_index = torch.stack([torch.cat([src, dst]), torch.cat([dst, src])], dim=0)

    data = Data(x=x, edge_index=edge_index)

    if y_value is not None:
        data.y = torch.tensor([float(y_value)], dtype=torch.float32)

    return data

In [None]:
# Build one graph object per geometry, attach the result as a new 'graph'
# column, then persist the whole table to disk as a pickle.
graph_column = combined["geom"].apply(polygon_to_graph)
combined["graph"] = graph_column
combined.to_pickle("combined_with_graph.pkl")