In [8]:
import os
import time

from sqlalchemy import create_engine, text

# Connection URL for the ETL database. It can be overridden through the
# ETL_DATABASE_URL environment variable so credentials do not have to live in
# the notebook; the fallback is the URL this cell originally hardcoded.
DATABASE_URL = os.environ.get(
    "ETL_DATABASE_URL",
    "postgresql+psycopg2://postgres:postgres@postgres:5432/etl_db",
)

engine = create_engine(DATABASE_URL)

# The timer brackets the whole transactional block below, so the measured time
# includes the commit issued when `engine.begin()` exits.
start_time = time.perf_counter()

with engine.begin() as conn:

    print("Populating accident_station_map...")

    # For every accident that has a geometry, the LATERAL subquery picks the
    # single closest station (ORDER BY <-> distance, LIMIT 1). The pair is then
    # upserted into silver.accident_station_map keyed on accident_id, so
    # re-running the cell refreshes existing rows instead of failing.
    # ON CONFLICT (accident_id) requires a unique constraint/index on that column.
    #
    # NOTE(review): distance_km divides ST_Distance by 1000, which is only
    # kilometres if ST_Distance returns metres (geography columns or a
    # metre-based projected SRID). If `geom` is geometry in EPSG:4326 the value
    # would be degrees / 1000 -- confirm the column type/SRID.
    # NOTE(review): the nearest-neighbour lookup presumably relies on a GiST
    # index on silver.stations.geom to stay fast -- confirm it exists.
    conn.execute(text("""
        INSERT INTO silver.accident_station_map (
            accident_id,
            station_id,
            distance_km
        )
        SELECT
            a.accident_id,
            s.station_id,
            ST_Distance(a.geom, s.geom) / 1000.0 AS distance_km
        FROM silver.us_accidents a
        CROSS JOIN LATERAL (
            SELECT station_id, geom
            FROM silver.stations
            ORDER BY a.geom <-> geom
            LIMIT 1
        ) s
        WHERE a.geom IS NOT NULL
        ON CONFLICT (accident_id) DO UPDATE
        SET
            station_id = EXCLUDED.station_id,
            distance_km = EXCLUDED.distance_km;
    """))

elapsed = time.perf_counter() - start_time

# Read back the total size of the mapping table on a separate connection,
# outside the timed section. This is the whole-table row count, not the number
# of rows touched by the statement above.
with engine.connect() as conn:
    result = conn.execute(text("""
        SELECT COUNT(*) FROM silver.accident_station_map;
    """))
    count = result.scalar()

# Summary report; throughput is the table row count divided by the elapsed time.
print("\n✅ accident_station_map populated")
print(f"📦 Rows mapped: {count:,}")
print(f"⏱️ Time: {elapsed:.2f} seconds")
print(f"⚡ Rows/sec: {count / elapsed:,.0f}")


Populating accident_station_map...

✅ accident_station_map populated
📦 Rows mapped: 7,728,394
⏱️ Time: 928.59 seconds
⚡ Rows/sec: 8,323
