In [9]:
import os
import time

from sqlalchemy import create_engine, text

# The connection URL is read from the ETL_DATABASE_URL environment variable so
# that real credentials never have to be committed together with the notebook.
# The fallback is the URL this cell previously hard-coded, kept so existing
# environments keep working without any extra configuration.
DEFAULT_DATABASE_URL = "postgresql+psycopg2://postgres:postgres@postgres:5432/etl_db"

engine = create_engine(os.environ.get("ETL_DATABASE_URL", DEFAULT_DATABASE_URL))

# Build gold.accident_weather and report how long the load took.
#
# The INSERT ... SELECT below:
#   * joins silver.us_accidents to silver.accident_station_map on accident_id,
#     then to silver.weather_daily on station_id and on
#     obs_date = DATE(start_time_utc);
#   * pivots the weather elements TMAX / TMIN / PRCP / SNOW / AWND into one
#     column each via MAX(value) FILTER (...), grouped per accident/station;
#   * derives precipitation_flag / snow_flag as "> 0" on the pivoted value
#     (the flag is NULL when that element has no row for the day);
#   * skips accidents already present via ON CONFLICT (accident_id) DO NOTHING,
#     so re-running the cell does not duplicate rows.
# Both joins are inner joins, so accidents without a mapped station or without
# any weather row for that date are not loaded.
# NOTE(review): if accident_station_map can hold several stations for one
# accident, only one of them survives the ON CONFLICT clause - confirm the map
# is one station per accident.
start_time = time.perf_counter()

# engine.begin() commits when the block exits normally and rolls back if the
# statement raises, so the table is never left half-loaded by this cell.
with engine.begin() as conn:

    print("Building gold.accident_weather...")

    insert_result = conn.execute(text("""
        INSERT INTO gold.accident_weather (
            accident_id,
            station_id,
            distance_km,
            obs_date,

            severity,
            start_time_utc,
            duration_minutes,
            latitude,
            longitude,
            state,

            tmax_c,
            tmin_c,
            prcp_mm,
            snow_mm,
            avg_wind_mph,

            precipitation_flag,
            snow_flag
        )
        SELECT
            a.accident_id,
            m.station_id,
            m.distance_km,
            DATE(a.start_time_utc) AS obs_date,

            a.severity,
            a.start_time_utc,
            a.duration_minutes,
            a.latitude,
            a.longitude,
            a.state,

            MAX(w.value) FILTER (WHERE w.element = 'TMAX') AS tmax_c,
            MAX(w.value) FILTER (WHERE w.element = 'TMIN') AS tmin_c,
            MAX(w.value) FILTER (WHERE w.element = 'PRCP') AS prcp_mm,
            MAX(w.value) FILTER (WHERE w.element = 'SNOW') AS snow_mm,
            MAX(w.value) FILTER (WHERE w.element = 'AWND') AS avg_wind_mph,

            (MAX(w.value) FILTER (WHERE w.element = 'PRCP') > 0) AS precipitation_flag,
            (MAX(w.value) FILTER (WHERE w.element = 'SNOW') > 0) AS snow_flag

        FROM silver.us_accidents a
        JOIN silver.accident_station_map m
            ON a.accident_id = m.accident_id
        JOIN silver.weather_daily w
            ON w.station_id = m.station_id
           AND w.obs_date = DATE(a.start_time_utc)

        GROUP BY
            a.accident_id,
            m.station_id,
            m.distance_km,
            DATE(a.start_time_utc),
            a.severity,
            a.start_time_utc,
            a.duration_minutes,
            a.latitude,
            a.longitude,
            a.state

        ON CONFLICT (accident_id) DO NOTHING;
    """))

    # Rows actually written by THIS run (conflicting rows are not counted).
    # A negative value means the driver could not report a count.
    rows_inserted = insert_result.rowcount

# Stop the clock only after the transaction has committed.
elapsed = time.perf_counter() - start_time

with engine.connect() as conn:
    result = conn.execute(text("""
        SELECT COUNT(*) FROM gold.accident_weather;
    """))
    # Total rows now in the table, including rows loaded by earlier runs.
    count = result.scalar()

print("\n✅ gold.accident_weather built")
print(f"📦 Rows: {count:,}")
print(f"⏱️ Time: {elapsed:.2f} seconds")

# Throughput is based on the rows inserted by this run, not on the table total:
# on a re-run ON CONFLICT skips existing rows, and dividing the full table size
# by the elapsed time would report a throughput that never happened.
if rows_inserted >= 0:
    rows_per_second = rows_inserted / elapsed if elapsed > 0 else 0.0
    print(f"➕ Inserted this run: {rows_inserted:,}")
    print(f"⚡ Rows/sec: {rows_per_second:,.0f}")
else:
    print("⚡ Rows/sec: n/a (driver did not report an inserted-row count)")


Building gold.accident_weather...



✅ gold.accident_weather built
📦 Rows: 370,459
⏱️ Time: 140.22 seconds
⚡ Rows/sec: 2,642
