In [1]:
import pandas as pd
from sqlalchemy import create_engine, text

engine = create_engine(
    "postgresql+psycopg2://postgres:postgres@postgres:5432/etl_db"
)

In [2]:
pd.read_sql(
    """
    SELECT *
    FROM bronze.stations
    where station_id is null
    """,
    con=engine
)


Unnamed: 0,station_id,latitude,longitude,elevation,state,name,gsn,hcn,wmo,ingested_at


In [3]:
with engine.begin() as conn:
    conn.execute(text("""
        INSERT INTO silver.stations (
            station_id,
            country_code,
            state,
            name,
            latitude,
            longitude,
            elevation_m,
            is_gsn,
            is_hcn,
            geom
        )
        SELECT DISTINCT
            station_id,
            SUBSTRING(station_id FROM 1 FOR 2) AS country_code,
            NULLIF(state, '') AS state,
            name,
            latitude::DOUBLE PRECISION AS latitude,
            longitude::DOUBLE PRECISION AS longitude,
            elevation::DOUBLE PRECISION AS elevation_m,
            (gsn = 'GSN') AS is_gsn,
            (hcn = 'HCN') AS is_hcn,

            -- Create spatial column
            ST_SetSRID(
                ST_MakePoint(
                    longitude::DOUBLE PRECISION,
                    latitude::DOUBLE PRECISION
                ),
                4326
            )::GEOGRAPHY

        FROM bronze.stations
        WHERE station_id LIKE 'US%'
          AND latitude IS NOT NULL
          AND longitude IS NOT NULL
        ON CONFLICT (station_id) DO NOTHING
    """))

In [4]:
query = """
SELECT
    *
FROM silver.stations
ORDER BY station_id
LIMIT 10;
"""

df_check = pd.read_sql(text(query), engine)
df_check

Unnamed: 0,station_id,country_code,state,name,latitude,longitude,elevation_m,is_gsn,is_hcn,geom,created_at,last_updated_at
0,US009052008,US,SD,SIOUX FALLS (ENVIRON. CANADA),43.7333,-96.6333,482.0,,,0101000020E6100000A323B9FC872858C012143FC6DCDD...,2026-02-11 14:29:38.278354+00:00,2026-02-11 14:29:38.278354+00:00
1,US10adam001,US,NE,JUNIATA 1.5 S,40.568,-98.5069,598.0,,,0101000020E6100000EA95B20C71A058C062105839B448...,2026-02-11 14:29:38.278354+00:00,2026-02-11 14:29:38.278354+00:00
2,US10adam002,US,NE,JUNIATA 6.0 SSW,40.5093,-98.5493,601.1,,,0101000020E610000057EC2FBB27A358C029ED0DBE3041...,2026-02-11 14:29:38.278354+00:00,2026-02-11 14:29:38.278354+00:00
3,US10adam003,US,NE,HOLSTEIN 0.1 NW,40.4663,-98.6537,615.1,,,0101000020E61000004B598638D6A958C0F90FE9B7AF3B...,2026-02-11 14:29:38.278354+00:00,2026-02-11 14:29:38.278354+00:00
4,US10adam004,US,NE,AYR 3.5 NE,40.4798,-98.4026,570.0,,,0101000020E6100000A857CA32C49958C0764F1E166A3D...,2026-02-11 14:29:38.278354+00:00,2026-02-11 14:29:38.278354+00:00
5,US10adam006,US,NE,ROSELAND 2.8 SW,40.4372,-98.5912,601.1,,,0101000020E61000004B598638D6A558C0D5E76A2BF637...,2026-02-11 14:29:38.278354+00:00,2026-02-11 14:29:38.278354+00:00
6,US10adam007,US,NE,HASTINGS 5.4 WSW,40.5389,-98.4713,588.9,,,0101000020E6100000B5A679C7299E58C03EE8D9ACFA44...,2026-02-11 14:29:38.278354+00:00,2026-02-11 14:29:38.278354+00:00
7,US10adam008,US,NE,GLENVIL 2.3 WSW,40.4953,-98.2973,566.9,,,0101000020E6100000744694F6069358C0BADA8AFD653F...,2026-02-11 14:29:38.278354+00:00,2026-02-11 14:29:38.278354+00:00
8,US10adam010,US,NE,JUNIATA 6.9 WSW,40.5532,-98.6297,622.1,,,0101000020E6100000A3923A014DA858C0D712F241CF46...,2026-02-11 14:29:38.278354+00:00,2026-02-11 14:29:38.278354+00:00
9,US10adam011,US,NE,ROSELAND 5.2 SW,40.4078,-98.6161,593.1,,,0101000020E610000033C4B12E6EA758C087A757CA3234...,2026-02-11 14:29:38.278354+00:00,2026-02-11 14:29:38.278354+00:00
