In [5]:
import os

import pandas as pd
from sqlalchemy import create_engine, text

In [6]:
# Database connection for both the bronze read and the silver write.
# The URL is taken from the ETL_DATABASE_URL environment variable so that
# credentials do not have to live in the notebook; the fallback is the
# original local-development URL, so existing setups keep working unchanged.
DATABASE_URL = os.environ.get(
    "ETL_DATABASE_URL",
    "postgresql+psycopg2://postgres:postgres@postgres:5432/etl_db",
)

engine = create_engine(DATABASE_URL)

In [7]:
# SQL used to load the bronze layer: every column of bronze.weather_daily.
# There is no WHERE clause, so the whole table is fetched, and no ORDER BY,
# so the order in which rows arrive is not specified by the query.
# NOTE(review): the later drop_duplicates() keeps the first row per key, so
# which duplicate survives depends on this unspecified order — confirm that
# is acceptable.
query = """
SELECT
    *
FROM bronze.weather_daily
"""

In [8]:
# Run the bronze query through the shared engine and preview the first rows.
df = pd.read_sql(sql=query, con=engine)
df.head(n=5)


In [9]:
# Per-element conversion table: the raw "value" is divided by "scale" and the
# result is labelled with "unit". Elements that are not listed here are
# dropped by the transform cell.
# Scales assume the bronze values are raw GHCN-Daily readings (tenths for
# temperature and precipitation, whole millimetres for snow) — TODO confirm
# against the bronze ingestion step.
ELEMENT_CONFIG = {
    # Temperature: stored in tenths of a degree Celsius
    "TMAX": {"scale": 10.0, "unit": "celsius"},
    "TMIN": {"scale": 10.0, "unit": "celsius"},
    "TAVG": {"scale": 10.0, "unit": "celsius"},

    # Precipitation: stored in tenths of a millimetre
    "PRCP": {"scale": 10.0, "unit": "mm"},

    # Snowfall and snow depth: already whole millimetres, so no rescaling.
    # (SNOW previously used 10.0, which shrank snowfall by a factor of ten
    # and was inconsistent with SNWD.)
    "SNOW": {"scale": 1.0,  "unit": "mm"},
    "SNWD": {"scale": 1.0,  "unit": "mm"},

    # Wind (optional but reasonable)
    # "AWND": {"scale": 10.0, "unit": "m_per_s"},
    # "WSF2": {"scale": 10.0, "unit": "m_per_s"},
    # "WSF5": {"scale": 10.0, "unit": "m_per_s"},
}

In [10]:
# Row-wise helper: scaled value and unit for one observation
def scale(row):
    """Return ``(scaled_value, unit)`` for a single observation.

    Parameters
    ----------
    row : mapping
        Must provide ``row["element"]`` and ``row["value"]``.

    Returns
    -------
    tuple
        ``(row["value"] / scale, unit)`` using the ELEMENT_CONFIG entry for
        the row's element, or ``(None, None)`` when the element has no
        (non-empty) entry in ELEMENT_CONFIG.
    """
    element_cfg = ELEMENT_CONFIG.get(row["element"])
    if element_cfg:
        scaled_value = row["value"] / element_cfg["scale"]
        return scaled_value, element_cfg["unit"]
    # Element is not configured: signal "nothing to convert".
    return None, None

In [11]:
# Label each row with its unit, rescale "value", then keep only the rows whose
# element is present in ELEMENT_CONFIG.
# NOTE: this cell reads `df` and reassigns it, so running it again without
# reloading `df` first divides "value" by the scale a second time.
def _unit_for(code):
    """Unit configured for an element code, or None when it is not configured."""
    return ELEMENT_CONFIG.get(code, {}).get("unit")


def _divisor_for(code):
    """Scale divisor for an element code; 1 when the element is not configured."""
    return ELEMENT_CONFIG.get(code, {}).get("scale", 1)


scaled = df.assign(
    unit=df["element"].map(_unit_for),
    value=df["value"] / df["element"].map(_divisor_for),
)

# A missing unit means the element had no config entry: discard those rows.
df = scaled[scaled["unit"].notna()].copy()

In [12]:
# Keep only the first row seen for each (station, date, element) key.
dedup_keys = ["station_id", "obs_date", "element"]
is_repeat = df.duplicated(subset=dedup_keys, keep="first")
df = df[~is_repeat]

In [13]:
# Independent copy holding just the columns written to the silver table.
silver_columns = ["station_id", "obs_date", "element", "value", "unit"]
df_silver = df.loc[:, silver_columns].copy()

In [14]:
# Full refresh of silver.weather_daily.
# The TRUNCATE and the insert share one transaction (engine.begin() commits on
# success and rolls back on any exception), so a failed load no longer leaves
# the table empty, and re-running this cell cannot append the same rows twice.
with engine.begin() as conn:
    conn.execute(text("TRUNCATE TABLE silver.weather_daily"))
    rows_written = df_silver.to_sql(
        name="weather_daily",
        schema="silver",
        con=conn,  # reuse the open transaction instead of a new connection
        if_exists="append",
        index=False,
        method="multi",
        chunksize=10_000,
    )

# Show what to_sql reported, as the original load cell did.
rows_written


In [17]:
# Spot check of the loaded table: up to 100 rows with a positive value for
# any element other than PRCP.
spot_check_sql = """
    SELECT
    *
    FROM silver.weather_daily
    where element != 'PRCP' and value > 0
    limit 100
    """
pd.read_sql(sql=spot_check_sql, con=engine)
