In [9]:
#IMPORTS
import os
import shutil
from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine

In [10]:
# ----------------------------------
# Load CSV
# ----------------------------------
# Landing-zone location of the stations file; `path` is reused by later cells
# (source_file metadata and archiving).
path = "/app/data/landing/stations/ghcnd-stations.csv"

# Read with pandas defaults: first row is the header, dtypes are inferred.
df = pd.read_csv(filepath_or_buffer=path)

In [11]:
# Visual sanity check: show the first two rows of the loaded frame
df.head(n=2)

Unnamed: 0,station_id,latitude,longitude,elevation,state,name,gsn,hcn,wmo
0,ACW00011604,17.1167,-61.7833,10.1,,ST JOHNS COOLIDGE FLD,,,
1,ACW00011647,17.1333,-61.7833,19.2,,ST JOHNS,,,


In [12]:
# Rename to match Bronze
# Upper-case source headers are mapped to the Bronze column names. Headers
# that are already in Bronze form pass through untouched, because
# DataFrame.rename ignores mapping keys that are not present.
df = df.rename(columns={
    "ID": "station_id",
    "LATITUDE": "latitude",
    "LONGITUDE": "longitude",
    "ELEVATION": "elevation",
    "STATE": "state",
    "NAME": "name",
    "GSN": "gsn",
    "HCN": "hcn",
    "WMO": "wmo"
})

# The landing CSV carries a leftover index column that pandas labels
# "Unnamed: 0" (visible in the df.head(2) preview). It is not a station
# attribute, so drop any such column before the frame is appended to Bronze;
# otherwise to_sql would try to insert it as a regular column.
df = df.drop(
    columns=[col for col in df.columns if str(col).startswith("Unnamed:")]
)




In [13]:
# Bronze-only metadata: record which file each row was loaded from
# (file name only, without the directory part).
source_name = Path(path).name
df["source_file"] = source_name

In [14]:
# ----------------------------------
# DB connection (Docker)
# ----------------------------------
# The connection URL can be supplied through the ETL_DB_URL environment
# variable so real credentials do not have to be stored in the notebook.
# When the variable is unset, the original Docker connection string is used,
# so existing setups keep working unchanged.
DB_URL = os.environ.get(
    "ETL_DB_URL",
    "postgresql+psycopg2://postgres:postgres@postgres:5432/etl_db",
)

engine = create_engine(DB_URL)

In [None]:
# ----------------------------------
# Write to Bronze
# ----------------------------------
# Rows are appended to bronze.stations and the DataFrame index is not written.
# method="multi" sends several rows per INSERT statement, in chunks of
# 10,000 rows.
bronze_write_opts = dict(
    name="stations",
    schema="bronze",
    con=engine,
    if_exists="append",
    index=False,
    method="multi",
    chunksize=10_000,
)

df.to_sql(**bronze_write_opts)

In [15]:
# ----------------------------------
# Archive source file
# ----------------------------------
# Make sure the archive folder exists (no error if it is already there).
archive_dir = Path("/app/data/archive/stations")
archive_dir.mkdir(parents=True, exist_ok=True)

# Move the landing file into the archive folder under the same file name.
src = Path(path)
dst = archive_dir.joinpath(src.name)

shutil.move(src, dst)

print(f"[ARCHIVED] {src.name} → {dst}")

[ARCHIVED] ghcnd-stations.csv → /app/data/archive/stations/ghcnd-stations.csv


In [None]:
# Spot-check the load: fetch the first 10 rows of bronze.stations,
# ordered by station_id.
preview_sql = """
    SELECT *
    FROM bronze.stations
    order by station_id asc
    LIMIT 10;
    """
df_test = pd.read_sql(preview_sql, con=engine)

df_test