In [5]:
#IMPORTS
import os
import shutil
from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine

In [6]:
# ----------------------------------
# Load CSV
# ----------------------------------
# Read the stations extract from the landing directory, relying on pandas'
# default type inference for every column.
# NOTE(review): the previews in this notebook show `wmo` rendered as a float
# (e.g. 41196.0) — confirm whether that identifier should be kept as text.
path = "/app/data/landing/stations/ghcnd-stations.csv"
df = pd.read_csv(filepath_or_buffer=path)

In [7]:
# Quick visual check: preview the first two rows of the loaded frame.
df.head(n=2)

Unnamed: 0,station_id,latitude,longitude,elevation,state,name,gsn,hcn,wmo
0,ACW00011604,17.1167,-61.7833,10.1,,ST JOHNS COOLIDGE FLD,,,
1,ACW00011647,17.1333,-61.7833,19.2,,ST JOHNS,,,


In [8]:
# Rename to match Bronze.
# `DataFrame.rename` silently skips mapping keys that are not present, so an
# extract that already carries the lower-case Bronze headers (as the
# `df.head(2)` preview in this notebook shows) passes through unchanged.
df = df.rename(columns={
    "ID": "station_id",
    "LATITUDE": "latitude",
    "LONGITUDE": "longitude",
    "ELEVATION": "elevation",
    "STATE": "state",
    "NAME": "name",
    "GSN": "gsn",
    "HCN": "hcn",
    "WMO": "wmo"
})

# Fail fast, before anything is written to the database, if the header matched
# neither naming variant and a Bronze column is therefore absent.
bronze_columns = [
    "station_id", "latitude", "longitude", "elevation",
    "state", "name", "gsn", "hcn", "wmo",
]
missing_columns = [col for col in bronze_columns if col not in df.columns]
if missing_columns:
    raise ValueError(
        f"Stations extract is missing expected Bronze columns: {missing_columns}"
    )




In [9]:
# Bronze-only metadata: tag every row with the name of the file it came from.
df = df.assign(source_file=Path(path).name)

In [10]:
# ----------------------------------
# DB connection (Docker)
# ----------------------------------
# The connection URL embeds credentials, so it is taken from the ETL_DB_URL
# environment variable when that is set to a non-empty value. The fallback is
# the URL this notebook has always used for the Docker setup, so existing runs
# keep working without any configuration.
DEFAULT_DB_URL = "postgresql+psycopg2://postgres:postgres@postgres:5432/etl_db"
engine = create_engine(os.environ.get("ETL_DB_URL") or DEFAULT_DB_URL)

In [11]:
# ----------------------------------
# Write to Bronze
# ----------------------------------
# Append the frame to bronze.stations in batches of 10,000 rows, passing
# multiple rows per INSERT; the DataFrame index is not written.
# NOTE(review): the read-back preview in this notebook lists the same station
# under several ingested_at values, so each run appends another full copy —
# confirm that is the intended Bronze behaviour.
df.to_sql(
    "stations",
    engine,
    schema="bronze",
    if_exists="append",
    index=False,
    chunksize=10_000,
    method="multi",
)

129657

In [12]:
# ----------------------------------
# Archive source file
# ----------------------------------
# Make sure the archive directory exists, then move the ingested file out of
# the landing area, keeping its original file name.
# NOTE(review): a file of the same name already in the archive may be
# overwritten by the move — confirm that is acceptable.
archive_dir = Path("/app/data/archive/stations")
archive_dir.mkdir(parents=True, exist_ok=True)

src = Path(path)
dst = archive_dir.joinpath(src.name)
shutil.move(src, dst)

print(f"[ARCHIVED] {src.name} → {dst}")

[ARCHIVED] ghcnd-stations.csv → /app/data/archive/stations/ghcnd-stations.csv


In [13]:
# Sanity check: read back the first ten Bronze rows, ordered by station_id.
df_test = pd.read_sql(
    sql="""
    SELECT *
    FROM bronze.stations
    order by station_id asc
    LIMIT 10;
    """,
    con=engine,
)

df_test

Unnamed: 0,station_id,latitude,longitude,elevation,state,name,gsn,hcn,wmo,source_file,ingested_at
0,ACW00011604,17.1167,-61.7833,10.1,,ST JOHNS COOLIDGE FLD,,,,ghcnd-stations.csv,2026-02-06 14:42:55.281806+00:00
1,ACW00011604,17.1167,-61.7833,10.1,,ST JOHNS COOLIDGE FLD,,,,ghcnd-stations.csv,2026-02-08 21:42:22.304047+00:00
2,ACW00011604,17.1167,-61.7833,10.1,,ST JOHNS COOLIDGE FLD,,,,ghcnd-stations.csv,2026-02-10 02:28:01.814863+00:00
3,ACW00011647,17.1333,-61.7833,19.2,,ST JOHNS,,,,ghcnd-stations.csv,2026-02-06 14:42:55.281806+00:00
4,ACW00011647,17.1333,-61.7833,19.2,,ST JOHNS,,,,ghcnd-stations.csv,2026-02-08 21:42:22.304047+00:00
5,ACW00011647,17.1333,-61.7833,19.2,,ST JOHNS,,,,ghcnd-stations.csv,2026-02-10 02:28:01.814863+00:00
6,AE000041196,25.333,55.517,34.0,,SHARJAH INTER. AIRP,GSN,,41196.0,ghcnd-stations.csv,2026-02-06 14:42:55.281806+00:00
7,AE000041196,25.333,55.517,34.0,,SHARJAH INTER. AIRP,GSN,,41196.0,ghcnd-stations.csv,2026-02-08 21:42:22.304047+00:00
8,AE000041196,25.333,55.517,34.0,,SHARJAH INTER. AIRP,GSN,,41196.0,ghcnd-stations.csv,2026-02-10 02:28:01.814863+00:00
9,AEM00041194,25.255,55.364,10.4,,DUBAI INTL,,,41194.0,ghcnd-stations.csv,2026-02-06 14:42:55.281806+00:00
