In [1]:
import os
import shutil
import time
from pathlib import Path

import psycopg2

In [2]:
# ----------------------------------
# Source file + database connection
# ----------------------------------
# CSV file that will be streamed into bronze.stations.
path = "/app/data/landing/stations/ghcnd-stations.csv"

# The timer starts before connecting, so the elapsed time reported at the end
# of the ingest includes connection setup.
start_time = time.perf_counter()

# Connection settings are taken from the standard libpq environment variables
# when they are set. The fallbacks are the values this notebook previously
# hard-coded (local dev stack), so behavior is unchanged when the variables
# are absent. Set PGPASSWORD etc. in the environment for any non-local target
# instead of editing this cell.
conn = psycopg2.connect(
    dbname=os.environ.get("PGDATABASE") or "etl_db",
    user=os.environ.get("PGUSER") or "postgres",
    password=os.environ.get("PGPASSWORD") or "postgres",
    host=os.environ.get("PGHOST") or "postgres",
    port=int(os.environ.get("PGPORT") or 5432),
)

In [3]:
# ----------------------------------
# Bulk-load the CSV into bronze.stations
# ----------------------------------
# The column list fixes the order in which CSV fields are mapped to table
# columns; HEADER tells COPY to skip the first line of the file.
copy_sql = """
    COPY bronze.stations (
        station_id,
        latitude,
        longitude,
        elevation,
        state,
        name,
        gsn,
        hcn,
        wmo
    )
    FROM STDIN
    WITH CSV HEADER
"""

try:
    # `with conn` commits when the block succeeds and rolls back if anything
    # inside raises (missing file, malformed row, ...); `with conn.cursor()`
    # closes the cursor in both cases.
    with conn, conn.cursor() as cur, open(path, "r") as f:
        cur.copy_expert(copy_sql, f)
finally:
    # psycopg2's connection context manager ends the transaction but does not
    # close the connection, so close it explicitly even on failure.
    conn.close()


# ----------------------------------
# Archive source file
# ----------------------------------
# Only reached when the load above succeeded, so a failed ingest leaves the
# file in the landing directory for a retry.
src = Path(path)
archive_dir = Path("/app/data/archive/stations")
archive_dir.mkdir(parents=True, exist_ok=True)

dst = archive_dir / src.name

if src.exists():
    shutil.move(src, dst)
    print(f"[ARCHIVED] {src.name} → {dst}")
else:
    # Defensive: the file was readable a moment ago, so this only triggers if
    # it was removed between the load and the archive step.
    print(f"[WARN] Source file not found: {src}")


# ----------------------------------
# Print Elapsed Time
# ----------------------------------
# start_time was captured before the database connection was opened.
elapsed = time.perf_counter() - start_time

print("✅ Stations ingest complete (COPY)")
print(f"⏱️ Time: {elapsed:.2f} seconds")

[ARCHIVED] ghcnd-stations.csv → /app/data/archive/stations/ghcnd-stations.csv
✅ Stations ingest complete (COPY)
⏱️ Time: 0.25 seconds


In [4]:
# import pandas as pd
# from sqlalchemy import create_engine

# # ----------------------------------
# # Validation Check - Count bronze.stations ROWS   -- UNCOMMENT to use
# # ----------------------------------
# engine = create_engine(
#     "postgresql+psycopg2://postgres:postgres@postgres:5432/etl_db"
# )

# df_test = pd.read_sql(
#     """
#     SELECT COUNT(*) AS row_count
#     FROM bronze.stations
#     """,
#     con=engine
# )

# df_test


In [5]:
# # ----------------------------------
# # Truncate For Testing -- UNCOMMENT to use
# # ----------------------------------

# from sqlalchemy import create_engine, text

# engine = create_engine(
#     "postgresql+psycopg2://postgres:postgres@postgres:5432/etl_db"
# )

# with engine.begin() as conn:
#     conn.execute(text("TRUNCATE TABLE bronze.stations"))

# print("🧹 bronze.stations truncated")
