In [5]:
import os

import pandas as pd
from sqlalchemy import create_engine


In [None]:
# -----------------------------
# Config
# -----------------------------

# CSV file that the ingest step reads.
csv_path = "/app/data/landing/accidents/US_Accidents_March23.csv"

# The connection URL is taken from the ETL_DB_URL environment variable when it
# is set, so real credentials do not have to live in the notebook. When it is
# unset the original local URL is used, which keeps the default unchanged.
engine = create_engine(
    os.environ.get(
        "ETL_DB_URL",
        "postgresql+psycopg2://postgres:postgres@postgres:5432/etl_db",
    )
)

# Target table and schema for the load.
table_name = "us_accidents"
schema = "bronze"

# Number of CSV rows read (and appended) per iteration of the ingest loop.
chunk_size = 100_000

# -----------------------------
# Ingest (chunked)
# -----------------------------
# Reads the CSV in pieces of `chunk_size` rows and appends every piece to
# `schema`.`table_name`. The load only ever appends (if_exists="append"), so
# running this cell a second time inserts the same rows again.

# Rows sent per INSERT statement. With method="multi" every cell becomes a
# value in the statement, so pushing a whole 100,000-row chunk through a
# single INSERT produces an extremely large statement; batching bounds that.
insert_batch_size = 5_000

chunk_iter = pd.read_csv(
    csv_path,
    chunksize=chunk_size,
    low_memory=False
)

total_rows = 0

for i, chunk in enumerate(chunk_iter, start=1):

    # Normalize column names to match the bronze schema, where a unit is
    # separated from the name by an underscore (distance_mi, humidity_pct):
    #   "Distance(mi)" -> "distance_mi",  "Humidity(%)" -> "humidity_pct"
    # "%" is spelled out first; then every run of non-word characters
    # (parentheses, spaces, slashes, ...) becomes a single "_", and the "_"
    # left behind by a trailing ")" is stripped. Merely deleting "(" would
    # yield "distancemi", which is not a column of the target table.
    chunk.columns = (
        chunk.columns
        .str.strip()
        .str.lower()
        .str.replace("%", "pct", regex=False)
        .str.replace(r"\W+", "_", regex=True)
        .str.strip("_")
    )

    # Insert chunk, split into INSERT statements of insert_batch_size rows
    chunk.to_sql(
        table_name,
        engine,
        schema=schema,
        if_exists="append",
        index=False,
        method="multi",
        chunksize=insert_batch_size
    )

    rows = len(chunk)
    total_rows += rows

    print(f"Chunk {i}: inserted {rows:,} rows")

print(f"\nIngest complete — {total_rows:,} total rows inserted")


In [3]:
import os

from sqlalchemy import create_engine, inspect

# Builds its own engine so this cell can be run without executing the ingest
# cell. The URL is read from ETL_DB_URL when set, so credentials need not be
# hard-coded here; otherwise the original local URL is used.
engine = create_engine(
    os.environ.get(
        "ETL_DB_URL",
        "postgresql+psycopg2://postgres:postgres@postgres:5432/etl_db",
    )
)

inspector = inspect(engine)

# Column metadata of bronze.us_accidents as reported by the database.
columns = inspector.get_columns(
    "us_accidents",
    schema="bronze"
)

# Keep only the column names, in the order the inspector returns them.
headers = [col["name"] for col in columns]

headers


['id',
 'source',
 'severity',
 'start_time',
 'end_time',
 'start_lat',
 'start_lng',
 'end_lat',
 'end_lng',
 'distance_mi',
 'description',
 'street',
 'city',
 'county',
 'state',
 'zipcode',
 'country',
 'timezone',
 'airport_code',
 'weather_timestamp',
 'temperature_f',
 'wind_chill_f',
 'humidity_pct',
 'pressure_in',
 'visibility_mi',
 'wind_direction',
 'wind_speed_mph',
 'precipitation_in',
 'weather_condition',
 'amenity',
 'bump',
 'crossing',
 'give_way',
 'junction',
 'no_exit',
 'railway',
 'roundabout',
 'station',
 'stop',
 'traffic_calming',
 'traffic_signal',
 'turning_loop',
 'sunrise_sunset',
 'civil_twilight',
 'nautical_twilight',
 'astronomical_twilight']