# In [1]:
import pandas as pd
import os
from sqlalchemy import create_engine
import logging
import time

# Make sure the log directory exists before logging is configured:
# logging.basicConfig(filename=...) opens the file immediately and raises
# FileNotFoundError when the parent directory is missing.
os.makedirs("logs", exist_ok=True)

# Append INFO-and-above records to logs/ingestion.log, each prefixed with a
# timestamp and the level name.
logging.basicConfig(
    filename="logs/ingestion.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filemode="a"
)

# SQLAlchemy engine for the SQLite database file inventory.db; the
# three-slash URL form is a path relative to the current working directory.
engine = create_engine("sqlite:///inventory.db")

def ingest_db(df, table_name, engine, if_exists, *, max_params=999):
    """Write a DataFrame to a database table using batched multi-row INSERTs.

    Args:
        df: DataFrame whose rows are written; its index is not stored.
        table_name: Name of the destination table.
        engine: Connection target handed to ``DataFrame.to_sql`` as ``con``.
        if_exists: Forwarded to ``DataFrame.to_sql`` ("fail", "replace" or
            "append").
        max_params: Upper bound on bound parameters per INSERT statement.
            The default of 999 is SQLite's limit before version 3.32.0
            (newer builds default to 32766); raise it only when the target
            database is known to accept more.

    Returns:
        None.
    """
    # With method="multi" every cell of a batch becomes one bound parameter
    # of a single INSERT statement, so a fixed 10_000-row batch overflows the
    # database's variable limit ("too many SQL variables") for all but the
    # narrowest frames. Derive the batch size from the column count instead,
    # never exceeding the previous 10_000-row ceiling.
    n_cols = max(1, len(df.columns))
    rows_per_insert = max(1, min(10_000, max_params // n_cols))

    df.to_sql(
        table_name,
        con=engine,
        if_exists=if_exists,
        index=False,
        chunksize=rows_per_insert,
        method="multi",
    )

def load_raw_data(data_dir="."):
    """Load every ``.csv`` file in *data_dir* into its own database table.

    Each file is read in 50,000-row chunks and written through ``ingest_db``
    using the module-level ``engine``. The table is named after the file
    with the ``.csv`` suffix removed; the first chunk replaces any existing
    table of that name and the remaining chunks are appended to it.
    Progress and the total elapsed time are written to the log.

    Args:
        data_dir: Directory scanned (non-recursively) for CSV files.
            Defaults to the current working directory.

    Raises:
        Exception: Any error raised while reading or writing a file is
            logged with its traceback and then re-raised unchanged.
    """
    suffix = ".csv"
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during a long run.
    start = time.perf_counter()

    # os.listdir returns entries in arbitrary order; sort so every run
    # processes (and logs) the files in the same sequence.
    for file in sorted(os.listdir(data_dir)):
        path = os.path.join(data_dir, file)
        # Skip anything that is not a regular file, e.g. a directory whose
        # name happens to end in ".csv".
        if not file.endswith(suffix) or not os.path.isfile(path):
            continue

        table_name = file[: -len(suffix)]
        logging.info("Processing %s", file)

        try:
            first_chunk = True
            for chunk in pd.read_csv(path, chunksize=50_000):
                ingest_db(
                    chunk,
                    table_name,
                    engine,
                    if_exists="replace" if first_chunk else "append",
                )
                first_chunk = False
        except Exception:
            # The log file is this script's only record of a run, so note
            # which file failed before letting the error propagate.
            logging.exception("Failed to ingest %s", file)
            raise

    total_time = (time.perf_counter() - start) / 60
    logging.info("--------------- Ingestion Complete ---------------")
    # No leading newline: it would split this record across two log lines,
    # leaving the timestamp/level prefix on an otherwise empty line.
    logging.info(f"Total time taken: {total_time:.2f} minutes")

# Run the ingestion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    load_raw_data()