In [None]:
import logging
import pandas as pd
from scripts import create_tables, insert_dataframe, with_db_session
from scripts import load_config, setup_logging, clean_data
from sqlalchemy.orm import Session
from orm_models import NCAAMTourneySeeds, Base

### Config

In [None]:
# Bootstrap: read the project configuration and configure logging with the
# log path it names. Any failure is reported through the root logger and
# then re-raised unchanged.
try:
    config = load_config()
    log_path = config["paths"]["log_path"]
    setup_logging(log_path)
    logging.info("Starting the data analysis project")
except Exception as exc:
    logging.error(f"Initialization failed: {exc}")
    raise

### Data Loading Pipeline

In [None]:
@with_db_session
def main_pipeline(session: Session):
    """Run the tournament-seeds ETL pipeline within a database session.

    Steps, in order:

    1. Create the ``NCAAMTourneySeeds`` table via ``create_tables``.
    2. Read the raw seeds CSV and pass it through ``clean_data``.
    3. Turn the frame's index into a 1-based ``id`` column, rename
       ``team_i_d`` to ``team_id`` and cast it to ``int``.
    4. Insert the rows via ``insert_dataframe``.
    5. Log the head of the prepared frame.

    Args:
        session: SQLAlchemy session used for table creation and inserts.
            Presumably injected by ``with_db_session`` (the entry point
            calls this function with no arguments) -- verify against the
            decorator.

    Raises:
        Exception: Any error raised by a step is logged together with its
            traceback and then re-raised unchanged.
    """
    try:
        # 1. Create tables
        create_tables(session, [NCAAMTourneySeeds])

        # 2. Load and clean data
        # NOTE: relative path, so it is resolved against the current
        # working directory of the running process/kernel.
        raw_data = pd.read_csv("../data/raw/march_madness/MNCAATourneySeeds.csv")
        cleaned_data = clean_data(raw_data)

        # 3. Transformations
        # reset_index() materialises the index as a column named "index"
        # (for an unnamed index), which becomes the "id" column.
        cleaned_data = cleaned_data.reset_index().rename(columns={"index": "id"})
        cleaned_data["id"] += 1  # Adjust for 1-based indexing
        # "team_i_d" is presumably what clean_data's column-name
        # normalisation produces for the raw team-id header -- TODO confirm.
        cleaned_data = cleaned_data.rename(columns={"team_i_d": "team_id"})
        cleaned_data["team_id"] = cleaned_data["team_id"].astype(int)

        # 4. Insert data
        insert_dataframe(session, cleaned_data, NCAAMTourneySeeds)

        # 5. Validation
        # This logs the in-memory frame that was handed to insert_dataframe;
        # it does not read anything back from the database.
        logging.info(f"Sample inserted data:\n{cleaned_data.head()}")

    except Exception as exc:
        # logging.exception records the traceback alongside the message, so
        # the log output keeps the failure location even though the error
        # is re-raised to the caller.
        logging.exception("Pipeline failed: %s", exc)
        raise

### Execute Pipeline

In [None]:
# Entry point: run the ETL pipeline only when this code is executed directly.
# main_pipeline is called without arguments; its `session` parameter is
# presumably supplied by the with_db_session decorator -- verify there.
if __name__ == "__main__":
    main_pipeline()