### Utils File Setup  

- Created a separate **utils.py** file to store all reusable functions.  
- Implemented a function to convert **camelCase / PascalCase** column names into **snake_case** dynamically.  
- All transformation logic (column renaming, UDFs, helper functions) is imported from this utils file.  
- This keeps the notebook clean and ensures reusability across Customer, Product, Store, and Sales transformations.  


In [0]:
import re
from pyspark.sql import DataFrame

#Converting from camelCase or PascalCase to snake_case
def to_snake_case(name: str) -> str:
    """Convert a camelCase or PascalCase identifier to snake_case.

    An underscore is inserted at two kinds of word boundary, and the
    result is lower-cased:

    * a lowercase letter or digit followed by an uppercase letter
      (``customerId`` -> ``customer_Id``);
    * a run of uppercase letters followed by a capitalised word
      (``HTTPResponse`` -> ``HTTP_Response``).

    Args:
        name: The identifier to convert.

    Returns:
        The lower-cased identifier with underscores at the word boundaries.
    """
    boundary_patterns = (
        r"([a-z0-9])([A-Z])",
        r"([A-Z]+)([A-Z][a-z])",
    )
    converted = name
    # Order matters: the lower->upper split runs before the acronym split.
    for pattern in boundary_patterns:
        converted = re.sub(pattern, r"\1_\2", converted)
    return converted.lower()

#Cleaning and renaming all column names to snake_case using df.toDF()
def clean_and_snake_case_columns(df: DataFrame) -> DataFrame:
    """Return ``df`` with every column name cleaned and converted to snake_case.

    For each name in ``df.columns``:

    1. the characters ``(){};=`` plus newlines and tabs are removed;
    2. leading and trailing whitespace is stripped;
    3. each remaining run of spaces becomes a single underscore, so
       ``"unit price"`` becomes ``"unit_price"``;
    4. the result is passed through ``to_snake_case``.

    Args:
        df: The DataFrame whose columns should be renamed.

    Returns:
        The result of ``df.toDF(...)`` called with the cleaned names, in the
        original column order.
    """
    cleaned_cols = []
    for col_name in df.columns:
        # Spaces are deliberately NOT removed here: deleting them up front
        # would leave nothing for the underscore substitution below to act on.
        without_special = re.sub(r"[(){};\n\t=]", "", col_name).strip()
        # Collapse each run of spaces (e.g. left behind by a removed "( )")
        # into one underscore so word boundaries survive.
        cleaned_name = re.sub(r" +", "_", without_special)
        cleaned_cols.append(to_snake_case(cleaned_name))
    return df.toDF(*cleaned_cols)

# Reading a Delta table, then cleaning its column names and converting them to snake_case
def read_delta_with_snake_case(spark, path: str) -> DataFrame:
    """Load a Delta table and normalise its column names.

    Args:
        spark: Object whose ``read`` attribute provides the reader
            (presumably a SparkSession; confirm against callers).
        path: Location passed to the reader's ``load``.

    Returns:
        The loaded DataFrame after it has been passed through
        ``clean_and_snake_case_columns``.
    """
    delta_reader = spark.read.format("delta")
    raw_df = delta_reader.load(path)
    return clean_and_snake_case_columns(raw_df)

# Joining the store and product on store_id
def get_store_product_data(product_df: DataFrame, store_df: DataFrame) -> DataFrame:
    """Inner-join store rows with product rows on ``store_id``.

    ``store_df`` is the left side of the join and ``product_df`` the right.

    Args:
        product_df: Product data; must contain a ``store_id`` column.
        store_df: Store data; must contain a ``store_id`` column.

    Returns:
        The inner join of ``store_df`` and ``product_df`` on ``store_id``.
    """
    join_key = "store_id"
    store_with_products = store_df.join(product_df, on=join_key, how="inner")
    return store_with_products

# Joining the sales with enriched store-product data on product_id
def enrich_sales_with_store_product(sales_df: DataFrame, store_product_df: DataFrame) -> DataFrame:
    """Inner-join sales rows with store-product rows on ``product_id``.

    ``sales_df`` is the left side of the join and ``store_product_df`` the
    right.

    Args:
        sales_df: Sales data; must contain a ``product_id`` column.
        store_product_df: Store-product data; must contain a ``product_id``
            column.

    Returns:
        The inner join of ``sales_df`` and ``store_product_df`` on
        ``product_id``.
    """
    # NOTE(review): if both inputs share other column names besides the join
    # key, the result may carry them twice; confirm callers handle that.
    join_key = "product_id"
    enriched_sales = sales_df.join(store_product_df, on=join_key, how="inner")
    return enriched_sales