**Bronze to Silver Transformation**

The basic transformations applied to the tables include:
- Date transformation (converting date columns to the date type)
- Checking for and removing duplicate records
- Removing empty rows

In [2]:
from pyspark.sql.functions import col, to_date

# Root folder of the Bronze layer holding the raw CSV extracts
bronze_path = "Files/bronze"

# Source schema -> tables to promote from Bronze to Silver
tables = {
    "Production": ["Product", "ProductCategory", "ProductSubcategory"],
    "Purchasing": ["PurchaseOrderHeader", "PurchaseOrderDetail", "ProductVendor", "Vendor"],
}

# Table -> columns that are cast with to_date (applied in the listed order)
date_columns = {
    "PurchaseOrderHeader": ["OrderDate", "ShipDate"],
    "PurchaseOrderDetail": ["DueDate"],
}

for schema, table_list in tables.items():
    for table in table_list:
        print(f"Processing {schema}.{table}...")

        # Read the Bronze CSV, taking column names from the header row and
        # letting Spark infer the column types
        source_file = f"{bronze_path}/{schema}/{table}.csv"
        reader = spark.read.format("csv").option("header", True).option("inferSchema", True)
        df = reader.load(source_file)

        # Remove rows in which every column is null, then exact duplicate rows
        df_clean = df.na.drop(how="all")
        df_clean = df_clean.dropDuplicates()

        # Cast the configured date columns, if this table has any
        for date_column in date_columns.get(table, []):
            df_clean = df_clean.withColumn(date_column, to_date(col(date_column)))

        # Silver table name: silver_<schema>_<table>, all lower case
        final_table_name = f"silver_{schema.lower()}_{table.lower()}"

        # Save as a Delta table registered under that name, replacing any
        # existing contents
        writer = df_clean.write.format("delta").mode("overwrite")
        writer.saveAsTable(final_table_name)

        print(f"✅ Saved and registered: {final_table_name}")


StatementMeta(, 504a9b71-aa53-4f48-b48a-02b634f3355d, 4, Finished, Available, Finished)

Processing Production.Product...
✅ Saved and registered: silver_production_product
Processing Production.ProductCategory...
✅ Saved and registered: silver_production_productcategory
Processing Production.ProductSubcategory...
✅ Saved and registered: silver_production_productsubcategory
Processing Purchasing.PurchaseOrderHeader...
✅ Saved and registered: silver_purchasing_purchaseorderheader
Processing Purchasing.PurchaseOrderDetail...
✅ Saved and registered: silver_purchasing_purchaseorderdetail
Processing Purchasing.ProductVendor...
✅ Saved and registered: silver_purchasing_productvendor
Processing Purchasing.Vendor...
✅ Saved and registered: silver_purchasing_vendor
