In [0]:

from pyspark.sql.functions import (
    col, to_date, when, length
)

# 1. Make sure the target schema exists before anything is written into it
spark.sql("CREATE DATABASE IF NOT EXISTS lakehouse.silver")

# 2. Load the Bronze source table
bronze_df = spark.table("lakehouse.bronze.crm_sales")

# 3. Drop rows that have no order number
#    (equivalent to DELETE WHERE sls_ord_num IS NULL)
has_order_number = col("sls_ord_num").isNotNull()
bronze_clean_df = bronze_df.where(has_order_number)

# 4. Transformations
def _parse_yyyymmdd(column_name):
    """Return a date Column parsed from ``column_name`` using ``yyyyMMdd``.

    The raw value is cast to string explicitly so the length guard and the
    parser see the same text regardless of how the column is stored upstream
    (the bronze column type is not visible here -- TODO confirm). Values whose
    text is not exactly 8 characters long become NULL, because ``when``
    without ``otherwise`` yields NULL and ``to_date`` propagates it.

    NOTE(review): this mirrors TRY_TO_DATE only while
    ``spark.sql.ansi.enabled`` is false. With ANSI mode on, ``to_date`` is
    documented to raise on 8-character values that are not valid dates --
    confirm the cluster setting.
    """
    raw_text = col(column_name).cast("string")
    return to_date(when(length(raw_text) == 8, raw_text), "yyyyMMdd")


silver_df = (
    bronze_clean_df
    # Business keys are kept as strings
    .withColumn("sls_ord_num", col("sls_ord_num").cast("string"))
    .withColumn("sls_prd_key", col("sls_prd_key").cast("string"))
    .withColumn("sls_cust_id", col("sls_cust_id").cast("string"))

    # Date columns: values that are not 8 characters long become NULL
    .withColumn("sls_order_dt", _parse_yyyymmdd("sls_order_dt"))
    .withColumn("sls_ship_dt", _parse_yyyymmdd("sls_ship_dt"))
    .withColumn("sls_due_dt", _parse_yyyymmdd("sls_due_dt"))

    # Numeric measures
    .withColumn("sls_sales", col("sls_sales").cast("double"))
    .withColumn("sls_quantity", col("sls_quantity").cast("int"))
    .withColumn("sls_price", col("sls_price").cast("double"))
)

# 5. Persist the result as a Delta table in the Silver layer,
#    replacing whatever the table held before
silver_table_name = "lakehouse.silver.crm_sales"
silver_df.write.format("delta").mode("overwrite").saveAsTable(silver_table_name)

print("Silver CRM sales table created successfully")
