In [0]:
from pyspark.sql.functions import col, substring

# Bronze -> Silver load for the ERP customer table.
# Steps: make sure the target schema exists, read the bronze table, drop rows
# without a customer id, add the typed/derived columns, and overwrite the
# silver Delta table.

# 1. Create the silver schema; a no-op when it already exists.
spark.sql("CREATE DATABASE IF NOT EXISTS lakehouse.silver")

# 2. Load the bronze source table.
bronze_df = spark.table("lakehouse.bronze.erp_cust")

# 3. Keep only rows that have a customer id
#    (DataFrame equivalent of DELETE ... WHERE CID IS NULL).
cid_is_present = col("CID").isNotNull()
bronze_clean_df = bronze_df.filter(cid_is_present)

# 4. Derived columns, listed in the order they are appended. The source
#    columns (CID, BDATE, GEN) are not dropped, so they stay in the output.
derived_columns = {
    "Customer_id": col("CID").cast("string"),
    "Date_birth": col("BDATE").cast("date"),
    # substring() positions are 1-based: this skips the first three characters
    # of CID and keeps at most the next 100.
    # NOTE(review): presumably strips a fixed 3-character prefix; 100 looks
    # like an arbitrary "rest of the string" cap - confirm.
    "CUST_ID": substring(col("CID"), 4, 100),
    "Customer_Gender": col("GEN").cast("string"),
}

silver_df = bronze_clean_df
for column_name, column_expr in derived_columns.items():
    silver_df = silver_df.withColumn(column_name, column_expr)

# 5. Overwrite the silver table with the transformed rows, stored as Delta.
# NOTE(review): "erp_custt" looks like a typo for "erp_cust" (the bronze table
# name); left unchanged because downstream readers may depend on it - confirm.
silver_writer = silver_df.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("lakehouse.silver.erp_custt")

print("Silver ERP customer table created successfully")
