# In [0]:
from pyspark.sql.functions import col, when, round, current_timestamp, to_date
from pyspark.sql.types import DecimalType, LongType, IntegerType

# Table identifiers used by main():
# BRONZE_TABLE is the input that is read; SILVER_TABLE receives the rows that
# pass validation; QUARANTINE_TABLE receives the rows that fail it.
BRONZE_TABLE = "workspace.retail.bronze_sales"
SILVER_TABLE = "workspace.retail.silver_sales"
QUARANTINE_TABLE = "workspace.retail.quarantine_sales"

def clean_and_enrich(df):
    """Cast and enrich sales rows, then split them into valid and rejected sets.

    Args:
        df: DataFrame providing at least the ``quantity``, ``price``,
            ``customer_id``, ``description`` and ``invoice_date`` columns.

    Returns:
        A ``(silver_df, quarantine_df)`` tuple. ``silver_df`` holds the rows
        that pass every validation rule, plus a ``silver_load_at`` timestamp
        column. ``quarantine_df`` holds every remaining row, plus a ``reason``
        column naming the first rule that matched. Each input row ends up in
        exactly one of the two frames.
    """
    # NOTE: total_amount is derived first, from quantity/price as they arrive,
    # i.e. before the casts below replace those two columns.
    df_enriched = (
        df.withColumn(
            "total_amount",
            round(col("quantity") * col("price"), 2).cast(DecimalType(18, 2)),
        )
        .withColumn("customer_id", col("customer_id").cast(LongType()))
        .withColumn("price", col("price").cast(DecimalType(12, 2)))
        .withColumn("quantity", col("quantity").cast(IntegerType()))
        .withColumn("invoice_date_only", to_date(col("invoice_date")))
    )

    is_valid = (
        (col("quantity") > 0)
        & (col("price") > 0)
        & (col("customer_id").isNotNull())
        & (col("description").isNotNull())
    )

    # Comparing against NULL yields NULL, not False. A row whose quantity or
    # price is NULL (and whose other checks pass) therefore makes `is_valid`
    # NULL. filter() keeps only rows where the predicate is true, and
    # NOT NULL is still NULL, so filtering on `is_valid` / `~is_valid`
    # directly would silently drop such rows from BOTH outputs. Collapsing
    # NULL to False routes them to quarantine instead.
    passes_checks = when(is_valid, True).otherwise(False)

    silver_df = df_enriched.filter(passes_checks).withColumn(
        "silver_load_at", current_timestamp()
    )

    # The first matching branch wins. Rows that match none of the explicit
    # branches (e.g. NULL quantity/price, or a missing description) are
    # labelled "Other Data Quality Issue".
    # NOTE(review): "Negative Quantity" is also assigned when quantity is 0;
    # the label is kept unchanged so existing consumers of `reason` still work.
    quarantine_df = df_enriched.filter(~passes_checks).withColumn(
        "reason",
        when(col("quantity") <= 0, "Negative Quantity")
        .when(col("price") <= 0, "Non-positive Price")
        .when(col("customer_id").isNull(), "Missing CustomerID")
        .otherwise("Other Data Quality Issue"),
    )

    return silver_df, quarantine_df

def main():
    """Load the bronze table, split it, and overwrite both output tables."""
    source_rows = spark.table(BRONZE_TABLE)
    accepted, rejected = clean_and_enrich(source_rows)

    # Silver is written first, then quarantine; each write uses the Delta
    # format in overwrite mode.
    outputs = (
        (accepted, SILVER_TABLE),
        (rejected, QUARANTINE_TABLE),
    )
    for frame, target in outputs:
        writer = frame.write.format("delta").mode("overwrite")
        writer.saveAsTable(target)
    
# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()