# Silver Layer: CRM Sales Details Transformation
Cleans transactional sales data read from `crm_sales_details`.
- **Framework**: `silver_engine` for metadata and schema consistency.
- **Transformations**: 
    - **Data Quality**: Converts integer dates (yyyyMMdd) to DateType; a value of `0` or any value that is not exactly 8 characters long becomes NULL.
    - **Price Correction**: Where the unit price is missing, zero, or negative, derives it as total sales divided by quantity (left NULL when quantity is zero).
- **Output**: Delta table `workspace.silver.crm_sales`.



In [0]:
%run ../../helpers/silver_engine.ipynb

In [0]:
%python
import pyspark.sql.functions as F
from pyspark.sql.functions import col, length

def _yyyymmdd_to_date(column_name):
    """Build a Column that parses a yyyyMMdd value into a date, or NULL if malformed.

    Args:
        column_name: Name of the source column holding the raw date value.

    Returns:
        A Column expression that is NULL when the raw value equals 0 or when
        its string form is not exactly 8 characters long; otherwise the result
        of ``to_date`` applied with the ``yyyyMMdd`` pattern.
    """
    raw = col(column_name)
    # Cast once, explicitly, so length() and to_date() see the same string
    # instead of relying on implicit coercion of a numeric column.
    as_text = raw.cast("string")
    is_malformed = (raw == 0) | (length(as_text) != 8)
    return F.when(is_malformed, F.lit(None)).otherwise(
        F.to_date(as_text, "yyyyMMdd")
    )


def logic(df):
    """Clean the raw CRM sales rows and project them onto silver column names.

    Steps:
        1. Convert the three ``sls_*_dt`` columns from yyyyMMdd values to
           dates, mapping malformed values to NULL.
        2. Replace a NULL, zero, or negative ``sls_price`` with
           ``sls_sales / sls_quantity``; the result is NULL when the quantity
           is 0, which also avoids dividing by zero.
        3. Rename the ``sls_*`` columns and keep only the renamed ones.

    Args:
        df: DataFrame containing the ``sls_*`` columns referenced below.

    Returns:
        DataFrame with exactly these columns, in this order: order_number,
        product_number, customer_id, order_date, ship_date, due_date,
        sales_amount, quantity, price.
    """
    # 1. Date transformation: one shared rule for every yyyyMMdd column.
    for date_column in ("sls_order_dt", "sls_ship_dt", "sls_due_dt"):
        df = df.withColumn(date_column, _yyyymmdd_to_date(date_column))

    # 2. Price logic: only unusable prices (NULL or <= 0) are recalculated.
    price = col("sls_price")
    quantity = col("sls_quantity")
    # A when() without otherwise() already yields NULL for unmatched rows,
    # so a zero quantity produces NULL rather than a division by zero.
    derived_price = F.when(quantity != 0, col("sls_sales") / quantity)
    df = df.withColumn(
        "sls_price",
        F.when(price.isNull() | (price <= 0), derived_price).otherwise(price),
    )

    # 3. Renaming and final selection in a single projection; dict order
    # defines the output column order.
    silver_names = {
        "sls_ord_num": "order_number",
        "sls_prd_key": "product_number",
        "sls_cust_id": "customer_id",
        "sls_order_dt": "order_date",
        "sls_ship_dt": "ship_date",
        "sls_due_dt": "due_date",
        "sls_sales": "sales_amount",
        "sls_quantity": "quantity",
        "sls_price": "price",
    }
    return df.select(
        *[col(source).alias(target) for source, target in silver_names.items()]
    )

# Run the silver pipeline for this entity: `run_silver_pipeline` is not defined
# in this notebook (presumably loaded by the `%run` of silver_engine above --
# confirm there). It receives the source name "crm_sales_details", the target
# name "crm_sales", and the `logic` transformation callback defined above.
run_silver_pipeline("crm_sales_details", "crm_sales", logic)

In [0]:
%sql
-- Final sanity check: list every row and column of workspace.silver.crm_sales
-- for manual inspection of the cleansed transactional data.
SELECT * FROM workspace.silver.crm_sales;