# Silver Layer: CRM Products Transformation
Standardizing product master data from `crm_prd_info`.
- **Framework**: Centralized `silver_engine` handles universal string trimming.
- **Transformations**: 
    - Parses `prd_key` to extract `category_id` and `product_number`.
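    - Handles missing costs by coalescing `NULL` values to `0`.
    - Casts `prd_start_dt` and `prd_end_dt` to dates (`start_date`, `end_date`).
    - Maps `prd_line` codes (`M`, `R`, `S`, `T`) to full product line names, defaulting to `n/a`.
    - Renames legacy column names to business-friendly aliases.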
- **Output**: Delta table `workspace.silver.crm_products`.

In [0]:
%run ../../helpers/silver_engine.ipynb

In [0]:
%python
from pyspark.sql.types import DateType
import pyspark.sql.functions as F

def logic(df):
    """Transform raw CRM product rows into the silver product schema.

    Args:
        df: Source DataFrame. Must expose the columns ``prd_id``,
            ``prd_key``, ``prd_nm``, ``prd_cost``, ``prd_line``,
            ``prd_start_dt`` and ``prd_end_dt``.

    Returns:
        A DataFrame with exactly these columns, in order: ``product_id``,
        ``category_id``, ``product_number``, ``product_name``,
        ``product_cost``, ``product_line``, ``start_date``, ``end_date``.
    """
    prd_key = F.col("prd_key")
    line_code = F.upper(F.col("prd_line"))

    # 1. Key parsing: characters 1-5 of the key form the category, with
    #    dashes replaced by underscores.
    category_id = F.regexp_replace(F.substring(prd_key, 1, 5), "-", "_")

    #    Everything from character 7 onward is the product number. The key's
    #    own length is used as the upper bound so that long keys are never
    #    cut off at an arbitrary fixed width.
    product_number = prd_key.substr(F.lit(7), F.length(prd_key))

    # 2. Data cleaning: a missing cost becomes 0; date columns are cast to
    #    DateType.
    product_cost = F.coalesce(F.col("prd_cost"), F.lit(0))
    start_date = F.col("prd_start_dt").cast(DateType())
    end_date = F.col("prd_end_dt").cast(DateType())

    # 3. Normalization: single-letter line codes (compared in upper case) map
    #    to full names; any other value, including NULL, becomes "n/a".
    product_line = (
        F.when(line_code == "M", "Mountain")
        .when(line_code == "R", "Road")
        .when(line_code == "S", "Other Sales")
        .when(line_code == "T", "Touring")
        .otherwise("n/a")
    )

    # 4. Renaming and final selection in the published column order.
    return df.select(
        F.col("prd_id").alias("product_id"),
        category_id.alias("category_id"),
        product_number.alias("product_number"),
        F.col("prd_nm").alias("product_name"),
        product_cost.alias("product_cost"),
        product_line.alias("product_line"),
        start_date.alias("start_date"),
        end_date.alias("end_date"),
    )

# Executing the standardized silver pipeline, passing `logic` as the
# table-specific transformation.
# NOTE(review): `run_silver_pipeline` is not defined in this notebook;
# presumably it comes from the `silver_engine` helper loaded via `%run`, with
# "crm_prd_info" as the source table and "crm_products" as the target table
# name -- confirm against the helper.
run_silver_pipeline("crm_prd_info", "crm_products", logic)

In [0]:
%sql
-- Quick check to verify the transformed product catalog: returns every row
-- and column of the silver products table for visual inspection.
SELECT * FROM workspace.silver.crm_products;