In [0]:
import dlt
from pyspark.sql.functions import *

# -----------------------------
# Silver Table: CRM - cust_info
# -----------------------------
@dlt.table(name="silver_crm_cust_info", comment="Cleaned CRM customer info")
def silver_crm_cust_info():
    """Build the cleaned CRM customer table from ``bronze_crm_cust_info``.

    Steps:
      * drop rows whose ``cst_id`` is null;
      * trim ``cst_firstname`` and ``cst_lastname``;
      * map ``cst_marital_status`` codes S/M to 'Single'/'Married';
      * map ``cst_gndr`` codes F/M to 'Female'/'Male'.

    Code matching ignores case and surrounding whitespace; any other value
    (including null) becomes 'n/a'.

    Returns:
        The transformed DataFrame.
    """
    bronze = dlt.read("bronze_crm_cust_info")

    # Normalised single-letter codes, reused by every branch below.
    marital_code = upper(trim(col("cst_marital_status")))
    gender_code = upper(trim(col("cst_gndr")))

    marital_label = (
        when(marital_code == 'S', 'Single')
        .when(marital_code == 'M', 'Married')
        .otherwise('n/a')
    )
    gender_label = (
        when(gender_code == 'F', 'Female')
        .when(gender_code == 'M', 'Male')
        .otherwise('n/a')
    )

    with_ids = bronze.filter(col("cst_id").isNotNull())
    trimmed = (
        with_ids
        .withColumn("cst_firstname", trim(col("cst_firstname")))
        .withColumn("cst_lastname", trim(col("cst_lastname")))
    )
    labelled = (
        trimmed
        .withColumn("cst_marital_status", marital_label)
        .withColumn("cst_gndr", gender_label)
    )
    return labelled

# -----------------------------
# Silver Table: CRM - prd_info
# -----------------------------
@dlt.table(name="silver_crm_prd_info", comment="Cleaned CRM product info")
def silver_crm_prd_info():
    """Build the cleaned CRM product table from ``bronze_crm_prd_info``.

    Transformations:
      * ``cat_id``   - new column: the first 5 characters of the raw
        ``prd_key`` with every "-" replaced by "_".
      * ``prd_key``  - overwritten with the raw key from character 7 onward.
      * ``prd_cost`` - nulls replaced by 0.
      * ``prd_line`` - codes M/R/S/T (case and surrounding whitespace
        ignored) mapped to 'Mountain'/'Road'/'Other Sales'/'Touring';
        any other value (including null) becomes 'n/a'.

    Returns:
        The transformed DataFrame.
    """
    df = dlt.read("bronze_crm_prd_info")

    raw_key = col("prd_key")
    line_code = upper(trim(col("prd_line")))

    return (
        # cat_id must be derived first: the next step overwrites prd_key.
        df.withColumn("cat_id", regexp_replace(substring(raw_key, 1, 5), "-", "_"))
          # Column.substr accepts Column arguments for start and length, whereas
          # functions.substring() only accepts Python ints before PySpark 4.0
          # and fails when given length(...). Using the full key length as the
          # slice length simply means "to the end of the string".
          .withColumn("prd_key", raw_key.substr(lit(7), length(raw_key)))
          .withColumn("prd_cost", coalesce(col("prd_cost"), lit(0)))
          .withColumn("prd_line", when(line_code == 'M', 'Mountain')
                                  .when(line_code == 'R', 'Road')
                                  .when(line_code == 'S', 'Other Sales')
                                  .when(line_code == 'T', 'Touring')
                                  .otherwise('n/a'))
    )

# -----------------------------
# Silver Table: CRM - sales_details
# -----------------------------
@dlt.table(name="silver_crm_sales_details", comment="Cleaned CRM sales details")
def silver_crm_sales_details():
    """Expose ``bronze_crm_sales_details`` as a silver table.

    No transformations are applied; the bronze data is returned as read.
    """
    return dlt.read("bronze_crm_sales_details")

# -----------------------------
# Silver Table: ERP - cust_az12
# -----------------------------
@dlt.table(name="silver_erp_cust_az12", comment="Cleaned ERP customer az12")
def silver_erp_cust_az12():
    """Expose ``bronze_erp_cust_az12`` with column ``CID`` renamed to ``cid``."""
    return dlt.read("bronze_erp_cust_az12").withColumnRenamed("CID", "cid")

# -----------------------------
# Silver Table: ERP - loc_a101
# -----------------------------
@dlt.table(name="silver_erp_loc_a101", comment="Cleaned ERP location a101")
def silver_erp_loc_a101():
    """Expose ``bronze_erp_loc_a101`` with column ``CID`` renamed to ``cid``."""
    return dlt.read("bronze_erp_loc_a101").withColumnRenamed("CID", "cid")

# -----------------------------
# Silver Table: ERP - px_cat_g1v2
# -----------------------------
@dlt.table(name="silver_erp_px_cat_g1v2", comment="Cleaned ERP px cat g1v2")
def silver_erp_px_cat_g1v2():
    """Expose ``bronze_erp_px_cat_g1v2`` as a silver table.

    No transformations are applied; the bronze data is returned as read.
    """
    return dlt.read("bronze_erp_px_cat_g1v2")
