**Initialization**

In [0]:
from pyspark.sql import Window
from pyspark.sql import functions as F
from pyspark.sql.functions import col, trim, upper
from pyspark.sql.types import StringType

**Read Bronze Table**

In [0]:

# Source table in the bronze layer holding the raw CRM customer records.
bronze_table = "workspace.bronze.crm_cust_info"
df = spark.read.table(bronze_table)



**Silver Transformations**

**Trim all string columns**

In [0]:
# Trim leading/trailing whitespace from every string column in ONE projection.
# Calling withColumn once per column inside a loop adds a projection per call,
# which the PySpark documentation warns can produce very large query plans;
# a single select keeps the plan flat while preserving column order and names.
df = df.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)  # non-string columns pass through untouched
        for field in df.schema.fields
    ]
)



**Normalization**

In [0]:
# Case- and whitespace-insensitive views of the raw code columns, used only
# for matching below.
marital_code = upper(trim(col("cst_marital_status")))
gender_code = upper(trim(col("cst_gndr")))

# Expand the single-letter codes into readable labels; any value that matches
# neither code falls through to "Unknown".
marital_label = (
    F.when(marital_code == "S", "Single")
    .when(marital_code == "M", "Married")
    .otherwise("Unknown")
)
gender_label = (
    F.when(gender_code == "F", "Female")
    .when(gender_code == "M", "Male")
    .otherwise("Unknown")
)

df = df.withColumn("cst_marital_status", marital_label)
df = df.withColumn("cst_gndr", gender_label)



**Rename columns**

In [0]:
# Bronze column name -> silver column name.
column_renames = {
    "cst_id": "customer_id",
    "cst_key": "customer_number",
    "cst_firstname": "firstname",
    "cst_lastname": "lastname",
    "cst_marital_status": "marital_status",
    "cst_gndr": "gender",
    "cst_create_date": "created_date",
}

# Apply the renames one at a time, in the order listed above.
for bronze_name, silver_name in column_renames.items():
    df = df.withColumnRenamed(bronze_name, silver_name)



**Remove records with missing customer ID**


In [0]:
# Keep only rows that carry a customer_id.
has_customer_id = col("customer_id").isNotNull()
df = df.filter(has_customer_id)


**Remove Duplicates**

In [0]:
# Keep exactly one row per customer_id, preferring the most recent created_date.
# dropDuplicates(["customer_id"]) keeps an arbitrary row from each duplicate
# group, so which record reaches the silver table could differ between runs.
# NOTE(review): assumes created_date orders records by recency (rows with a
# null created_date are ranked last) -- confirm against the bronze data.
latest_first = Window.partitionBy("customer_id").orderBy(
    col("created_date").desc_nulls_last()
)
df = (
    df
    .withColumn("_row_rank", F.row_number().over(latest_first))
    .filter(col("_row_rank") == 1)
    .drop("_row_rank")  # helper column must not leak into the silver table
)


**Sanity check of the DataFrame**

In [0]:
# Show a 20-row sample of the transformed data for a quick visual check.
preview_rows = df.limit(20)
preview_rows.display()


**Write Silver Table**

In [0]:
# Persist the transformed customers as a Delta table in the silver layer,
# overwriting whatever the table held before.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("workspace.silver.crm_customers")
)


**Sanity checks of Silver Table**

In [0]:
%sql
-- Read back the silver customers table to inspect what was written.
SELECT * FROM workspace.silver.crm_customers;