# Imports

In [0]:
import pyspark.sql.functions as f
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

# Read from Bronze Layer

In [0]:
# Load the raw CRM customer records from the Bronze layer.
df = spark.read.table("workspace.bronze.crm_cust_info")

# Data Transformations 


In [0]:
# Preview a small sample of the Bronze rows before any transformation;
# bounded to 10 rows so the cell does not render the whole table.
df.limit(10).display()

## Trim strings

In [0]:

# Strip leading/trailing whitespace from every string column using ONE
# projection. Calling withColumn once per column in a loop adds a projection
# per call and can produce very large query plans; a single select keeps the
# plan flat while preserving column order and names.
projected = []
for field in df.schema.fields:
    # Backtick-quote the name so dots or other special characters are read
    # as part of the column name rather than as nested-field access.
    column = col("`" + field.name.replace("`", "``") + "`")
    if isinstance(field.dataType, StringType):
        column = trim(column).alias(field.name)
    projected.append(column)

df = df.select(projected)

In [0]:
# Preview a small sample after trimming; bounded to 10 rows so the cell
# does not render the whole table.
df.limit(10).display()

## Normalization

In [0]:
# Decode the single-letter codes into readable labels. Codes are compared
# case-insensitively; anything that is not a recognised code becomes "N/A".
marital_code = f.upper(f.col("cst_marital_status"))
gender_code = f.upper(f.col("cst_gndr"))

marital_label = (
    f.when(marital_code == "S", "Single")
    .when(marital_code == "M", "Married")
    .otherwise("N/A")
)
gender_label = (
    f.when(gender_code == "F", "Female")
    .when(gender_code == "M", "Male")
    .otherwise("N/A")
)

# Overwrite both columns in place (same names, same positions).
df = df.withColumn("cst_marital_status", marital_label)
df = df.withColumn("cst_gndr", gender_label)


In [0]:
# Preview a small sample after normalization; bounded to 10 rows so the cell
# does not render the whole table.
df.limit(10).display()

## Remove records with missing customer ID

In [0]:
# Keep only rows that carry a customer ID; rows where cst_id is NULL are dropped.
df = df.where(f.col("cst_id").isNotNull())

## Rename columns

In [0]:
# Bronze column name -> Silver column name.
RENAME_MAP = dict(
    cst_id="customer_id",
    cst_key="customer_key",
    cst_firstname="first_name",
    cst_lastname="last_name",
    cst_marital_status="marital_status",
    cst_gndr="gender",
    cst_create_date="created_date",
)

# Rename every mapped column in one pass; columns that are not in the map
# keep their current name, and column order is unchanged.
df = df.toDF(*[RENAME_MAP.get(name, name) for name in df.columns])

In [0]:
# Preview the first 10 rows with the final column names.
(
    df
    .limit(10)
    .display()
)

# Write into Silver table

In [0]:
# Persist the cleaned customers as a Delta table, replacing any previous contents.
# The table name is fully qualified (catalog.schema.table) so the write targets
# workspace.silver.crm_customers regardless of the session's current catalog,
# matching the fully qualified names used elsewhere in this notebook.
(
    df.write
    .mode("overwrite")
    .format("delta")
    .saveAsTable("workspace.silver.crm_customers")
)

In [0]:

%sql
-- Spot-check: preview ten rows of the Silver customers table.
SELECT *
FROM workspace.silver.crm_customers
LIMIT 10