# Initialization

In [0]:
%run "/Workspace/Users/amberasad0299@gmail.com/databricks_data_lakehouse_project/scripts/silver/silver_util"

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType

# Read Bronze Table

In [0]:
# Name of the source table handed to read_bronze below.
bronze_table_name = "crm_cust_info"
# read_bronze is not defined in this notebook; presumably it is provided by the
# %run'd silver_util notebook and returns a Spark DataFrame — verify there.
df = read_bronze(spark, bronze_table_name)

# Silver Transformations

## Trimming

In [0]:
# trim_all_strings is not defined in this notebook; presumably it comes from the
# %run'd silver_util and strips surrounding whitespace from string columns — verify there.
df = trim_all_strings(df)

## Normalization

In [0]:
# Upper-case the raw codes once so the comparisons below are case-insensitive.
marital_code = F.upper(F.col("cst_marital_status"))
gender_code = F.upper(F.col("cst_gndr"))

# "S"/"M" -> "Single"/"Married"; any other value falls through to "n/a".
marital_status_label = (
    F.when(marital_code == "S", "Single")
    .when(marital_code == "M", "Married")
    .otherwise("n/a")
)

# "F"/"M" -> "Female"/"Male"; any other value falls through to "n/a".
gender_label = (
    F.when(gender_code == "F", "Female")
    .when(gender_code == "M", "Male")
    .otherwise("n/a")
)

# Overwrite both code columns in place with their readable labels.
df = df.withColumn("cst_marital_status", marital_status_label)
df = df.withColumn("cst_gndr", gender_label)

## Remove Records with Missing Customer ID

In [0]:
# Drop rows whose customer ID is NULL.
# Uses the `F` alias from the notebook's import cell: a bare `col` is not imported
# there, so relying on it would only work if another notebook leaked it via %run.
df = df.filter(F.col("cst_id").isNotNull())

## Renaming Columns

In [0]:
# Mapping of current column name -> new column name, passed to rename_columns below.
RENAME_MAP = {
    "cst_id": "customer_id",
    "cst_key": "customer_number",
    "cst_firstname": "first_name",
    "cst_lastname": "last_name",
    "cst_marital_status": "marital_status",
    "cst_gndr": "gender",
    "cst_create_date": "created_date"
}

# rename_columns is not defined in this notebook; presumably it is provided by the
# %run'd silver_util and applies the mapping to df's columns — verify there.
df = rename_columns(df, RENAME_MAP)

## Sanity Check before Writing

In [0]:
# Preview at most 10 transformed rows before anything is written.
# NOTE(review): the preview includes first_name / last_name columns — consider
# clearing the cell output before sharing or committing the notebook.
display(df.limit(10))

# Writing Silver Table

In [0]:
# Name of the target table handed to write_silver below.
silver_table_name = "crm_customers"
# write_silver is not defined in this notebook; presumably it is provided by the
# %run'd silver_util. Its write mode (overwrite vs. append) is not visible here —
# confirm before re-running this cell.
write_silver(df, silver_table_name)

## Sanity Check after Writing

In [0]:
%sql
-- Spot-check: return up to 10 rows from the silver customers table
-- (assumed to be the table written by write_silver above; confirm the catalog/schema).
SELECT * FROM workspace.silver.crm_customers LIMIT 10