# Read dataset

In [0]:
# Load the CRM customer table from the bronze schema and preview it.
bronze_table = "workspace.bronze.crm_cust"
df = spark.table(bronze_table)
df.display()

# Transformations

## Remove unwanted spaces

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.functions import trim
from pyspark.sql.functions import col
from pyspark.sql.types import StringType

In [0]:
# Strip leading/trailing whitespace from every string-typed column.
# DataFrames are immutable: withColumn returns a NEW DataFrame, so the result
# must be assigned back to df, otherwise the trim is silently discarded.
for field in df.schema.fields:
  if isinstance(field.dataType, StringType):
    df = df.withColumn(field.name, trim(col(field.name)))

## Normalization

In [0]:
# Expand single-letter codes into readable labels. The comparison is
# case-insensitive (codes are upper-cased first); anything that does not
# match a known code, including null, becomes "n/a".
marital_code = F.upper(F.col("cst_marital_status"))
marital_label = (
    F.when(marital_code == "M", "Married")
    .when(marital_code == "S", "Single")
    .otherwise("n/a")
)

gender_code = F.upper(F.col("cst_gndr"))
gender_label = (
    F.when(gender_code == "M", "Male")
    .when(gender_code == "F", "Female")
    .otherwise("n/a")
)

df = df.withColumn("cst_marital_status", marital_label)
df = df.withColumn("cst_gndr", gender_label)
df.display()



## Remove records with missing cst_id

In [0]:
# Keep only the rows whose cst_id is not null.
has_customer_id = F.col("cst_id").isNotNull()
df = df.filter(has_customer_id)
df.display()


## Rename columns

In [0]:
# Source column name -> descriptive name, applied in the order listed.
column_renames = {
    "cst_id": "customer_id",
    "cst_key": "customer_number",
    "cst_firstname": "first_name",
    "cst_lastname": "last_name",
    "cst_marital_status": "marital_status",
    "cst_gndr": "gender",
    "cst_create_date": "create_date",
}
for old_name, new_name in column_renames.items():
    df = df.withColumnRenamed(old_name, new_name)
df.display()


# Write silver table

In [0]:
# Persist the transformed DataFrame as the silver table, replacing any
# existing contents of that table.
silver_table = "workspace.silver.crm_cust"
(
    df.write
    .mode("overwrite")
    .saveAsTable(silver_table)
)

## Check the silver table contents

In [0]:
%sql
-- Inspect all rows and columns of the silver CRM customer table.
SELECT *
FROM workspace.silver.crm_cust