# Init

In [0]:
import pyspark.sql.functions as f
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

# Read from Bronze Layer


In [0]:
# Load the ERP customer table from the bronze schema; every later cell
# rebinds `df` to the progressively transformed frame.
df = spark.read.table("workspace.bronze.erp_cust_az12")

# Data Transformations


In [0]:
# Render the frame exactly as read from bronze, before any transformation.
display(df)

## Trim columns

In [0]:
# Collect the names of all string-typed columns up front, then strip leading
# and trailing whitespace from each one. Non-string columns are left untouched.
string_columns = [
    fld.name
    for fld in df.schema.fields
    if isinstance(fld.dataType, StringType)
]

for column_name in string_columns:
    df = df.withColumn(column_name, trim(col(column_name)))

## Remove NAS prefix from CID

In [0]:
# Drop a leading "NAS" from CID. Values that do not start with "NAS" (and NULLs)
# pass through unchanged, because the anchored pattern simply does not match.
#
# regexp_replace is used instead of f.substring(col, 4, f.length(col)):
# f.substring only accepts a Column for `len` on newer PySpark releases, whereas
# regexp_replace with string pattern/replacement works on all of them and needs
# no when/otherwise guard.
df = df.withColumn("CID", f.regexp_replace(col("CID"), r"^NAS", ""))

In [0]:
# Render the frame after the CID prefix clean-up.
display(df)

## Birthdate validation

In [0]:
# A birthdate later than today is replaced with NULL; every other value
# (including an existing NULL) is kept as-is via the otherwise() branch.
birth_date = col("BDATE")
is_in_future = birth_date > f.current_date()

df = df.withColumn(
    "BDATE",
    f.when(is_in_future, None).otherwise(birth_date),
)

In [0]:
# Render the frame after future birthdates have been nulled out.
display(df)

## Rename columns


In [0]:
# Existing column name -> new column name.
RENAME_MAP = {
    "CID": "customer_number",
    "BDATE": "birth_date",
    "GEN": "gender",
}

# Apply the renames one at a time; withColumnRenamed is a no-op for a column
# that is not present, so a missing source column does not raise here.
for source_name, target_name in RENAME_MAP.items():
    df = df.withColumnRenamed(source_name, target_name)

In [0]:
# Render the final frame with the renamed columns, just before it is written.
display(df)

# Write into Silver Table


In [0]:
# Persist the cleaned frame as a Delta table, replacing any previous contents.
# The table name is fully qualified with the `workspace` catalog so the write
# targets the same catalog the bronze read and the verification query below
# use, regardless of the session's current catalog.
(
    df.write
    .mode("overwrite")
    .format("delta")
    .saveAsTable("workspace.silver.erp_cust_az12")
)

In [0]:
%sql
-- Spot-check: read back a handful of rows from the silver table.
SELECT *
FROM workspace.silver.erp_cust_az12
LIMIT 10