# Init

In [0]:
import pyspark.sql.functions as f
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

# Read from Bronze Layer

In [0]:
# Load the bronze-layer source table into `df`; the cells below transform this frame.
df = spark.table(
    tableName="workspace.bronze.erp_loc_a101",
)

In [0]:
# Preview the frame exactly as it was read, before any transformation.
display(df)

# Data Transformations

## Trim columns

In [0]:
# Strip leading/trailing spaces from every string-typed column.
# This is done in ONE select instead of a withColumn call per column: each
# withColumn adds its own projection, and the PySpark docs warn that calling
# it in a loop can produce very large plans. Column order and names are kept
# as-is; non-string columns are passed through untouched.
df = df.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)

In [0]:
# Preview the frame after the string columns have been trimmed.
display(df)

## Customer ID cleanup

In [0]:
# Remove every hyphen from CID, overwriting the column in place.
df = df.withColumn(
    "CID",
    f.regexp_replace("CID", "-", ""),
)

In [0]:
# Preview the frame after hyphens were removed from CID.
display(df)

## Country normalization

In [0]:
# Normalize CNTRY:
#   "DE"          -> "Germany"
#   "US" / "USA"  -> "United States"
#   NULL or ""    -> "N/A"
#   anything else -> left unchanged
country = f.col("CNTRY")
country_is_missing = country.isNull() | (country == "")

normalized_country = (
    f.when(country == "DE", "Germany")
    .when(country.isin("US", "USA"), "United States")
    .when(country_is_missing, "N/A")
    .otherwise(country)
)

df = df.withColumn("CNTRY", normalized_country)


In [0]:
# List the distinct CNTRY values left after normalization.
df.select("CNTRY").distinct().display()

In [0]:
# Preview the full frame after country normalization.
display(df)

## Rename columns

In [0]:
# Current column name -> column name to use in the output table.
RENAME_MAP = {"CID": "customer_number", "CNTRY": "country"}

# Apply the renames one at a time, in the order they are declared above.
for current_name in RENAME_MAP:
    df = df.withColumnRenamed(current_name, RENAME_MAP[current_name])

In [0]:
# Preview the frame with its final column names before it is written out.
display(df)

# Write into Silver Table

In [0]:
# Persist the transformed frame as a Delta table, overwriting existing contents.
# The table name is fully qualified (catalog.schema.table): the bronze read and
# the verification query both use the `workspace` catalog explicitly, and an
# unqualified "silver.erp_loc_a101" would resolve against whatever catalog the
# session currently has selected, which may be a different table.
df.write.format("delta").mode("overwrite").saveAsTable(
    "workspace.silver.erp_loc_a101"
)

In [0]:
%sql
-- Spot-check: read back the first 10 rows of the silver table.
SELECT
  *
FROM
  workspace.silver.erp_loc_a101
LIMIT
  10