# Init

In [0]:
import pyspark.sql.functions as f
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

# Read from Bronze Layer

In [0]:
# Load the ERP product-category table from the bronze layer.
df = spark.read.table("workspace.bronze.erp_px_cat_g1v2")

In [0]:
# Preview the bronze data as loaded, before any transformation is applied.
df.display()

# Data Transformations

## Trim columns


In [0]:
# Gather the names of all string-typed columns from the current schema, then
# strip leading/trailing whitespace from each of them in place. Non-string
# columns are left untouched.
string_columns = [
    fld.name for fld in df.schema.fields if isinstance(fld.dataType, StringType)
]
for column_name in string_columns:
    df = df.withColumn(column_name, trim(col(column_name)))

In [0]:
# Inspect the frame after the string columns have been trimmed.
df.display()

## Normalize maintenance to boolean


In [0]:
# Convert the textual MAINTENANCE column to a nullable boolean, comparing
# case-insensitively: "YES" -> True, "NO" -> False, anything else -> null.
maintenance_upper = f.upper(col("MAINTENANCE"))
maintenance_as_bool = (
    f.when(maintenance_upper == "YES", f.lit(True))
    .when(maintenance_upper == "NO", f.lit(False))
    .otherwise(None)
)
df = df.withColumn("MAINTENANCE", maintenance_as_bool)

In [0]:
# Inspect the frame after MAINTENANCE has been converted to a boolean.
df.display()

## Rename columns


In [0]:
# Current (bronze) column name -> target column name for the silver table.
RENAME_MAP = {
    "ID": "category_id",
    "CAT": "category",
    "SUBCAT": "subcategory",
    "MAINTENANCE": "maintenance_flag",
}

# Apply the renames one column at a time.
for source_column, target_column in RENAME_MAP.items():
    df = df.withColumnRenamed(source_column, target_column)

In [0]:
# Inspect the frame with its renamed columns before it is written out.
df.display()

# Write into Silver Table


In [0]:
# Persist the transformed frame as a Delta table in the silver layer, replacing
# any existing contents. The table name is fully qualified with the `workspace`
# catalog (matching the bronze read and the `%sql` check that follows) so the
# write does not depend on whichever catalog is current in the session.
df.write.mode("overwrite").format("delta").saveAsTable("workspace.silver.erp_px_cat_g1v2")

In [0]:
%sql
-- Spot-check: fetch up to 10 rows from the silver table.
SELECT * FROM workspace.silver.erp_px_cat_g1v2 LIMIT 10