In [0]:
%load_ext autoreload
%autoreload 2

import sys
from config.table_config import PROJECT_PATH

# Put the project root on the import path. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
# NOTE(review): `config.table_config` is imported before this guard runs, so
# `config` is already importable at this point; presumably the append is
# needed for the `utils` import that follows — confirm.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

from config import *
from utils import *
from pyspark.sql import DataFrame
from pyspark.sql.functions import col, to_date, upper, trim, regexp_replace

# Apply the project's Spark configuration to the session object `spark`.
# NOTE(review): `spark` is not defined in this notebook; presumably it is
# injected by the notebook runtime — confirm.
configure_spark(spark)

# Log a three-line header (rule, title, rule), followed by the source and
# target tables this notebook works on.
_banner_rule = "=" * 60
for _banner_line in (_banner_rule, "SILVER LAYER: Sellers Transformation", _banner_rule):
    log_info(_banner_line)
log_info(f"Source: {BRONZE_SELLERS_TABLE}")
log_info(f"Target: {SILVER_SELLERS_TABLE}")

In [0]:
def sellers_transform(df: DataFrame) -> DataFrame:
    """
    Sellers-specific column normalization.

    Each step overwrites the column of the same name:
    - seller_rating: cast to double
    - seller_active_date: converted with to_date (no explicit format is
      passed, so Spark's default date parsing applies)
    - seller_country, seller_state: trimmed, then upper-cased
    - seller_address, seller_company_name: trimmed

    Args:
        df: Sellers DataFrame; it is expected to contain the six columns
            listed above, since each one is read via col(...).

    Returns:
        A new DataFrame with the normalized columns; `df` itself is not
        modified.
    """
    # The "double" type string is equivalent to DoubleType() in Column.cast,
    # and it avoids relying on DoubleType being re-exported by one of the
    # notebook's wildcard imports (it is not imported explicitly anywhere).
    return (
        df
        .withColumn("seller_rating", col("seller_rating").cast("double"))
        .withColumn("seller_active_date", to_date(col("seller_active_date")))
        .withColumn("seller_country", upper(trim(col("seller_country"))))
        .withColumn("seller_state", upper(trim(col("seller_state"))))
        .withColumn("seller_address", trim(col("seller_address")))
        .withColumn("seller_company_name", trim(col("seller_company_name")))
    )

In [0]:
# Build the sellers job from the bronze source and silver target tables.
sellers_silver_job = SilverTransformation(
    spark=spark,
    table_display_name="SELLERS",
    source_table=BRONZE_SELLERS_TABLE,
    target_table=SILVER_SELLERS_TABLE,
)

# Run it, passing sellers_transform as the custom transformation hook.
# The call stays the last expression of the cell, so whatever run() returns
# is still what the cell displays.
sellers_silver_job.run(
    catalog_name=CATALOG_NAME,
    silver_schema=SILVER_SCHEMA,
    silver_container=SILVER_BASE,
    key_columns=["seller_id"],
    drop_cols=None,
    partition_cols=["seller_country"],
    custom_transform=sellers_transform,
    num_partitions=16,
)