In [0]:
# Cell 1: Setup
%load_ext autoreload
%autoreload 2

import sys
# PROJECT_PATH is imported before anything else from the project because it is
# needed to extend sys.path below.
# NOTE(review): this import has to resolve *before* sys.path is extended, so the
# `config` package must already be importable from the notebook's location — confirm.
from config.table_config import PROJECT_PATH

# Add PROJECT_PATH to the module search path. The membership check keeps re-runs
# of this cell from appending the same entry more than once.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

# NOTE(review): wildcard import. Names used later in this notebook that are not
# imported explicitly (configure_spark, CATALOG_NAME, SILVER_SCHEMA, SILVER_BASE,
# BRONZE_ORDERS_TABLE, SILVER_ORDERS_TABLE) presumably come from here — verify,
# and consider importing them by name so their origin is visible.
from config import *
from utils import SilverTransformation
# These explicit imports come after the wildcard import, so they take precedence
# over any same-named symbols it may have brought in.
from pyspark.sql import DataFrame
from pyspark.sql.functions import col, lit
from pyspark.sql.types import StringType, TimestampType

# `spark` is not defined anywhere in this notebook.
# NOTE(review): presumably the session object injected by the notebook runtime — confirm.
configure_spark(spark)

In [0]:
# Cell 2: Custom transform for legacy
def orders_legacy_transform(df: DataFrame) -> DataFrame:
    """
    Normalise a legacy orders DataFrame.

    Steps applied, in order:
    1. Cast ``order_id`` to string.
    2. Cast ``order_date`` to timestamp.
    3. Set ``source_system`` to the literal ``"legacy"``.

    No columns are dropped by this function. In this notebook the columns
    to remove are listed in the ``drop_cols`` argument passed to
    ``SilverTransformation.run``.

    Args:
        df: Input DataFrame, expected to contain ``order_id`` and
            ``order_date`` columns.

    Returns:
        A new DataFrame with ``order_id`` and ``order_date`` re-typed and
        ``source_system`` set.
    """
    # Build the column expressions up front so each step below reads as
    # "column name <- expression".
    order_id_as_string = col("order_id").cast(StringType())
    order_date_as_timestamp = col("order_date").cast(TimestampType())
    source_tag = lit("legacy")

    typed = df.withColumn("order_id", order_id_as_string)
    typed = typed.withColumn("order_date", order_date_as_timestamp)
    return typed.withColumn("source_system", source_tag)

In [0]:
# Cell 3: Run transformation
# Construct the transformer for the legacy orders table first, then trigger the
# run as a separate step so the two sets of arguments are easy to tell apart.
orders_legacy_job = SilverTransformation(
    spark=spark,
    table_display_name="ORDERS_LEGACY",
    source_table=BRONZE_ORDERS_TABLE,
    target_table=SILVER_ORDERS_TABLE,
)

orders_legacy_job.run(
    # Destination settings.
    catalog_name=CATALOG_NAME,
    silver_schema=SILVER_SCHEMA,
    silver_container=SILVER_BASE,
    # Table shaping: key column, columns to drop, and the custom transform
    # defined earlier in this notebook.
    key_columns=["order_id"],
    drop_cols=["order_year", "order_month"],
    custom_transform=orders_legacy_transform,
    # Data-quality and partitioning options.
    # NOTE(review): the meaning of 0.01 and 32 depends on SilverTransformation.run —
    # confirm there before tuning.
    skip_bronze_dqa=False,
    dqa_sample_fraction=0.01,
    num_partitions=32,
)