# Transformation Explorations for Suppliers

In [0]:
"""
Silver Layer - Suppliers
======================================================================
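This script reads the 'ap.bronze.suppliers' table, trims and title-cases its
text fields, drops the 'ingest_time' column if present, and overwrites the
'ap.silver.suppliers' table with the result.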

"""

# ============================================================================
# DEPENDENCIES
# ============================================================================
from pyspark.sql import DataFrame
from pyspark.sql.functions import *



# ============================================================================
# CONFIGURATION
# ============================================================================
# Source table read by the pipeline (presumably catalog.schema.table — confirm
# against the workspace's naming convention).
INPUT_TABLE = "ap.bronze.suppliers"
# Destination table; the pipeline writes it in "overwrite" mode.
OUTPUT_TABLE = "ap.silver.suppliers"
# Names of the columns passed to clean_text_fields for trimming/title-casing.
TEXT_FIELDS = ["supplier"]


# ============================================================================
# FUNCTIONS
# ============================================================================
def clean_text_fields(df: DataFrame, text_cols: list) -> DataFrame:
    """Normalize the given text columns of a DataFrame.

    Each column named in ``text_cols`` is replaced, under the same name, by
    its value passed through Spark's ``trim`` and then ``initcap``.

    Args:
        df: DataFrame containing the columns to clean.
        text_cols: Names of the columns to normalize.

    Returns:
        A DataFrame with the listed columns rewritten. With an empty
        ``text_cols`` the input DataFrame is returned as-is.
    """
    cleaned = df
    for name in text_cols:
        # Trim first so initcap sees the value without padding spaces.
        trimmed = trim(col(name))
        cleaned = cleaned.withColumn(name, initcap(trimmed))
    return cleaned

def drop_ingest_time(df: DataFrame) -> DataFrame:
    """Return ``df`` without its 'ingest_time' column.

    Args:
        df: DataFrame that may or may not carry an 'ingest_time' column.

    Returns:
        The result of dropping 'ingest_time' when that column is present;
        otherwise the input DataFrame itself.
    """
    has_ingest_time = "ingest_time" in df.columns
    return df.drop("ingest_time") if has_ingest_time else df


# ============================================================================
# TABLE CREATION
# ============================================================================
# Execute the suppliers bronze-to-silver transformation pipeline.
# NOTE(review): `spark` is not defined in this file; presumably it is the
# SparkSession injected by the notebook runtime — confirm before running
# this cell elsewhere.

# Read the source table named by INPUT_TABLE as the starting DataFrame.
print(f"Loading source data from {INPUT_TABLE}...")
df = spark.table(INPUT_TABLE)

# Trim and title-case every column listed in TEXT_FIELDS.
print("Cleaning text fields...")
df = clean_text_fields(df, TEXT_FIELDS)

# Remove the 'ingest_time' column if the source table has one.
print("Dropping 'ingest_time' column...")
df = drop_ingest_time(df)

# Save as a Delta table; "overwrite" mode replaces any existing contents of
# OUTPUT_TABLE.
print(f"Writing enriched data to {OUTPUT_TABLE}...")
df.write.format("delta").mode("overwrite").saveAsTable(OUTPUT_TABLE)

print("Transformation pipeline completed successfully!")
