In [0]:
import dlt
from pyspark.sql.functions import (
    col, count, sum as _sum, avg, max as _max, datediff, lit, expr,
    when, concat_ws, lower
)

# NOTE(review): `schema=` on @dlt.table is the *column schema* definition
# (DDL string / StructType), not the target database, so `schema="gold"` was
# not a valid value. The target schema is expressed through the qualified table
# name instead, mirroring the qualified `silver.…` read below. This assumes the
# pipeline uses the default publishing mode that accepts qualified names —
# confirm against the pipeline settings.
@dlt.table(
  name="gold.guest_persona_summary",
  comment="Aggregated guest-level profile data"
)
def gold_guest_persona_summary():
    """Build one profile row per MASTER_GUEST_ID from the cleaned transactions.

    Rows without a MASTER_GUEST_ID are dropped. For every remaining guest the
    table carries:

    * activity totals (``total_transactions``, ``total_spend_usd``,
      ``avg_discount_pct``),
    * the most frequent department, order origin, selling market and coupon
      code (``top_*`` columns; NULL when the guest has no non-NULL value),
    * ``last_txn_date`` and ``days_since_last_txn``, the latter measured
      against the newest TXN_TIMESTAMP among all linked rows rather than
      against the wall clock,
    * categorical labels derived from fixed thresholds (``frequency``,
      ``spender``, ``deal_type``, ``recency``, ``coupon_phrase``),
    * ``persona_sentence``, a readable one-line summary assembled from the
      columns above.

    Returns:
        pyspark.sql.DataFrame: the lazily-defined result; no Spark action is
        triggered while the dataset is being defined.
    """
    source_df = dlt.read("silver.retail_transactions_clean")
    linked_df = source_df.filter(col("MASTER_GUEST_ID").isNotNull())

    def mode_expr(col_name):
        # Built-in `mode` aggregate (Spark 3.4+): most frequent non-NULL value,
        # NULL when every value in the group is NULL. Replaces a hand-rolled
        # map/array expression that was not valid Spark SQL (array_sort over a
        # map, map_concat with repeated keys, untyped empty map() seed).
        # Ties between equally frequent values are not resolved in a
        # guaranteed order.
        return expr(f"mode({col_name})")

    per_guest = linked_df.groupBy("MASTER_GUEST_ID").agg(
        count("TRANSACTION_ID").alias("total_transactions"),
        _sum("EXTENDED_PRICE_USD").alias("total_spend_usd"),
        avg("DISCOUNT_PCT").alias("avg_discount_pct"),
        mode_expr("MMS_DEPT_NAME").alias("top_category"),
        mode_expr("MASTER_ORDER_ORIGIN").alias("top_channel"),
        mode_expr("SELLING_MARKET").alias("top_market"),
        mode_expr("COUPON_CODE").alias("top_coupon"),
        _max("TXN_TIMESTAMP").alias("last_txn_date"),
    )

    # Reference date = newest transaction among linked rows. It is kept as a
    # one-row DataFrame and cross-joined instead of being pulled to the driver
    # with collect(), so defining the table stays free of eager Spark actions.
    reference = linked_df.agg(_max("TXN_TIMESTAMP").alias("_ref_ts"))

    result = (
        per_guest
        .crossJoin(reference)
        .withColumn(
            "days_since_last_txn",
            datediff(col("_ref_ts"), col("last_txn_date")),
        )
        .drop("_ref_ts")
    )

    result = (
        result
        # Purchase-count bucket.
        .withColumn(
            "frequency",
            when(col("total_transactions") >= 10, "frequent")
            .when(col("total_transactions") >= 5, "regular")
            .otherwise("occasional"),
        )
        # Total-spend bucket (USD).
        .withColumn(
            "spender",
            when(col("total_spend_usd") >= 500, "high spender")
            .when(col("total_spend_usd") >= 200, "mid-level spender")
            .otherwise("low spender"),
        )
        # Average-discount bucket.
        .withColumn(
            "deal_type",
            when(col("avg_discount_pct") >= 20, "deal-seeker")
            .when(col("avg_discount_pct") >= 5, "value-conscious")
            .otherwise("full-price shopper"),
        )
        # Recency bucket; a NULL day count falls through to "lapsed".
        .withColumn(
            "recency",
            when(col("days_since_last_txn") <= 30, "recent")
            .when(col("days_since_last_txn") <= 90, "somewhat active")
            .otherwise("lapsed"),
        )
        .withColumn(
            "coupon_phrase",
            when(
                col("top_coupon").isNotNull(),
                concat_ws(" ", lit("frequently uses promo codes like"), col("top_coupon")),
            ).otherwise(lit("rarely uses promotions")),
        )
        # concat_ws skips NULL inputs, so a missing top_* value simply drops
        # out of the sentence instead of nulling the whole string.
        .withColumn(
            "persona_sentence",
            concat_ws(
                " ",
                col("frequency"),
                lower(col("top_channel")), lit("shopper from"),
                col("top_market"), lit("focused on"),
                concat_ws("", col("top_category"), lit(", a")),
                col("spender"), lit("and"), concat_ws("", col("deal_type"), lit(",")),
                col("recency"), lit("guest who"), concat_ws("", col("coupon_phrase"), lit(",")),
                lit("with last purchase"), col("days_since_last_txn").cast("string"), lit("days ago."),
            ),
        )
    )

    return result
