In [0]:
# Databricks notebook source
# 03_orchestration_uc.py - KYC Gold Aggregation using Unity Catalog

from pyspark.sql.functions import (
    col,
    when,
    lit,
    to_date,
    concat_ws,
    sum as _sum,
    count,
    max as _max
)
from pyspark.sql import SparkSession

# -----------------------------------------------
# Step 1: Resolve the target catalog and schema
# -----------------------------------------------
# Register the text widgets with their default values, then read the
# current values back in the same order (catalog first, schema second).
dbutils.widgets.text("catalog", "governance_risk")
dbutils.widgets.text("schema", "kyc_project")
catalog, schema = (
    dbutils.widgets.get(widget_name) for widget_name in ("catalog", "schema")
)

# -----------------------------------------------
# Step 2: Load the Silver table from Unity Catalog
# -----------------------------------------------
silver_table = f"{catalog}.{schema}.client_transactions_risk"

# List the tables in the schema and stop with an explicit message when the
# Silver table is missing, instead of failing later on the read.
tables = [
    listing.tableName
    for listing in spark.sql(f"SHOW TABLES IN {catalog}.{schema}").collect()
]
if "client_transactions_risk" not in tables:
    raise Exception(
        f"Silver table '{silver_table}' not found. Run the processing notebook first."
    )

risk_df = spark.read.table(silver_table)

# -----------------------------------------------
# Step 3: Aggregate data
# -----------------------------------------------
def aggregate_by_client(risk_df):
    """Aggregate transaction-level risk rows into one summary row per client.

    Args:
        risk_df: Spark DataFrame providing the columns ``client_id``,
            ``transaction_amount``, ``risk_flag``, ``risk_score`` and
            ``is_high_risk_country``.

    Returns:
        A Spark DataFrame with one row per ``client_id`` and the columns
        ``total_transactions``, ``total_amount``,
        ``total_high_risk_transactions``, ``max_risk_score``,
        ``ever_high_risk_country`` and ``high_risk_ratio``.
    """
    aggr_df = (
        risk_df.groupBy("client_id")
        .agg(
            count("*").alias("total_transactions"),
            _sum("transaction_amount").alias("total_amount"),
            # Rows whose risk_flag is false or null contribute 0 to this count.
            _sum(when(col("risk_flag"), 1).otherwise(0)).alias("total_high_risk_transactions"),
            _max("risk_score").alias("max_risk_score"),
            _max("is_high_risk_country").alias("ever_high_risk_country"),
        )
        # The numerator must use the alias produced by the aggregation above.
        # Every group holds at least one row, so the denominator is never zero.
        .withColumn(
            "high_risk_ratio",
            col("total_high_risk_transactions") / col("total_transactions"),
        )
    )
    return aggr_df

aggr_df = aggregate_by_client(risk_df)

# -----------------------------------------------
# Step 4: Persist the Gold table in Unity Catalog
# -----------------------------------------------
# Written as a Delta table in overwrite mode.
(
    aggr_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(f"{catalog}.{schema}.aggregated_client_risk")
)

print("✅ Aggregated Gold table 'aggregated_client_risk' successfully saved to Unity Catalog.")

# -----------------------------------------------
# Step 5: Display the aggregated results
# -----------------------------------------------
display(aggr_df)
