In [None]:
# ============================================================
# nb_silver_10_transactions
# ------------------------------------------------------------
# Rôle :
#   - Transformer bronze_transactions_raw -> silver_transactions
#   - Appliquer normalisation, typage, déduplication
#   - Ajouter traçabilité & audit
#   - Écrire la table Silver avec partitionnement mensuel
# ============================================================

# ------------------------------------------------------------
# 1) Import des utilitaires Silver
# ------------------------------------------------------------
%run ./nb_silver_utils

from pyspark.sql import functions as F

# ------------------------------------------------------------
# 2) Read the Bronze source
# ------------------------------------------------------------
df_bronze = spark.table("bronze_transactions_raw")

# ------------------------------------------------------------
# 3) Structural checks (fail fast)
# ------------------------------------------------------------
# Every Bronze column this notebook consumes must be listed here. That
# includes the pass-through attributes (use_chip, merchant_city,
# merchant_state, zip): they are not transformed anywhere in this notebook
# but are selected by name in the final Silver projection, so a Bronze table
# missing them must be rejected at this step rather than at the projection.
REQUIRED_BRONZE_COLUMNS = [
    # Business columns (renamed / normalized below)
    "id",
    "date",
    "client_id",
    "card_id",
    "merchant_id",
    "amount",
    "mcc",
    "errors",
    # Pass-through attributes required by the final projection
    "use_chip",
    "merchant_city",
    "merchant_state",
    "zip",
    # Ingestion / lineage columns
    "source_file",
    "ingestion_ts",
    "ingestion_date",
]

assert_required_columns(df_bronze, REQUIRED_BRONZE_COLUMNS)

# ------------------------------------------------------------
# 4) Rename Bronze columns to their business names
# ------------------------------------------------------------
# Bronze name -> Silver name. The three renames are independent of each
# other, so applying them one by one from the mapping gives the same result
# as a chained call.
bronze_to_silver_names = {
    "id": "transaction_id",
    "date": "txn_ts",
    "errors": "error_code",
}

df = df_bronze
for bronze_name, silver_name in bronze_to_silver_names.items():
    df = df.withColumnRenamed(bronze_name, silver_name)

# ------------------------------------------------------------
# 5) Silver normalizations (helpers come from nb_silver_utils)
# ------------------------------------------------------------

# 5.1 Transaction dates derived from txn_ts
#     (presumably adds txn_date / txn_month, which the final projection
#     selects -- confirm in nb_silver_utils)
df = add_txn_dates(df, ts_col="txn_ts")

# 5.2 Monetary amount, cast with precision 18 and scale 2
#     (original note: double -> decimal)
df = cast_amount(df, col="amount", precision=18, scale=2)

# 5.3 Normalized MCC (original note: 4-character string)
df = normalize_mcc(df, col="mcc")

# 5.4 Simple business rule: a transaction is successful when error_code == 0.
# NOTE(review): a NULL error_code makes this comparison NULL rather than
# True/False; if Bronze `errors` is NULL or textual for successful
# transactions this flag will not behave as intended -- confirm the encoding.
success_condition = F.col("error_code") == 0
df = df.withColumn("is_success", success_condition)

# ------------------------------------------------------------
# 6) Technical & traceability columns
# ------------------------------------------------------------
df = add_tech_columns(df, source_file_col="source_file")

# ------------------------------------------------------------
# 7) Business hash (stability & audit)
# ------------------------------------------------------------
# Columns fed to the record hash. mcc_code is expected to exist at this
# point (presumably produced by normalize_mcc -- confirm in nb_silver_utils).
hash_input_columns = [
    "transaction_id",
    "client_id",
    "card_id",
    "txn_ts",
    "amount",
    "mcc_code",
]
df = add_record_hash(df, cols=hash_input_columns)

# ------------------------------------------------------------
# 8) Silver deduplication
# ------------------------------------------------------------
# One row is kept per transaction_id, ordered on ingestion_ts
# (presumably the most recent ingestion wins, per the helper's name).
dedup_keys = ["transaction_id"]
df = deduplicate_latest(df, key_cols=dedup_keys, order_col="ingestion_ts")

# ------------------------------------------------------------
# 9) Final projection of the Silver contract
# ------------------------------------------------------------
# The contract is assembled from four column groups; the order of the groups
# and of the columns inside each group defines the output column order.

# Key & time
key_and_time_columns = ["transaction_id", "txn_ts", "txn_date", "txn_month"]

# Business keys
business_key_columns = ["client_id", "card_id", "merchant_id", "mcc_code"]

# Measures & attributes
measure_and_attribute_columns = [
    "amount",
    "use_chip",
    "merchant_city",
    "merchant_state",
    "zip",
    "error_code",
    "is_success",
]

# Audit
audit_columns = ["source_file", "ingestion_date", "ingestion_ts", "record_hash"]

df = df.select(
    *key_and_time_columns,
    *business_key_columns,
    *measure_and_attribute_columns,
    *audit_columns,
)

# ------------------------------------------------------------
# 10) Governed Silver write (original note: monthly partitioning)
# ------------------------------------------------------------
# The write mode is spelled out on purpose so the replace-on-run behavior is
# visible at the call site instead of relying on the helper's default.
silver_table = "silver_transactions"
silver_write_mode = "overwrite"

write_silver_transactions(df, table_name=silver_table, mode=silver_write_mode)

# ------------------------------------------------------------
# 11) End of notebook
# ------------------------------------------------------------
print("silver_transactions successfully written.")
