In [None]:
# ============================================================
# nb_silver_fx
# ------------------------------------------------------------
# Rôle :
#   - Transformer bronze_fx_raw -> silver_fx_rates
#   - Appliquer normalisation, typage, déduplication
#   - Ajouter traçabilité & audit
#   - Écrire la table Silver avec partitionnement mensuel
# ============================================================

# ------------------------------------------------------------
# 0) Paramètres d'exécution (STANDARD ENTITY INTERFACE)
# ------------------------------------------------------------
# Dans Microsoft Fabric, déclarez ces paramètres dans l'UI du notebook
# (première cellule). Ils seront injectés comme variables Python :
#   - run_id
#   - entity_code
#   - load_mode
#   - as_of_date
#
# Defaults ci-dessous uniquement pour exécution manuelle (interactive).
from datetime import datetime, timezone
import time
import json

# Interactive fallbacks for the standard entity parameters.
# Each parameter gets its own default: probing only `run_id` would leave
# entity_code / load_mode / as_of_date undefined (NameError further down)
# whenever a caller injects some of the parameters but not all of them.
# Names that already exist in the notebook namespace are never overwritten.
_manual_param_defaults = {
    "run_id": f"manual-{datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')}",
    "entity_code": "fx_rates",  # must match ctl_entity_silver.entity_code
    "load_mode": "full",        # full|incremental
    "as_of_date": "",           # YYYY-MM-DD or empty
}
for _param_name, _param_default in _manual_param_defaults.items():
    globals().setdefault(_param_name, _param_default)

def _iso_utc(dt: datetime) -> str:
    """Return *dt* as a second-precision UTC string ``YYYY-MM-DDTHH:MM:SSZ``.

    The value is first converted to UTC with ``astimezone`` and then
    formatted; sub-second precision is dropped by the format string.
    """
    as_utc = dt.astimezone(timezone.utc)
    return as_utc.strftime("%Y-%m-%dT%H:%M:%SZ")

# Run timing (contract v1.0): an epoch-seconds float used later for the
# duration metric, and a timezone-aware UTC datetime for the payload.
_t0, _started = time.time(), datetime.now(tz=timezone.utc)



In [None]:
# ------------------------------------------------------------
# 1) Import des utilitaires Silver
# ------------------------------------------------------------
# NOTE: the %run in the next cell is the Fabric notebook-reference command, not the standard IPython %run magic; it only works inside Fabric notebooks.

In [None]:
%run ./nb_silver_utils

In [None]:
from pyspark.sql import functions as F
import json

# ------------------------------------------------------------
# 2) Read the Bronze source
# ------------------------------------------------------------
df_bronze = spark.table("bronze_fx_raw")

# ------------------------------------------------------------
# 3) Structural checks (fail fast)
# ------------------------------------------------------------
# Runs before any filter or Spark action: the incremental filter below
# already references the `date` column, so a missing column must surface
# through this check rather than as an analysis error or after a count().
assert_required_columns(
    df_bronze,
    [
        "currency",
        "base_currency",
        "currency_name",
        "rate_vs_usd",
        "date",
        "source_file",
        "ingestion_ts",
        "ingestion_date",
    ],
)

# ------------------------------------------------------------
# 2bis) Incremental filter (OPTIONAL) + row_in metric (STANDARD)
# ------------------------------------------------------------
# Contract normalization: surrounding whitespace is dropped and an empty
# value is reported as None in the runtime payload. The same cleaned value
# is used for the filter, so both always agree.
_as_of_date_clean = (as_of_date or "").strip()
as_of_date_norm = _as_of_date_clean or None

# For daily FX rates the minimal incremental load keeps only the requested
# date (as_of_date, expected as YYYY-MM-DD).
if (load_mode or "").strip().lower() == "incremental" and as_of_date_norm:
    # Reject malformed dates explicitly. Spark is not guaranteed to raise on
    # an unparseable string (to_date can yield NULL), in which case the
    # equality never matches and the run would continue with zero rows.
    try:
        datetime.strptime(as_of_date_norm, "%Y-%m-%d")
    except ValueError as exc:
        raise ValueError(
            f"Invalid as_of_date {as_of_date!r}: expected format YYYY-MM-DD."
        ) from exc
    df_bronze = df_bronze.where(
        F.col("date") == F.to_date(F.lit(as_of_date_norm))
    )

# row_in = number of rows read from Bronze after the incremental filter
row_in = df_bronze.count()

# ------------------------------------------------------------
# 4) Rename business columns (Bronze name -> Silver name)
# ------------------------------------------------------------
df = df_bronze
for _bronze_name, _silver_name in (("date", "fx_date"), ("rate_vs_usd", "rate")):
    df = df.withColumnRenamed(_bronze_name, _silver_name)

# ------------------------------------------------------------
# 5) Silver normalizations (delegated to the shared utils)
# ------------------------------------------------------------

# 5.1 FX dates (day + month), driven by the renamed fx_date column
df = add_fx_dates(df, date_col="fx_date")

# 5.2 Currency code normalization (ISO) on both currency columns
_currency_code_columns = ["currency", "base_currency"]
df = normalize_currency_codes(df, cols=_currency_code_columns)

# 5.3 FX rate cast (double -> decimal) with precision 18, scale 8
df = cast_rate(df, col="rate", precision=18, scale=8)

# ------------------------------------------------------------
# 6) Technical & traceability columns
# ------------------------------------------------------------
df = add_tech_columns(df, source_file_col="source_file")

# ------------------------------------------------------------
# 7) Business hash (stability & audit)
# ------------------------------------------------------------
# The hash is computed over the normalized key columns and the cast rate.
_hash_input_columns = ["currency", "base_currency", "fx_date", "rate"]
df = add_record_hash(df, cols=_hash_input_columns)


# ------------------------------------------------------------
# 8) Silver deduplication
# ------------------------------------------------------------

# Rows are counted on both sides of the dedup so the number of dropped
# rows can be reported as a metric (contract v1.0).
_pre_dedup_count = df.count()

# Bronze natural key = (currency, date); Silver keeps the more robust
# (base_currency, currency, fx_date) form, ordered on ingestion_ts.
_natural_key_columns = ["base_currency", "currency", "fx_date"]
df = deduplicate_latest(
    df,
    key_cols=_natural_key_columns,
    order_col="ingestion_ts",
)

_post_dedup_count = df.count()
_rows_removed = _pre_dedup_count - _post_dedup_count
dedup_dropped = int(_rows_removed)

# ------------------------------------------------------------
# 9) Final projection of the Silver contract
# ------------------------------------------------------------
_silver_contract_columns = [
    # --- Keys & time ---
    "base_currency",
    "currency",
    "currency_name",
    "fx_date",
    "fx_month",
    # --- Measure ---
    "rate",
    # --- Audit ---
    "source_file",
    "ingestion_date",
    "ingestion_ts",
    "record_hash",
]
# Column order of the output follows the list above.
df = df.select(*_silver_contract_columns)


# ------------------------------------------------------------
# 9bis) FX-specific governance checks (fail fast)
# ------------------------------------------------------------
# Each check raises on the first violation; a passed check is recorded in
# fail_fast_checks, which is reported in the runtime payload.
fail_fast_checks = []

# Check 1: base_currency must be present and equal to "EUR" on every row.
_base_is_null = F.col("base_currency").isNull()
_base_is_not_eur = F.col("base_currency") != F.lit("EUR")
bad_base = df.filter(_base_is_null | _base_is_not_eur)

# limit(1) keeps the probe cheap: one offending row is enough to fail.
_bad_base_found = bad_base.limit(1).count() > 0
if _bad_base_found:
    raise ValueError(
        "Invalid base_currency detected: expected EUR (EUR-based FX dataset)."
    )

fail_fast_checks.append({"name": "base_currency_is_EUR", "passed": True})

# Check 2: no component of the natural key may be NULL.
_any_key_is_null = (
    F.col("base_currency").isNull()
    | F.col("currency").isNull()
    | F.col("fx_date").isNull()
)
bad_keys = df.filter(_any_key_is_null)

_bad_keys_found = bad_keys.limit(1).count() > 0
if _bad_keys_found:
    raise ValueError("Natural key NULL detected in FX rates (base_currency, currency, fx_date).")

fail_fast_checks.append({"name": "natural_keys_not_null", "passed": True})

# ------------------------------------------------------------
# 10) Governed Silver write (monthly partition)
# ------------------------------------------------------------

# --- STANDARD metrics (row_out / partition_count) ---
# row_out = rows in the final DataFrame (after dedup/projection) that is
# handed to the writer
row_out = df.count()

# partition_count = number of distinct fx_month values in that DataFrame
_months_present = df.select("fx_month").distinct()
partition_count = _months_present.count()

# NOTE(review): mode="overwrite" is used for every load_mode. With
# load_mode="incremental" the DataFrame may hold a single date only;
# confirm that write_silver_fx_rates limits the overwrite to the affected
# data and does not replace the whole table.
_write_kwargs = {"table_name": "silver_fx_rates", "mode": "overwrite"}
write_silver_fx_rates(df, **_write_kwargs)

# ------------------------------------------------------------
# 11) End of notebook: build the runtime payload and exit
# ------------------------------------------------------------
_ended = datetime.now(timezone.utc)
_elapsed_seconds = time.time() - _t0
duration_ms = int(_elapsed_seconds * 1000)

# Sections of the runtime payload, contract v1.0
# (Files/governance/runtime/silver/entity_payload.json).
# Key order is kept stable because the payload is serialized as-is.
_metrics_section = {
    "row_in": None if row_in is None else int(row_in),
    "row_out": int(row_out),
    "partition_count": 0 if partition_count is None else int(partition_count),
    "dedup_dropped": int(dedup_dropped),
}
_table_section = {
    "target_table": "silver_fx_rates",
    "partition_cols": ["fx_month"],
}
_timing_section = {
    "started_utc": _iso_utc(_started),
    "ended_utc": _iso_utc(_ended),
    "duration_ms": duration_ms,
}

payload = {
    "contract_version": "1.0",
    "layer": "silver",
    "run_id": run_id,
    "entity_code": entity_code,
    "load_mode": load_mode,
    "as_of_date": as_of_date_norm,
    "status": "SUCCESS",
    "metrics": _metrics_section,
    "table": _table_section,
    "timing": _timing_section,
    "quality": {"fail_fast_checks": fail_fast_checks},
    "notes": {"message": None},
}

# IMPORTANT: nb_load_silver reads this JSON back
# (mssparkutils.notebook.run -> string), so the exit value must stay a
# JSON-serialized string.
_payload_json = json.dumps(payload)
mssparkutils.notebook.exit(_payload_json)
