In [None]:
# ============================================================
# nb_silver_22_cards
# ============================================================

# %run ./nb_silver_utils
from pyspark.sql import functions as F

# Load the raw card records from the Bronze layer.
df_bronze = spark.table("bronze_card_raw")

# Every Bronze column this notebook reads further down. The check runs before
# any transformation so a schema drift is reported at the very start
# (assert_required_columns is defined outside this cell).
required_bronze_columns = [
    "id",
    "client_id",
    "card_brand",
    "card_type",
    "card_number",
    "expires",
    "cvv",
    "has_chip",
    "num_cards_issued",
    "credit_limit",
    "acct_open_date",
    "year_pin_last_changed",
    "card_on_dark_web",
    "source_file",
    "ingestion_ts",
    "ingestion_date",
]
assert_required_columns(df_bronze, required_bronze_columns)

# Rename the Bronze primary key to its Silver name.
df = df_bronze.withColumnRenamed("id", "card_id")

# Text normalisation of the categorical columns.
text_columns = ["card_brand", "card_type"]
df = normalize_text(df, cols=text_columns)

# Boolean parsing: has_chip / card_on_dark_web arrive as STRING.
for flag_column in ("has_chip", "card_on_dark_web"):
    df = parse_bool_yn(df, flag_column)

# credit_limit -> DECIMAL(18,2)
df = cast_decimal(df, "credit_limit", precision=18, scale=2)

# acct_open_date (string) -> DATE, using the candidate formats below.
# NOTE(review): all three formats are day-precision; if Bronze delivers
# month-precision values (e.g. "MM/yyyy") none of them will match - confirm
# against the actual data.
acct_open_date_formats = ["yyyy-MM-dd", "MM/dd/yyyy", "M/d/yyyy"]
df = parse_date_multi(df, "acct_open_date", formats=acct_open_date_formats)

# expires: keep the original text in expires_raw and derive expires_month, a
# DATE set to the first day of the expiry month.
#
# The Bronze format is an assumption, so both "MM/yy" and "MM/yyyy" are
# accepted (surrounding spaces are ignored). Each to_date call is guarded by a
# regex so it only receives text that matches its pattern; any other value
# (NULL, bad month, unexpected layout) produces NULL in expires_month instead
# of being handed to the date parser. expires_raw always keeps the untouched
# source text so rejected values remain auditable.
df = df.withColumn("expires_raw", F.col("expires").cast("string"))

expires_text = F.trim(F.col("expires_raw"))
# Prefix a day so the value can be parsed as a full date.
first_of_month_text = F.concat(F.lit("01/"), expires_text)

df = df.withColumn(
    "expires_month",
    F.when(
        # Two-digit year, e.g. "12/25".
        expires_text.rlike(r"^(0[1-9]|1[0-2])/[0-9]{2}$"),
        F.to_date(first_of_month_text, "dd/MM/yy"),
    ).when(
        # Four-digit year, e.g. "12/2025".
        expires_text.rlike(r"^(0[1-9]|1[0-2])/[0-9]{4}$"),
        F.to_date(first_of_month_text, "dd/MM/yyyy"),
    ),
)

# Technical columns (helper defined outside this cell; it is told which
# column carries the source file name).
df = add_tech_columns(df, source_file_col="source_file")

# Record hash over the business columns. Only the names actually present in
# the DataFrame are handed to add_record_hash.
hash_cols = [
    "card_id",
    "client_id",
    "card_brand",
    "card_type",
    "card_number",
    "expires_raw",
    "expires_month",
    "cvv",
    "has_chip",
    "num_cards_issued",
    "credit_limit",
    "acct_open_date",
    "year_pin_last_changed",
    "card_on_dark_web",
]
present_columns = set(df.columns)
hashable_cols = [name for name in hash_cols if name in present_columns]
df = add_record_hash(df, cols=hashable_cols)

# Deduplication on the natural key (id -> card_id), ordered by ingestion_ts.
# Presumably keeps the most recently ingested row per card_id - confirm in
# deduplicate_latest.
df = deduplicate_latest(df, key_cols=["card_id"], order_col="ingestion_ts")

# Silver projection: fixes the final column set and order; every column not
# listed here (e.g. the original "expires") is dropped.
# NOTE(review): card_number and cvv are projected unchanged by this cell -
# confirm that masking/tokenisation is handled elsewhere if required.
silver_columns = [
    # Business columns
    "card_id",
    "client_id",
    "card_brand",
    "card_type",
    "card_number",
    "expires_raw",
    "expires_month",
    "cvv",
    "has_chip",
    "num_cards_issued",
    "credit_limit",
    "acct_open_date",
    "year_pin_last_changed",
    "card_on_dark_web",
    # Lineage / technical columns
    "source_file",
    "ingestion_date",
    "ingestion_ts",
    "record_hash",
]
df = df.select(*silver_columns)

# Fail fast: abort before writing if a mandatory identifier is NULL.
# Checks run in the listed order, so a NULL card_id is reported first.
# client_id is mandatory because it is the join key to users/transactions.
# limit(1) lets each check stop as soon as one offending row is found.
mandatory_id_checks = (
    ("card_id", "Null card_id detected in silver_cards."),
    ("client_id", "Null client_id detected in silver_cards (required for joins with users/transactions)."),
)
for checked_column, failure_message in mandatory_id_checks:
    has_null = df.filter(F.col(checked_column).isNull()).limit(1).count() > 0
    if has_null:
        raise ValueError(failure_message)

# Persist the validated DataFrame to the Silver table, requesting overwrite
# mode from the write helper (defined outside this cell), then report success.
silver_table = "silver_cards"
write_silver_cards(df, table_name=silver_table, mode="overwrite")
print("silver_cards successfully written.")

