In [0]:
import dlt
from pyspark.sql.functions import col, sha2, concat_ws

@dlt.table(
    name="silver_crypto_market",
    comment="Cleaned and modeled crypto dataset ready for analytics.",
    table_properties={"quality": "silver"},
)
@dlt.expect_or_drop("valid_price", "price_usd > 0")
@dlt.expect_or_drop("valid_id", "crypto_id IS NOT NULL")
@dlt.expect_or_drop("valid_timestamp", "snapshot_ts IS NOT NULL")
def silver_crypto_market():
    """Build the silver crypto market table from the bronze table.

    Reads ``crypto_lake.bronze.bronze_crypto_market``, keeps the columns
    listed in ``column_specs``, renames them to the silver naming
    convention, casts them to their target types, and appends a
    ``record_id`` column: the SHA-256 hex digest of ``crypto_id`` and
    ``snapshot_ts`` joined with ``"||"``.

    The ``expect_or_drop`` decorators above are evaluated against the
    returned columns, so the names ``price_usd``, ``crypto_id`` and
    ``snapshot_ts`` produced here must stay in sync with them.

    Returns:
        The transformed DataFrame that backs the ``silver_crypto_market``
        table.
    """
    # USD-denominated price columns keep 18 fractional digits. With a scale
    # of 2, any price below half a cent is rounded to 0.00 by the cast and
    # the row is then discarded by the ``valid_price`` (price_usd > 0)
    # expectation.
    # NOTE(review): assumes bronze can contain sub-cent prices, which is
    # common for crypto assets -- confirm against the bronze data.
    price_type = "DECIMAL(38,18)"
    pct_type = "DECIMAL(10,2)"
    supply_type = "DECIMAL(18,2)"

    # (bronze column, silver column, target type). A target type of None
    # keeps whatever type the bronze column already has. The list order is
    # the column order of the resulting table.
    column_specs = [
        ("id", "crypto_id", None),
        ("symbol", "symbol", None),
        ("name", "name", None),
        ("current_price", "price_usd", price_type),
        ("market_cap", "market_cap", "BIGINT"),
        ("market_cap_rank", "market_cap_rank", "INT"),
        ("total_volume", "volume_24h", "BIGINT"),
        ("high_24h", "high_24h", price_type),
        ("low_24h", "low_24h", price_type),
        ("price_change_24h", "price_change_24h", price_type),
        ("price_change_percentage_24h", "price_change_pct_24h", pct_type),
        (
            "market_cap_change_percentage_24h",
            "market_cap_pct_change_24h",
            pct_type,
        ),
        ("circulating_supply", "circulating_supply", supply_type),
        ("max_supply", "max_supply", supply_type),
        ("ath", "ath_usd", price_type),
        ("ath_change_percentage", "pct_from_ath", pct_type),
        ("ath_date", "ath_date", "TIMESTAMP"),
        ("last_updated", "snapshot_ts", "TIMESTAMP"),
        ("ingestion_timestamp", "ingestion_ts", None),
    ]

    # Select, rename and cast in a single projection. Chaining
    # withColumn / withColumnRenamed in a loop adds one projection per
    # call, which the PySpark documentation warns can produce very large
    # plans.
    projection = [
        (col(source) if dtype is None else col(source).cast(dtype)).alias(target)
        for source, target, dtype in column_specs
    ]

    # Read Bronze table
    bronze_df = spark.table("crypto_lake.bronze.bronze_crypto_market")
    silver_df = bronze_df.select(*projection)

    # Create surrogate unique identifier from the already renamed and cast
    # columns, so the hash input matches what is stored in the table.
    return silver_df.withColumn(
        "record_id",
        sha2(concat_ws("||", col("crypto_id"), col("snapshot_ts")), 256),
    )





