In [0]:
from pyspark.sql.functions import col, when, log1p

# Source of the pipeline: open the Bronze table as a streaming DataFrame.
bronze_table = "angad_kumar91.fraud_detection_bronzelayer.stream_bronze_data"
bronze_df = spark.readStream.table(bronze_table)

# Silver base transform: cast the typed columns, drop rows without an amount,
# add two row-level features, then attach the event-time watermark.

# Target Spark SQL type for every Bronze column that is cast in place.
_silver_cast_types = {
    "TransactionID": "long",
    "TransactionDT": "long",
    "TransactionAmt": "double",
    "isFraud": "int",
    "card1": "int",
    "event_timestamp": "timestamp",
}

# Column reference reused by the filter and by both derived features.
_txn_amount = col("TransactionAmt")

silver_base_df = (
    bronze_df
    # Each entry replaces the column of the same name with its cast version.
    .withColumns(
        {name: col(name).cast(dtype) for name, dtype in _silver_cast_types.items()}
    )
    # Data quality: keep only rows whose (already cast) amount is not null.
    .filter(_txn_amount.isNotNull())
    # Row-level features only; nothing here depends on other rows.
    .withColumns(
        {
            # 1 when the amount is strictly greater than 1000, otherwise 0.
            "is_high_value_txn": when(_txn_amount > 1000, 1).otherwise(0),
            # log1p of the amount, i.e. log(1 + TransactionAmt).
            "log_transaction_amount": log1p(_txn_amount),
        }
    )
    # Keep this the only watermark on the stream: event_timestamp, 10 minutes.
    .withWatermark("event_timestamp", "10 minutes")
)

# Write Silver Base
# Start the streaming write of silver_base_df into the Silver table.
# `toTable` is the documented DataStreamWriter method for starting a stream
# that writes to a table; a bare `.table(...)` on the writer is not part of
# the open-source PySpark API, so `toTable` is used for portability.
(
    silver_base_df.writeStream
        .format("delta")
        # Rows are only ever appended to the target table.
        .outputMode("append")
        # Checkpoint directory used by this query.
        .option(
            "checkpointLocation",
            "/Volumes/angad_kumar91/fraud_detection_raw_data_files/checkpoints/silver_base/"
        )
        # availableNow trigger: per the Structured Streaming guide, the query
        # processes the data available at start and then stops on its own.
        .trigger(availableNow=True)
        .toTable(
            "angad_kumar91.fraud_detection_silverlayer.silver_transactions_base"
        )
)
