In [0]:
# Manual reset (intentionally disabled): deletes the checkpoint directory used by
# the silver-features streaming write in this notebook. Without its checkpoint the
# stream loses its record of processed input, so only run this together with
# dropping the target feature table -- otherwise a rerun is expected to re-append
# windows that are already in the table.
# dbutils.fs.rm(
#     "/Volumes/angad_kumar91/fraud_detection_raw_data_files/checkpoints/silver_features/",
#     recurse=True
# )


In [0]:
# %sql
# DROP TABLE IF EXISTS angad_kumar91.fraud_detection_silverlayer.silver_txn_features_5min;


In [0]:
from pyspark.sql.functions import col, window, count, avg, stddev, approx_count_distinct, lit

# =========================
# Read Silver Base as STREAM
# =========================
# Open the Silver base table as a streaming source so that each run only
# consumes rows it has not processed yet.
silver_base_stream = (
    spark.readStream
        .table("angad_kumar91.fraud_detection_silverlayer.silver_transactions_base")
)

# =========================
# Windowed aggregations
# =========================
# How far behind the newest event_timestamp a row may arrive and still be counted.
# NOTE(review): "10 minutes" is a placeholder -- tune it to the real arrival delay
# of silver_transactions_base.
WATERMARK_DELAY = "10 minutes"

# Per-card features over tumbling 5-minute event-time windows.
#
# The watermark is mandatory here: the writer below uses append output mode, and
# Spark only allows a streaming aggregation in append mode when the aggregation
# is bounded by a watermark on the event-time column. Without it the query is
# rejected with an AnalysisException when it starts.
#
# Consequences of the watermark:
#   * rows older than (max event_timestamp seen - WATERMARK_DELAY) are dropped;
#   * a window is emitted once, only after the watermark passes its end, so the
#     most recent windows stay pending until newer events arrive.
silver_features_df = (
    silver_base_stream
        .withWatermark("event_timestamp", WATERMARK_DELAY)
        .groupBy(
            col("card1"),
            window(col("event_timestamp"), "5 minutes")
        )
        .agg(
            count("*").alias("txn_count_5min"),
            avg("TransactionAmt").alias("avg_amount_5min"),
            # Sample standard deviation: expect NULL for windows that contain a
            # single transaction.
            stddev("TransactionAmt").alias("stddev_amount_5min"),
            # Approximate distinct count of product codes seen in the window.
            approx_count_distinct("ProductCD").alias("product_diversity_5min")
        )
        # Flatten the window struct into explicit start/end columns and tag the
        # rows with the window length.
        .select(
            col("card1"),
            col("window.start").alias("window_start"),
            col("window.end").alias("window_end"),
            lit("5min").alias("window_size"),
            "txn_count_5min",
            "avg_amount_5min",
            "stddev_amount_5min",
            "product_diversity_5min"
        )
)

# =========================
# Write Silver Features
# =========================
# Run the feature aggregation as an incremental batch: with availableNow the
# query processes the input that is currently unread and then stops by itself.
# Progress is recorded in the checkpoint directory, so a rerun continues from
# where the previous run ended.
features_query = (
    silver_features_df.writeStream
        .format("delta")
        .outputMode("append")
        .option(
            "checkpointLocation",
            "/Volumes/angad_kumar91/fraud_detection_raw_data_files/checkpoints/silver_features/"
        )
        .trigger(availableNow=True)
        # toTable() starts the query; it is the documented PySpark API for
        # streaming into a table.
        .toTable(
            "angad_kumar91.fraud_detection_silverlayer.silver_txn_features_5min"
        )
)

# Block until the run has finished. Otherwise the following cells can execute
# while the write is still in progress, and a streaming failure would not be
# raised in this cell.
features_query.awaitTermination()


In [0]:
%sql
-- Show the column names and types of the Silver base table that the feature stream reads from.
DESCRIBE TABLE angad_kumar91.fraud_detection_silverlayer.silver_transactions_base;


In [0]:
%sql
-- Show the column names and types of the feature table written by the streaming job above.
-- The table only exists once that job has run.
DESCRIBE TABLE angad_kumar91.fraud_detection_silverlayer.silver_txn_features_5min;
