In [0]:
#03_fraud_conditional_routing notebook
# Detection Flagging and Conditional Routing
from pyspark.sql.functions import col, lit
from pyspark.sql.types import StructType, StructField, BooleanType, IntegerType, TimestampType, StringType, FloatType
from datetime import datetime

def _load_alerts(view_name, view_exists, empty_schema, label):
    """Load one alert temp view, preview it, and return ``(dataframe, row_count)``.

    Args:
        view_name: Name passed to ``spark.table`` when the view exists.
        view_exists: Whether the catalog reported the view as present.
        empty_schema: ``StructType`` used to build an empty placeholder
            DataFrame when the view is missing; its field names are also the
            columns shown in the preview.
        label: Lower-case description used in the printed messages
            (e.g. ``"critical alerts"``).

    Returns:
        A ``(dataframe, row_count)`` tuple. If anything fails while reading or
        previewing, the error is printed and ``(None, 0)`` is returned so the
        routing decision can still be made.
    """
    try:
        if view_exists:
            alerts = spark.table(view_name)
            row_count = alerts.count()
        else:
            print(f"No {label} found - creating empty dataframe")
            alerts = spark.createDataFrame([], empty_schema)
            row_count = 0

        # Capitalise only the first character so "ML anomalies" stays intact.
        print(f"{label[0].upper()}{label[1:]} analysis:")
        if row_count > 0:
            display(alerts.select(*empty_schema.fieldNames()).limit(5))
        else:
            print(f"No {label} detected")
    except Exception as e:
        # Best-effort: a failure here is treated as "no alerts of this kind".
        print(f"Error reading {label}: {e}")
        return None, 0
    return alerts, row_count


def main():
    """Decide the fraud routing path from the upstream alert temp views.

    Reads ``critical_alerts_temp`` and ``ml_anomalies_temp`` (falling back to
    empty DataFrames when a view is absent), counts their rows, prints a
    decision report, and registers a one-row temp view ``fraud_decision_temp``
    holding the decision.

    Returns:
        bool: ``True`` when at least one critical alert or ML anomaly was
        counted, otherwise ``False``.
    """
    # Imported locally so the aggregate functions do not shadow builtins
    # such as ``sum`` at module level.
    from pyspark.sql import functions as F

    # =====================================================
    # 1. Validate input tables exist and have data
    # =====================================================
    print("Starting fraud conditional routing...")

    # The inputs are expected to be temp views; check for them by name.
    critical_alerts_exists = spark.catalog.tableExists("critical_alerts_temp")
    ml_anomalies_exists = spark.catalog.tableExists("ml_anomalies_temp")

    # =====================================================
    # 2. Read and count alerts with validation
    # =====================================================
    critical_schema = StructType([
        StructField("claim_id", StringType(), True),
        StructField("enhanced_fraud_score", FloatType(), True),
        StructField("alert_severity", StringType(), True),
        StructField("claim_amount", FloatType(), True),
    ])
    critical_alerts, critical_count = _load_alerts(
        "critical_alerts_temp",
        critical_alerts_exists,
        critical_schema,
        "critical alerts",
    )

    ml_schema = StructType([
        StructField("claim_id", StringType(), True),
        StructField("prediction", IntegerType(), True),
        StructField("claim_amount", FloatType(), True),
        StructField("fraud_risk_score", FloatType(), True),
    ])
    # The ML DataFrame itself is not used after the preview; only its count is.
    _, ml_count = _load_alerts(
        "ml_anomalies_temp",
        ml_anomalies_exists,
        ml_schema,
        "ML anomalies",
    )

    # =====================================================
    # 3. Decision Logic with detailed analysis
    # =====================================================
    has_fraud = critical_count > 0 or ml_count > 0

    # Additional analysis for better decision context
    high_confidence_fraud = critical_count >= 3
    ml_only_fraud = ml_count > 0 and critical_count == 0

    print("="*60)
    print("FRAUD DECISION ANALYSIS")
    print("="*60)
    print(f"Critical Alerts Count: {critical_count}")
    print(f"ML Anomalies Count: {ml_count}")
    print(f"Any Fraud Detected: {has_fraud}")
    print(f"High Confidence Fraud (3+ critical): {high_confidence_fraud}")
    print(f"ML-Only Fraud Detection: {ml_only_fraud}")
    print("="*60)

    # =====================================================
    # 4. Save decision as temporary view (not table)
    # =====================================================
    decision_schema = StructType([
        StructField("has_fraud", BooleanType(), True),
        StructField("critical_count", IntegerType(), True),
        StructField("ml_count", IntegerType(), True),
        StructField("high_confidence", BooleanType(), True),
        StructField("ml_only", BooleanType(), True),
        StructField("total_alerts", IntegerType(), True),
        StructField("decision_timestamp", TimestampType(), True),
        StructField("routing_path", StringType(), True),
    ])

    routing_path = "FRAUD_DETECTED" if has_fraud else "NO_FRAUD"

    decision_data = [(
        has_fraud,
        critical_count,
        ml_count,
        high_confidence_fraud,
        ml_only_fraud,
        critical_count + ml_count,
        datetime.now(),
        routing_path,
    )]

    decision_df = spark.createDataFrame(decision_data, decision_schema)

    # Save decision as temporary view for downstream tasks
    decision_df.createOrReplaceTempView("fraud_decision_temp")
    print("✅ Decision saved as temporary view: fraud_decision_temp")

    # =====================================================
    # 5. Additional monitoring: Alert distribution
    # =====================================================
    try:
        # Explicit None check: the loader returns None when reading failed.
        if critical_count > 0 and critical_alerts is not None:
            # Use real aggregate functions; Column objects have no
            # .avg()/.sum() methods and lit("count") is just a constant string.
            alert_distribution = critical_alerts.groupBy("alert_severity").agg(
                F.count("*").alias("count"),
                F.avg("enhanced_fraud_score").alias("avg_score"),
                F.sum("claim_amount").alias("total_amount"),
            )

            print("Alert severity distribution:")
            display(alert_distribution)

    except Exception as e:
        # Monitoring output is optional; never let it block the routing result.
        print(f"Alert distribution analysis skipped: {e}")

    # =====================================================
    # 6. Final summary and routing recommendation
    # =====================================================
    print("="*60)
    print("ROUTING DECISION COMPLETE")
    print("="*60)
    print(f"Fraud Detection Result: {'FRAUD DETECTED' if has_fraud else '✅ NO FRAUD'}")
    print(f"Critical Alerts: {critical_count}")
    print(f"ML Anomalies: {ml_count}")
    print(f"Routing Path: {routing_path}")

    if has_fraud:
        print("Next: Proceed to alert generation and compliance reporting")
    else:
        print("Next: Proceed to audit logging and clean transaction processing")
    print("="*60)

    return has_fraud

# Execute main function
if __name__ == "__main__":
    result = main()
    # Encode the boolean outcome once as a lowercase string flag.
    has_fraud_flag = "true" if result else "false"
    # Publish the flag as a job task value under the key "has_fraud".
    dbutils.jobs.taskValues.set("has_fraud", has_fraud_flag)
    # dbutils.notebook.exit is documented as taking a string value, so pass
    # the same string flag instead of the raw Python bool.
    dbutils.notebook.exit(has_fraud_flag)