In [0]:
# 04_alert_generation.py
from datetime import datetime, date
from pyspark.sql.functions import col, lit, current_timestamp
from pyspark.sql.types import StructType, StructField, BooleanType, TimestampType, IntegerType

def send_fraud_alert_email(alert_data, ml_anomalies_count, recipient="icon.montalbar@gmail.com"):
    """Build the critical-fraud alert email and print it to stdout.

    No mail is dispatched by this function: the subject, recipient and HTML
    body are only printed ("Would send email ...").

    Args:
        alert_data: DataFrame-like object exposing ``count()`` and
            ``limit(n).collect()``. Each collected row is read by attribute:
            ``claim_id``, ``first_name``, ``last_name``, ``provider_name``,
            ``claim_amount``, ``alert_reason`` and, optionally,
            ``enhanced_fraud_score``.
        ml_anomalies_count: Number of ML anomalies, reported in the body.
        recipient: Address printed on the ``To:`` line.

    Returns:
        True when ``alert_data`` holds at least one row (the email was
        emitted), False otherwise.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    critical_count = alert_data.count()
    if critical_count <= 0:
        return False

    subject = f"🚨 MediSure Fraud Alert: {critical_count} Critical Cases Detected"
    parts = [f"""
        <h3>Critical Fraud Alerts - {date.today()}</h3>
        <p>Number of critical alerts: <strong>{critical_count}</strong></p>
        <p>ML Anomalies Detected: <strong>{ml_anomalies_count}</strong></p>
        <h4>Top 5 Critical Cases:</h4>
        <table border='1'>
        <tr>
            <th>Claim ID</th>
            <th>Member</th>
            <th>Provider</th>
            <th>Amount</th>
            <th>Fraud Score</th>
            <th>Reason</th>
        </tr>
        """]

    # NOTE(review): limit(5) is applied without an explicit ordering, so the
    # "Top 5" rows are whichever five the source returns first.
    for row in alert_data.limit(5).collect():
        # Row values come from claim data, so escape them before embedding
        # them in HTML.
        claim_id = escape(str(row.claim_id))
        member = escape(f"{row.first_name or 'Unknown'} {row.last_name or ''}")
        provider = escape(str(row.provider_name or 'Unknown Provider'))
        reason = escape(str(row.alert_reason or 'Unknown'))
        amount = float(row.claim_amount or 0.0)
        # The score column may be absent (getattr default) or present but
        # NULL (the `or` fallback); float(None) would raise TypeError.
        score = float(getattr(row, 'enhanced_fraud_score', 0.0) or 0.0)
        parts.append(f"""
            <tr>
                <td>{claim_id}</td>
                <td>{member}</td>
                <td>{provider}</td>
                <td>${amount:,.2f}</td>
                <td>{score:.2f}</td>
                <td>{reason}</td>
            </tr>
            """)

    parts.append("</table>")
    message = "".join(parts)

    print(f"Would send email with subject: {subject}")
    print(f"To: {recipient}")
    print(f"Body: {message}")

    return True

def _default_no_fraud_decision_df():
    """Return a one-row DataFrame holding the default "no fraud" decision."""
    schema = StructType([
        StructField("has_fraud", BooleanType(), True),
        StructField("critical_count", IntegerType(), True),
        StructField("ml_count", IntegerType(), True)
    ])
    return spark.createDataFrame([(False, 0, 0)], schema)


def main():
    """Run the alert-generation step of the fraud pipeline.

    Reads the first row of ``medisure_jen.temp.fraud_decision`` and branches
    on its ``has_fraud`` flag:

    * fraud: reads the critical-alert and ML-anomaly tables, emits the alert
      email via ``send_fraud_alert_email``, and writes
      ``temp.alert_true_fraud_detected``, ``audit.fraud_alerts_restricted``
      and (when non-empty) ``audit.ml_anomalies_daily``;
    * no fraud: writes the decision row to ``temp.alert_false_no_fraud``.

    In both cases the email status is written to ``temp.email_status`` and a
    summary is printed. A missing, unreadable or empty decision table is
    treated as "no fraud". Errors in the fraud path are printed and not
    re-raised.
    """
    print("="*60)
    print("ALERT GENERATION STARTED")
    print("="*60)

    # =====================================================
    # 1. Validate input and read decision
    # =====================================================
    try:
        decision_df = spark.table("medisure_jen.temp.fraud_decision")
        # first() yields None for an empty DataFrame, so a separate count()
        # job is not needed to detect the empty case.
        decision = decision_df.first()

        if decision is None:
            print("No decision data found. Creating default no-fraud scenario.")
            decision_df = _default_no_fraud_decision_df()
            decision = decision_df.first()

    except Exception as e:
        print(f"Error reading decision table: {e}. Assuming no fraud.")
        decision_df = _default_no_fraud_decision_df()
        decision = decision_df.first()

    print(f"Decision: Fraud detected = {decision.has_fraud}")
    print(f"Critical alerts: {decision.critical_count}, ML anomalies: {decision.ml_count}")

    # Initialised once so a failure *after* the email was emitted does not
    # overwrite the status with False in the audit record.
    email_sent = False

    # =====================================================
    # 2. Handle FRAUD DETECTED path
    # =====================================================
    if decision.has_fraud:
        print("🚨 FRAUD DETECTED - Generating alerts and compliance records")

        try:
            # Read detected alerts
            critical_alerts = spark.table("medisure_jen.temp.critical_alerts")
            ml_anomalies = spark.table("medisure_jen.temp.ml_anomalies")

            # Each count() launches a Spark job; compute once and reuse.
            critical_total = critical_alerts.count()
            ml_total = ml_anomalies.count()

            print(f"Processing {critical_total} critical alerts")
            print(f"Processing {ml_total} ML anomalies")

            # Display sample for monitoring
            if critical_total > 0:
                print("Sample critical alerts:")
                display(critical_alerts.select("claim_id", "claim_amount", "enhanced_fraud_score", "alert_severity").limit(5))

            # Send email alerts
            email_sent = send_fraud_alert_email(critical_alerts, ml_total)

            # Create operational output
            operational_output = (
                critical_alerts
                .withColumn("alert_type", lit("fraud_detected"))
                .withColumn("processing_timestamp", current_timestamp())
            )

            # Save operational data
            operational_output.write.mode("overwrite").saveAsTable("medisure_jen.temp.alert_true_fraud_detected")

            # 🚨 CRITICAL: Save restricted table for compliance team
            # NOTE(review): this is a CREATE TABLE ... AS SELECT, so
            # current_user() is evaluated when this statement runs and the
            # member_id/'REDACTED' choice is materialised for every reader.
            # If per-reader masking was intended, a dynamic view or column
            # mask is needed instead -- confirm with the compliance team.
            print("Creating fraud_alerts_restricted table for compliance...")
            spark.sql("""
            CREATE OR REPLACE TABLE medisure_jen.audit.fraud_alerts_restricted
            AS SELECT 
              claim_id, provider_id, claim_amount, alert_severity,
              enhanced_fraud_score, alert_reason,
              CASE 
                WHEN current_user() LIKE '%compliance%' THEN member_id
                ELSE 'REDACTED'
              END as member_id,
              alert_timestamp,
              current_timestamp() as restricted_view_created
            FROM medisure_jen.temp.critical_alerts
            """)
            print("✅ fraud_alerts_restricted table created successfully!")

            # 🚨 CRITICAL: Save ML anomalies for audit
            if ml_total > 0:
                print("Saving ML anomalies for audit...")
                ml_anomalies.write.mode("overwrite") \
                    .option("mergeSchema", "true") \
                    .saveAsTable("medisure_jen.audit.ml_anomalies_daily")
                print("✅ ML anomalies saved for audit")

        except Exception as e:
            # Best-effort: report and continue so the email status and the
            # summary below are still written. email_sent keeps whatever
            # value it had when the failure occurred.
            print(f"Error processing fraud detected path: {e}")

    # =====================================================
    # 3. Handle NO FRAUD path
    # =====================================================
    else:
        print("✅ NO FRAUD DETECTED - Generating audit records for clean transactions")

        # Create audit record for clean day
        false_path_output = (
            decision_df
            .withColumn("alert_type", lit("no_fraud_detected"))
            .withColumn("processing_timestamp", current_timestamp())
        )

        # Save audit data
        false_path_output.write.mode("overwrite").saveAsTable("medisure_jen.temp.alert_false_no_fraud")

    # =====================================================
    # 4. Save email status for audit
    # =====================================================
    email_status = spark.createDataFrame([(email_sent,)], ["email_sent"])
    email_status.write.mode("overwrite").saveAsTable("medisure_jen.temp.email_status")

    # =====================================================
    # 5. Final summary
    # =====================================================
    print("="*60)
    print("ALERT GENERATION COMPLETED")
    print("="*60)
    print(f"Fraud Detected: {decision.has_fraud}")
    print(f"Email Sent: {email_sent}")
    print(f"Critical Alerts Processed: {decision.critical_count}")
    print(f"ML Anomalies Processed: {decision.ml_count}")

    if decision.has_fraud:
        print("📋 Output: fraud_alerts_restricted, ml_anomalies_daily, alert_true_fraud_detected")
    else:
        print("📋 Output: alert_false_no_fraud (audit record)")
    print("="*60)

# Script entry point: run the alert-generation pipeline only when this file is
# executed as the main program, not when it is imported as a module.
if __name__ == "__main__":
    main()