In [None]:
# Databricks notebook source
# COMMAND ----------
import logging
from pyspark.sql import functions as F
from pyspark.sql.utils import AnalysisException

# Notebook-wide logger, named after the executing module.
logger = logging.getLogger(__name__)
# Root logging configuration: request INFO as the minimum level.
logging.basicConfig(level=logging.INFO)

# COMMAND ----------
# Step 1: Data Source Configuration
# Bind each source table to a DataFrame via the notebook's `spark` session.
# An AnalysisException raised while resolving any table is logged and then
# re-raised so the failure is not hidden from the caller.
try:
    policy_df = spark.table("catalog.db.policy")
    claims_df = spark.table("catalog.db.claims")
    demographics_df = spark.table("catalog.db.demographics")
    scores_df = spark.table("catalog.db.scores")
    aiml_insights_df = spark.table("catalog.db.aiml_insights")
except AnalysisException as load_error:
    logger.error(f"Error loading data from Unity Catalog: {load_error}")
    raise
else:
    logger.info("Data loaded successfully from Unity Catalog tables.")

# COMMAND ----------
# Step 2: Data Selection
# Project each source DataFrame down to the columns used by later steps.
# The column lists are named first so the three projections read uniformly.
try:
    demographics_columns = [
        "Customer_ID", "Customer_Name", "Email", "Phone_Number", "Address",
        "City", "State", "Postal_Code", "Date_of_Birth", "Gender",
        "Marital_Status", "Occupation", "Income_Level", "Customer_Segment",
    ]
    claims_columns = [
        "Claim_ID", "Policy_ID", "Claim_Date", "Claim_Type", "Claim_Status",
        "Claim_Amount", "Claim_Payout",
    ]
    policy_columns = [
        "policy_id", "customer_id", "policy_type", "policy_status",
        "policy_start_date", "policy_end_date", "policy_term",
        "policy_premium", "total_premium_paid", "renewal_status",
        "policy_addons",
    ]

    selected_demographics_df = demographics_df.select(*demographics_columns)
    selected_claims_df = claims_df.select(*claims_columns)
    selected_policy_df = policy_df.select(*policy_columns)
    logger.info("Data selection completed.")
except Exception as selection_error:
    logger.error(f"Error during data selection: {selection_error}")
    raise

# COMMAND ----------
# Step 3: Data Integration
# Build one row per (customer, policy, claim) with two inner joins:
# demographics -> policy on the customer key, then -> claims on the policy
# key. After each join the right-hand copy of the key column is dropped so
# only one key column remains.
# NOTE(review): because both joins are inner, customers without a policy and
# policies without a claim do not appear downstream — confirm this is intended.
try:
    customer_key_match = (
        selected_demographics_df.Customer_ID == selected_policy_df.customer_id
    )
    joined_df = (
        selected_demographics_df
        .join(selected_policy_df, on=customer_key_match, how="inner")
        .drop(selected_policy_df.customer_id)
    )

    policy_key_match = joined_df.policy_id == selected_claims_df.Policy_ID
    final_joined_df = (
        joined_df
        .join(selected_claims_df, on=policy_key_match, how="inner")
        .drop(selected_claims_df.Policy_ID)
    )
    logger.info("Data integration completed.")
except Exception as integration_error:
    logger.error(f"Error during data integration: {integration_error}")
    raise

# COMMAND ----------
# Step 4: Data Aggregation
# Produce one row per Customer_ID with claim/policy summary metrics:
#   Total_Claims         - number of non-null Claim_ID values for the customer
#   Policy_Count         - number of DISTINCT policies for the customer
#   Recent_Claim_Date    - latest Claim_Date
#   Average_Claim_Amount - mean Claim_Amount across the customer's claims
try:
    aggregated_df = final_joined_df.groupBy("Customer_ID").agg(
        F.count("Claim_ID").alias("Total_Claims"),
        # final_joined_df holds one row per claim, so a policy with several
        # claims appears on several rows. A plain count of policy_id would
        # therefore just repeat Total_Claims; count distinct policies instead.
        F.countDistinct("policy_id").alias("Policy_Count"),
        F.max("Claim_Date").alias("Recent_Claim_Date"),
        F.avg("Claim_Amount").alias("Average_Claim_Amount"),
    )
    logger.info("Data aggregation completed.")
except Exception as e:
    logger.error(f"Error during data aggregation: {e}")
    raise

# COMMAND ----------
# Step 5: Custom Calculations
# Enrich the claim-level rows in final_joined_df with the per-customer
# aggregates and with derived/static columns consumed by the final select.
try:
    # Total_Claims / Policy_Count / Recent_Claim_Date / Average_Claim_Amount
    # exist only on aggregated_df, so they must be joined in before they can
    # be referenced below. Any copies left by a previous run of this cell are
    # dropped first (drop ignores absent columns) so the cell can be re-run
    # without producing duplicate column names. A left join keeps every
    # claim-level row even when its Customer_ID is null.
    aggregate_columns = [
        name for name in aggregated_df.columns if name != "Customer_ID"
    ]
    final_joined_df = final_joined_df.drop(*aggregate_columns).join(
        aggregated_df, on="Customer_ID", how="left"
    )

    # Calculate Age as whole completed years. months_between is calendar
    # aware, unlike dividing a day count by 365, which ignores leap days and
    # yields a fractional value.
    completed_years = F.floor(
        F.months_between(F.current_date(), F.col("Date_of_Birth")) / 12
    )
    final_joined_df = final_joined_df.withColumn("Age", completed_years.cast("int"))

    # Calculate Claim to Premium Ratio; falls back to 0 when the premium paid
    # is zero or null (a null comparison does not satisfy the `when`).
    final_joined_df = final_joined_df.withColumn(
        "Claim_To_Premium_Ratio",
        F.when(
            F.col("total_premium_paid") != 0,
            F.col("Claim_Amount") / F.col("total_premium_paid"),
        ).otherwise(0),
    )

    # Calculate Claims Per Policy; same zero/null fallback as above.
    final_joined_df = final_joined_df.withColumn(
        "Claims_Per_Policy",
        F.when(
            F.col("Policy_Count") != 0,
            F.col("Total_Claims") / F.col("Policy_Count"),
        ).otherwise(0),
    )

    # Add static values for Retention Rate, Cross-Sell Opportunities, and Upsell Potential
    final_joined_df = final_joined_df.withColumn("Retention_Rate", F.lit(0.85))
    final_joined_df = final_joined_df.withColumn("Cross_Sell_Opportunities", F.lit("Multi-Policy Discount, Home Coverage Add-on"))
    final_joined_df = final_joined_df.withColumn("Upsell_Potential", F.lit("Premium Vehicle Coverage"))
    logger.info("Custom calculations completed.")
except Exception as e:
    logger.error(f"Error during custom calculations: {e}")
    raise

# COMMAND ----------
# Step 6: Comprehensive Data Assembly
# Inner-join the enriched rows with the AIML insights and scores tables on
# Customer_ID, then project the Customer 360 output columns in a fixed order.
# NOTE(review): Churn_Probability through Customer_Risk_Score are presumably
# supplied by the insights/scores tables; which table provides which column
# is not visible here — verify against the table schemas.
try:
    with_insights_df = final_joined_df.join(
        aiml_insights_df, on="Customer_ID", how="inner"
    )
    final_dataset = with_insights_df.join(scores_df, on="Customer_ID", how="inner")

    customer_360_columns = [
        # Customer demographics
        "Customer_ID", "Customer_Name", "Email", "Phone_Number", "Address",
        "City", "State", "Postal_Code", "Date_of_Birth", "Gender",
        "Marital_Status", "Occupation", "Income_Level", "Customer_Segment",
        # Policy attributes
        "policy_id", "policy_type", "policy_status", "policy_start_date",
        "policy_end_date", "policy_term", "policy_premium",
        "total_premium_paid", "renewal_status", "policy_addons",
        # Claim attributes
        "Claim_ID", "Claim_Date", "Claim_Type", "Claim_Status",
        "Claim_Amount", "Claim_Payout",
        # Per-customer aggregates
        "Total_Claims", "Policy_Count", "Recent_Claim_Date",
        "Average_Claim_Amount",
        # Derived and static columns
        "Age", "Claim_To_Premium_Ratio", "Claims_Per_Policy",
        "Retention_Rate", "Cross_Sell_Opportunities", "Upsell_Potential",
        # Columns expected from the joined insights/scores tables
        "Churn_Probability", "Next_Best_Offer", "Claims_Fraud_Probability",
        "Revenue_Potential", "Credit_Score", "Fraud_Score",
        "Customer_Risk_Score",
    ]
    customer_360_df = final_dataset.select(*customer_360_columns)
    logger.info("Comprehensive data assembly completed.")
except Exception as assembly_error:
    logger.error(f"Error during comprehensive data assembly: {assembly_error}")
    raise

# COMMAND ----------
# Step 7: Output Data
# Persist the Customer 360 DataFrame as a Delta table in overwrite mode.
# Any failure is logged and re-raised.
try:
    delta_writer = customer_360_df.write.format("delta").mode("overwrite")
    delta_writer.saveAsTable("catalog.target_db.customer_360")
    logger.info("Customer 360 data successfully written to Unity Catalog table.")
except Exception as write_error:
    logger.error(f"Error writing Customer 360 data to Unity Catalog table: {write_error}")
    raise
