In [None]:
# Databricks notebook source
# Customer 360 Data Processing
# This notebook processes customer data from Unity Catalog tables, performs data integration, aggregation, and custom calculations, and writes the results back to a Unity Catalog table.

# COMMAND ----------
import logging
from pyspark.sql import functions as F
from pyspark.sql.types import DoubleType, DateType
from pyspark.sql.utils import AnalysisException

# Configure logging
# logging.basicConfig() does nothing when the root logger already has handlers
# (a hosting notebook runtime may have installed some), so the level is also
# set on this module's logger to keep the INFO progress messages from being
# filtered out by an inherited, stricter level.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# COMMAND ----------
# Step 1: Data Selection and Filtering
# Load and select relevant fields from Unity Catalog tables.
#
# NOTE: a source-format notebook is split into cells at every
# "# COMMAND ----------" marker, so no statement may span a marker. Each step
# is therefore a self-contained function in its own cell; the last cell runs
# them in order inside a single try/except.

def _load_source_tables():
    """Read the three source tables and cast the columns used downstream.

    Uses the ``spark`` session that is expected to be provided by the
    notebook runtime (it is not created in this file).

    Returns:
        tuple: ``(demographics_df, claims_df, policy_df)``.
    """
    logger.info("Loading and selecting relevant fields from Unity Catalog tables.")

    demographics_df = spark.table("genai_demo.jnj.demographics").select(
        "Customer_ID", "Customer_Name", "Email", "Phone_Number", "Address", "City", "State",
        "Postal_Code", "Date_of_Birth", "Gender", "Marital_Status", "Occupation",
        "Income_Level", "Customer_Segment"
    ).withColumn("Date_of_Birth", F.col("Date_of_Birth").cast(DateType()))

    claims_df = spark.table("genai_demo.jnj.claims").select(
        "Claim_ID", "Policy_ID", "Claim_Date", "Claim_Type", "Claim_Status",
        "Claim_Amount", "Claim_Payout"
    ).withColumn("Claim_Date", F.col("Claim_Date").cast(DateType())) \
     .withColumn("Claim_Amount", F.col("Claim_Amount").cast(DoubleType()))

    policy_df = spark.table("genai_demo.jnj.policy").select(
        "policy_id", "customer_id", "policy_type", "policy_status", "policy_start_date",
        "policy_end_date", "policy_term", "policy_premium", "total_premium_paid",
        "renewal_status", "policy_addons"
    ).withColumn("total_premium_paid", F.col("total_premium_paid").cast(DoubleType()))

    return demographics_df, claims_df, policy_df

# COMMAND ----------
# Step 2: Data Integration
# Join datasets on common identifiers.

def _join_sources(demographics_df, policy_df, claims_df):
    """Inner-join demographics -> policy -> claims into one row per claim.

    The duplicate key column from the right-hand side of each join is dropped
    so that ``Customer_ID`` and ``policy_id`` stay unambiguous afterwards.
    Because both joins are inner joins, customers without a policy and
    policies without a claim do not appear in the result.

    Args:
        demographics_df: Customer demographics, keyed by ``Customer_ID``.
        policy_df: Policies, carrying ``customer_id`` and ``policy_id``.
        claims_df: Claims, carrying ``Policy_ID``.

    Returns:
        DataFrame: the joined, claim-level DataFrame.
    """
    logger.info("Joining datasets on common identifiers.")

    joined_df = demographics_df.join(
        policy_df, demographics_df.Customer_ID == policy_df.customer_id, "inner"
    ).drop(policy_df.customer_id)

    return joined_df.join(
        claims_df, joined_df.policy_id == claims_df.Policy_ID, "inner"
    ).drop(claims_df.Policy_ID)

# COMMAND ----------
# Step 3: Data Aggregation
# Aggregate data to calculate metrics.

def _aggregate_customer_metrics(final_joined_df):
    """Collapse the claim-level rows to one row of metrics per customer.

    Besides the claim metrics, the result carries ``Date_of_Birth`` and
    ``total_premium_paid`` because the custom calculations in Step 4 read them.

    Args:
        final_joined_df: Claim-level DataFrame produced by the join step.

    Returns:
        DataFrame: one row per ``Customer_ID`` with ``Total_Claims``,
        ``Policy_Count``, ``Recent_Claim_Date``, ``Average_Claim_Amount``,
        ``Date_of_Birth`` and ``total_premium_paid``.
    """
    logger.info("Aggregating data to calculate metrics.")

    claim_metrics_df = final_joined_df.groupBy("Customer_ID").agg(
        F.count("Claim_ID").alias("Total_Claims"),
        # The input has one row per claim, so a plain count of policy_id would
        # just repeat Total_Claims; count each policy once instead.
        F.countDistinct("policy_id").alias("Policy_Count"),
        F.max("Claim_Date").alias("Recent_Claim_Date"),
        F.avg("Claim_Amount").alias("Average_Claim_Amount"),
        # Presumably a single value per customer (one demographics row per
        # Customer_ID) -- TODO confirm; first() only carries it through.
        F.first("Date_of_Birth", ignorenulls=True).alias("Date_of_Birth"),
    )

    # total_premium_paid is a policy-level value that the claims join repeats
    # once per claim. Reduce to one row per policy before summing so that a
    # policy with several claims is not counted several times.
    # NOTE(review): this defines the customer-level premium as the sum over
    # the customer's policies that have at least one claim -- confirm that
    # this is the intended business definition.
    premium_df = (
        final_joined_df.select("Customer_ID", "policy_id", "total_premium_paid")
        .dropDuplicates(["Customer_ID", "policy_id"])
        .groupBy("Customer_ID")
        .agg(F.sum("total_premium_paid").alias("total_premium_paid"))
    )

    return claim_metrics_df.join(premium_df, on="Customer_ID", how="inner")

# COMMAND ----------
# Step 4: Custom Calculations
# Perform custom calculations on the aggregated data.

def _add_custom_calculations(aggregated_df):
    """Add the derived and placeholder columns to the per-customer metrics.

    Args:
        aggregated_df: Per-customer DataFrame from the aggregation step; must
            contain ``Date_of_Birth``, ``total_premium_paid``,
            ``Average_Claim_Amount``, ``Total_Claims`` and ``Policy_Count``.

    Returns:
        DataFrame: the input plus ``Age``, ``Claim_To_Premium_Ratio``,
        ``Claims_Per_Policy``, ``Retention_Rate``,
        ``Cross_Sell_Opportunities`` and ``Upsell_Potential``.
    """
    logger.info("Performing custom calculations.")

    return aggregated_df.withColumn(
        # Completed years: calendar-month arithmetic avoids the drift that
        # dividing a day count by 365 accumulates over leap years.
        "Age",
        F.floor(
            F.months_between(F.current_date(), F.col("Date_of_Birth")) / 12
        ).cast("int")
    ).withColumn(
        # A zero or NULL premium falls through to otherwise(0) rather than
        # producing a division by zero / NULL ratio.
        "Claim_To_Premium_Ratio",
        F.when(
            F.col("total_premium_paid") != 0,
            F.col("Average_Claim_Amount") / F.col("total_premium_paid")
        ).otherwise(0)
    ).withColumn(
        "Claims_Per_Policy",
        F.when(
            F.col("Policy_Count") != 0,
            F.col("Total_Claims") / F.col("Policy_Count")
        ).otherwise(0)
    ).withColumn(
        # The three columns below are constants, identical for every customer.
        "Retention_Rate", F.lit(0.85)
    ).withColumn(
        "Cross_Sell_Opportunities", F.lit("Multi-Policy Discount, Home Coverage Add-on")
    ).withColumn(
        "Upsell_Potential", F.lit("Premium Vehicle Coverage")
    )

# COMMAND ----------
# Step 5: Output Generation
# Write the final DataFrame to a Unity Catalog table.

def _write_customer_360(final_df):
    """Overwrite the ``genai_demo.jnj.customer_360`` Delta table with ``final_df``."""
    logger.info("Writing the final DataFrame to Unity Catalog table.")

    final_df.write.format("delta").mode("overwrite").saveAsTable("genai_demo.jnj.customer_360")

    logger.info("Data successfully written to Unity Catalog table.")


# Run the pipeline. Failures are logged and then re-raised so that the
# notebook run is reported as failed instead of finishing "successfully"
# without having written the output table.
try:
    demographics_df, claims_df, policy_df = _load_source_tables()
    final_joined_df = _join_sources(demographics_df, policy_df, claims_df)
    aggregated_df = _aggregate_customer_metrics(final_joined_df)
    final_df = _add_custom_calculations(aggregated_df)
    _write_customer_360(final_df)
except AnalysisException as e:
    logger.error("AnalysisException occurred: %s", e)
    raise
except Exception as e:
    logger.error("An unexpected error occurred: %s", e)
    raise
