# Databricks notebook source
# MAGIC %md
# MAGIC # Customer 360 Profile Creation
# MAGIC This notebook loads data from Unity Catalog tables, processes it, and writes a comprehensive customer profile back to Unity Catalog.

# COMMAND ----------

import logging
from pyspark.sql.functions import col, when, datediff, current_date, count, avg, max, lit

# Logging setup: grab this module's logger, then ask the root logger to emit
# INFO and above. Note that basicConfig() does nothing if the root logger
# already has handlers attached.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# COMMAND ----------

# Read the five source tables from Unity Catalog.
# `spark` is not created in this notebook; it is expected to be supplied by
# the runtime before this cell executes.
try:
    logger.info("Loading data from Unity Catalog tables...")

    # Inputs to the demographics -> policy -> claims join.
    policy_df = spark.table("genai_demo.jnj.policy")
    claims_df = spark.table("genai_demo.jnj.claims")
    demographics_df = spark.table("genai_demo.jnj.demographics")

    # Joined in only at the final profile-assembly step.
    scores_df = spark.table("genai_demo.jnj.scores")
    aiml_insights_df = spark.table("genai_demo.jnj.aiml_insights")
except Exception as load_error:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error loading data: {load_error}")
    raise
else:
    logger.info("Data loaded successfully.")

# COMMAND ----------

# Narrow each source DataFrame down to the columns used by the profile.
# Only demographics, claims and policy are projected here; the other loaded
# tables are used as-is later on.
try:
    logger.info("Selecting relevant fields from datasets...")

    # Customer identity, contact details and segmentation attributes.
    demographics_selected = demographics_df.select(
        "Customer_ID",
        "Customer_Name",
        "Email",
        "Phone_Number",
        "Address",
        "City",
        "State",
        "Postal_Code",
        "Date_of_Birth",
        "Gender",
        "Marital_Status",
        "Occupation",
        "Income_Level",
        "Customer_Segment",
    )

    # Claim facts; Policy_ID is the link back to the policy table.
    claims_selected = claims_df.select(
        "Claim_ID",
        "Policy_ID",
        "Claim_Date",
        "Claim_Type",
        "Claim_Status",
        "Claim_Amount",
        "Claim_Payout",
    )

    # Policy facts; customer_id is the link back to demographics.
    policy_selected = policy_df.select(
        "policy_id",
        "customer_id",
        "policy_type",
        "policy_status",
        "policy_start_date",
        "policy_end_date",
        "policy_term",
        "policy_premium",
        "total_premium_paid",
        "renewal_status",
        "policy_addons",
    )
except Exception as select_error:
    logger.error(f"Error selecting fields: {select_error}")
    raise
else:
    logger.info("Field selection completed.")

# COMMAND ----------

# Integrate demographics, policies and claims into a single DataFrame with one
# row per matching (customer, policy, claim) combination.
#
# Both joins are inner joins, so a customer only appears in joined_df if they
# have at least one policy that has at least one claim.
try:
    logger.info("Joining datasets...")
    joined_df = (
        demographics_selected.join(
            policy_selected,
            demographics_selected.Customer_ID == policy_selected.customer_id,
            "inner",
        )
        # After an equi-join both key columns hold the same value. Spark
        # resolves column names case-insensitively by default, so keeping
        # both "Customer_ID" and "customer_id" makes any later by-name
        # reference (e.g. groupBy("Customer_ID")) ambiguous. Keep only the
        # left-hand key.
        .drop(policy_selected.customer_id)
        .join(
            claims_selected,
            policy_selected.policy_id == claims_selected.Policy_ID,
            "inner",
        )
        # Same reasoning for "policy_id" / "Policy_ID".
        .drop(claims_selected.Policy_ID)
    )
    logger.info("Datasets joined successfully.")
except Exception as e:
    logger.error(f"Error joining datasets: {e}")
    raise

# COMMAND ----------

# Compute per-customer aggregate metrics from the joined claim-level rows.
#
# Output columns: Customer_ID, Total_Claims, Policy_Count, Recent_Claim_Date,
# Average_Claim_Amount.
# NOTE: `max` here is pyspark.sql.functions.max (imported at the top of the
# file), not the Python builtin.
from pyspark.sql.functions import countDistinct

try:
    logger.info("Computing aggregate metrics...")
    aggregated_df = joined_df.groupBy(
        # Bind the key to its source DataFrame so the reference stays
        # unambiguous even if joined_df still carries both the demographics
        # and the policy copy of the customer key.
        demographics_selected["Customer_ID"]
    ).agg(
        count("Claim_ID").alias("Total_Claims"),
        # joined_df has one row per claim, so a plain count of policy_id would
        # just repeat Total_Claims. Count distinct policies instead. Because
        # of the inner join to claims, this is the number of policies that
        # have at least one claim.
        countDistinct(policy_selected["policy_id"]).alias("Policy_Count"),
        max("Claim_Date").alias("Recent_Claim_Date"),
        avg("Claim_Amount").alias("Average_Claim_Amount")
    )
    logger.info("Aggregate metrics computed.")
except Exception as e:
    logger.error(f"Error computing aggregate metrics: {e}")
    raise

# COMMAND ----------

# Derive per-customer profile attributes on top of the aggregated metrics.
#
# aggregated_df only contains Customer_ID plus the aggregate columns, so the
# two inputs the calculations need -- Date_of_Birth and total_premium_paid --
# are joined back in per customer before they are referenced.
from pyspark.sql.functions import months_between, sum as spark_sum

try:
    logger.info("Implementing custom calculations...")
    final_df = (
        aggregated_df
        # Assumes demographics holds one row per Customer_ID -- TODO confirm;
        # duplicates there would duplicate profile rows.
        .join(
            demographics_selected.select("Customer_ID", "Date_of_Birth"),
            "Customer_ID",
            "left",
        )
        # Premium is stored per policy; total it across each customer's
        # policies so the ratio below is computed at customer level. The key
        # is aliased to "Customer_ID" so the join can be done by name and
        # yields a single key column.
        .join(
            policy_selected.groupBy(col("customer_id").alias("Customer_ID")).agg(
                spark_sum("total_premium_paid").alias("total_premium_paid")
            ),
            "Customer_ID",
            "left",
        )
        # Age in (fractional) years. months_between / 12 follows the calendar,
        # whereas days / 365 drifts by a day for every leap year.
        .withColumn(
            "Age", months_between(current_date(), col("Date_of_Birth")) / 12
        )
        # A null or zero denominator falls through to otherwise(0).
        .withColumn(
            "Claim_To_Premium_Ratio",
            when(
                col("total_premium_paid") != 0,
                col("Average_Claim_Amount") / col("total_premium_paid"),
            ).otherwise(0),
        )
        .withColumn(
            "Claims_Per_Policy",
            when(
                col("Policy_Count") != 0,
                col("Total_Claims") / col("Policy_Count"),
            ).otherwise(0),
        )
        # The next three columns are hard-coded literals applied to every
        # customer, not values computed from the data.
        .withColumn("Retention_Rate", lit(0.85))
        .withColumn(
            "Cross_Sell_Opportunities",
            lit("Multi-Policy Discount, Home Coverage Add-on"),
        )
        .withColumn("Upsell_Potential", lit("Premium Vehicle Coverage"))
    )
    logger.info("Custom calculations implemented.")
except Exception as e:
    logger.error(f"Error implementing custom calculations: {e}")
    raise

# COMMAND ----------

# Assemble the customer 360 profile by attaching the scores and AI/ML insight
# tables to the per-customer metrics. Both joins are inner joins on
# Customer_ID, so a customer missing from either table is dropped.
try:
    logger.info("Combining all data sources into a comprehensive customer profile...")
    customer_360_df = (
        final_df
        .join(scores_df, on="Customer_ID", how="inner")
        .join(aiml_insights_df, on="Customer_ID", how="inner")
    )
except Exception as combine_error:
    logger.error(f"Error combining data sources: {combine_error}")
    raise
else:
    logger.info("Data sources combined successfully.")

# COMMAND ----------

# Persist the assembled profile as a Delta table in Unity Catalog.
# "overwrite" mode replaces the existing contents of the target table.
try:
    logger.info("Writing the final customer 360 profile to a Unity Catalog table...")
    (
        customer_360_df.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable("genai_demo.jnj.customer_360")
    )
except Exception as write_error:
    logger.error(f"Error writing to Unity Catalog table: {write_error}")
    raise
else:
    logger.info("Customer 360 profile written to Unity Catalog table successfully.")
