# Databricks notebook source
# COMMAND ----------
# MAGIC %md
# MAGIC # Customer 360 Data Processing
# MAGIC This notebook processes customer data from various sources to create a comprehensive Customer 360 view. The data is loaded, transformed, and written to Unity Catalog.

# COMMAND ----------
# MAGIC
# Initialize logging
import logging
from pyspark.sql import functions as F
from pyspark.sql import SparkSession

# Configure logging for this notebook. basicConfig() does nothing when the root
# logger already has handlers (for example when the hosting environment has
# configured logging beforehand), so the INFO level is also set directly on
# this logger to keep the progress messages below from being filtered out.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Assume the Spark session is already available as 'spark'

# COMMAND ----------
# MAGIC
# Load the five source CSV uploads (policy, claims, demographics, scores and
# AI/ML insights). Every file is read with a header row and inferred types.
_UPLOAD_ROOT = "tfs://dataeconomy-9k42/62457/uploads/62457/29aabe9d-c354-4176-8f98-2dd7a5fd7216"


def _read_upload(file_name):
    """Read one CSV from the upload folder with header and schema inference."""
    return spark.read.csv(f"{_UPLOAD_ROOT}/{file_name}", header=True, inferSchema=True)


try:
    policy_df = _read_upload("policycsv")
    claims_df = _read_upload("claimscsv")
    demographics_df = _read_upload("demographicscsv")
    scores_df = _read_upload("scorescsv")
    aiml_insights_df = _read_upload("aiml_insightscsv")
    logger.info("Data loaded successfully from CSV files.")
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error loading data: {e}")
    raise

# COMMAND ----------
# MAGIC
# Project each source DataFrame down to the columns listed for it.
demographics_columns = [
    "Customer_ID",
    "Customer_Name",
    "Email",
    "Phone_Number",
    "Address",
    "City",
    "State",
    "Postal_Code",
    "Date_of_Birth",
    "Gender",
    "Marital_Status",
    "Occupation",
    "Income_Level",
    "Customer_Segment",
]
claims_columns = [
    "Claim_ID",
    "Policy_ID",
    "Claim_Date",
    "Claim_Type",
    "Claim_Status",
    "Claim_Amount",
    "Claim_Payout",
]
policy_columns = [
    "policy_id",
    "customer_id",
    "policy_type",
    "policy_status",
    "policy_start_date",
    "policy_end_date",
    "policy_term",
    "policy_premium",
    "total_premium_paid",
    "renewal_status",
    "policy_addons",
]
scores_columns = ["Customer_ID", "Credit_Score", "Fraud_Score", "Customer_Risk_Score"]
aiml_insights_columns = [
    "Customer_ID",
    "Churn_Probability",
    "Next_Best_Offer",
    "Claims_Fraud_Probability",
    "Revenue_Potential",
]

demographics_df = demographics_df.select(*demographics_columns)
claims_df = claims_df.select(*claims_columns)
policy_df = policy_df.select(*policy_columns)
scores_df = scores_df.select(*scores_columns)
aiml_insights_df = aiml_insights_df.select(*aiml_insights_columns)

# COMMAND ----------
# MAGIC
# Perform joins: demographics -> policies (by customer id) -> claims (by policy id).
try:
    # Join keys are referenced through their owning DataFrame. The key names on
    # the two sides differ only by case ("Customer_ID"/"customer_id",
    # "policy_id"/"Policy_ID"), so with Spark's default case-insensitive name
    # resolution a bare F.col(name) matches a column on both sides of the join
    # and cannot be resolved to one of them.
    customer_policy_df = demographics_df.join(
        policy_df,
        demographics_df["Customer_ID"] == policy_df["customer_id"],
        "inner",
    ).drop(policy_df["customer_id"])

    # The redundant copy of each key is dropped right after its join so that
    # later references to "Customer_ID" / "policy_id" stay unambiguous and the
    # result has no column names that differ only by case.
    joined_df = customer_policy_df.join(
        claims_df,
        policy_df["policy_id"] == claims_df["Policy_ID"],
        "inner",
    ).drop(claims_df["Policy_ID"])
    logger.info("Data joined successfully.")
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error during join operations: {e}")
    raise

# COMMAND ----------
# MAGIC
# Aggregate claims data per customer.
try:
    # claims_df carries no customer id after the column selection above, so the
    # owning customer is looked up through the policy each claim belongs to.
    claims_with_customer_df = claims_df.join(
        policy_df,
        claims_df["Policy_ID"] == policy_df["policy_id"],
        "inner",
    )
    summarized_df = claims_with_customer_df.groupBy(
        policy_df["customer_id"].alias("Customer_ID")
    ).agg(
        F.count(claims_df["Claim_ID"]).alias("Total_Claims"),
        # Distinct policies that have at least one claim. A plain count of
        # Policy_ID counts claim rows and would always equal Total_Claims.
        F.countDistinct(policy_df["policy_id"]).alias("Policy_Count"),
        F.max(claims_df["Claim_Date"]).alias("Recent_Claim_Date"),
        F.avg(claims_df["Claim_Amount"]).alias("Average_Claim_Amount"),
    )
    logger.info("Claims data aggregated successfully.")
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error during aggregation: {e}")
    raise

# COMMAND ----------
# MAGIC
# Attach the per-customer claim summary to the joined detail rows.
try:
    final_df = joined_df.join(summarized_df, on="Customer_ID", how="inner")
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error during final join: {e}")
    raise
else:
    logger.info("Summarized data joined successfully.")

# COMMAND ----------
# MAGIC
# Calculate custom metrics
try:
    # Ratio expressions are defined separately for readability. Each one falls
    # back to 0 whenever its denominator is not greater than 0 (which also
    # covers a null denominator), so no division by zero can occur.
    claim_to_premium_ratio = F.when(
        F.col("total_premium_paid") > 0,
        F.col("Claim_Amount") / F.col("total_premium_paid"),
    ).otherwise(0)
    claims_per_policy = F.when(
        F.col("Policy_Count") > 0,
        F.col("Total_Claims") / F.col("Policy_Count"),
    ).otherwise(0)

    # Age in completed years: months elapsed since the date of birth, divided
    # by 12 and rounded down. Dividing a day count by 365 ignores leap days and
    # produces a fractional age.
    age_in_years = F.floor(
        F.months_between(F.current_date(), F.col("Date_of_Birth")) / 12
    ).cast("int")

    # NOTE(review): Retention_Rate, Cross_Sell_Opportunities and
    # Upsell_Potential are literal constants applied to every row; they are not
    # derived from the data.
    final_df = final_df.withColumn("Age", age_in_years) \
                       .withColumn("Claim_To_Premium_Ratio", claim_to_premium_ratio) \
                       .withColumn("Claims_Per_Policy", claims_per_policy) \
                       .withColumn("Retention_Rate", F.lit(0.85)) \
                       .withColumn("Cross_Sell_Opportunities", F.lit("['MultiPolicy Discount', 'Home Coverage Addon']")) \
                       .withColumn("Upsell_Potential", F.lit("Premium Vehicle Coverage"))
    logger.info("Custom metrics calculated successfully.")
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error during custom metric calculations: {e}")
    raise

# COMMAND ----------
# MAGIC
# Enrich the metric rows with AI/ML insights and then scores; both are inner
# joins on Customer_ID.
try:
    combined_df = final_df
    for enrichment_df in (aiml_insights_df, scores_df):
        combined_df = combined_df.join(enrichment_df, on="Customer_ID", how="inner")
    customer_360_df = combined_df
    logger.info("All data combined successfully.")
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error during data combination: {e}")
    raise

# COMMAND ----------
# MAGIC
# Write output to Unity Catalog
try:
    # Overwrite the table in place instead of dropping it first: a separate
    # DROP TABLE leaves no table at all if the write that follows fails.
    # overwriteSchema lets the overwrite also replace the stored schema when
    # the output columns change between runs.
    # NOTE(review): assumes any existing genai_demo.jnj.Customer_360 table is
    # already a Delta table - confirm before the first run.
    (
        customer_360_df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("genai_demo.jnj.Customer_360")
    )
    logger.info("Data written to Unity Catalog successfully.")
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error writing data to Unity Catalog: {e}")
    raise
