In [None]:
# Databricks notebook source
# MAGIC %md
# MAGIC # ETL Process for Customer 360 Data
# MAGIC This notebook performs an ETL process to integrate and transform data from various Unity Catalog tables into a comprehensive Customer 360 dataset.

# COMMAND ----------

import logging
from pyspark.sql import functions as F
from pyspark.sql.types import IntegerType, DoubleType, DateType

# Notebook-wide logger, named after the running module.
logger = logging.getLogger(__name__)
# Set the root logger to INFO so the progress messages below are emitted
# (basicConfig does nothing if the root logger already has handlers).
logging.basicConfig(level=logging.INFO)

# COMMAND ----------

# NOTE: every cell below is a complete statement on its own. Databricks runs
# each "# COMMAND ----------" cell separately, so a single try-block must not
# span several cells.

def _load_source_tables():
    """Read the five source tables from Unity Catalog.

    Returns:
        tuple: (policy, claims, demographics, scores, aiml_insights) DataFrames,
        in that order.
    """
    logger.info("Loading data from Unity Catalog tables...")
    return (
        spark.table("catalog.source_db.policy"),
        spark.table("catalog.source_db.claims"),
        spark.table("catalog.source_db.demographics"),
        spark.table("catalog.source_db.scores"),
        spark.table("catalog.source_db.aiml_insights"),
    )

# COMMAND ----------

def _convert_types(policy_df, claims_df, demographics_df):
    """Cast date and numeric columns of the raw tables to proper Spark types.

    Dates are parsed with the pattern ``yyyy-MM-dd``; terms are cast to
    integer and monetary amounts to double.

    Returns:
        tuple: (policy_df, claims_df, demographics_df) with converted columns.
    """
    logger.info("Converting data types...")
    date_format = "yyyy-MM-dd"

    policy_df = (
        policy_df
        .withColumn("policy_start_date", F.to_date("policy_start_date", date_format))
        .withColumn("policy_end_date", F.to_date("policy_end_date", date_format))
        .withColumn("policy_term", F.col("policy_term").cast(IntegerType()))
        .withColumn("policy_premium", F.col("policy_premium").cast(DoubleType()))
        .withColumn("total_premium_paid", F.col("total_premium_paid").cast(DoubleType()))
    )
    claims_df = (
        claims_df
        .withColumn("Claim_Date", F.to_date("Claim_Date", date_format))
        .withColumn("Claim_Amount", F.col("Claim_Amount").cast(DoubleType()))
        .withColumn("Claim_Payout", F.col("Claim_Payout").cast(DoubleType()))
    )
    demographics_df = demographics_df.withColumn(
        "Date_of_Birth", F.to_date("Date_of_Birth", date_format)
    )
    return policy_df, claims_df, demographics_df

# COMMAND ----------

def _integrate(demographics_df, policy_df, claims_df):
    """Inner-join demographics -> policy -> claims into one claim-level DataFrame.

    Each side is first projected to the columns the aggregation needs and its
    join key is renamed to a common spelling, so the joins can be done by
    column *name*. A name-based join keeps a single key column; joining on
    ``Customer_ID == customer_id`` would keep both, and Spark's default
    case-insensitive resolution then rejects ``groupBy("Customer_ID")`` as
    ambiguous.

    Returns:
        DataFrame with columns Customer_ID, Date_of_Birth, policy_id,
        total_premium_paid, Claim_ID, Claim_Date, Claim_Amount.
    """
    logger.info("Integrating data...")
    customers = demographics_df.select("Customer_ID", "Date_of_Birth")
    policies = policy_df.select(
        F.col("customer_id").alias("Customer_ID"),
        F.col("policy_id"),
        F.col("total_premium_paid"),
    )
    claims = claims_df.select(
        F.col("Policy_ID").alias("policy_id"),
        F.col("Claim_ID"),
        F.col("Claim_Date"),
        F.col("Claim_Amount"),
    )
    # Inner joins (as before): only customers with at least one policy that
    # has at least one claim are kept.
    return (
        customers
        .join(policies, "Customer_ID", "inner")
        .join(claims, "policy_id", "inner")
    )

# COMMAND ----------

def _aggregate_per_customer(integrated_df):
    """Roll the claim-level rows up to one row per customer.

    The roll-up is done in two steps (policy, then customer). The joined data
    has one row per claim, so summing ``total_premium_paid`` directly would
    count a policy's premium once per claim, and counting ``policy_id`` would
    just repeat the claim count.

    Returns:
        DataFrame with Customer_ID, Date_of_Birth, Total_Claims, Policy_Count,
        Recent_Claim_Date, Average_Claim_Amount, Total_Claim_Amount and
        Total_Premium_Paid.
    """
    logger.info("Aggregating data...")

    # Step 1: one row per (customer, policy).
    # NOTE(review): max() is used only to carry Date_of_Birth and
    # total_premium_paid through the group; this assumes one row per
    # Customer_ID in demographics and one per policy_id in policy - confirm.
    per_policy_df = integrated_df.groupBy("Customer_ID", "policy_id").agg(
        F.max("Date_of_Birth").alias("Date_of_Birth"),
        F.max("total_premium_paid").alias("total_premium_paid"),
        F.count("Claim_ID").alias("claim_count"),
        F.count("Claim_Amount").alias("claim_amount_count"),
        F.sum("Claim_Amount").alias("claim_amount_sum"),
        F.max("Claim_Date").alias("latest_claim_date"),
    )

    # Step 2: one row per customer.
    per_customer_df = per_policy_df.groupBy("Customer_ID").agg(
        F.max("Date_of_Birth").alias("Date_of_Birth"),
        F.sum("claim_count").alias("Total_Claims"),
        F.count("policy_id").alias("Policy_Count"),  # rows are distinct policies here
        F.max("latest_claim_date").alias("Recent_Claim_Date"),
        F.sum("claim_amount_sum").alias("Total_Claim_Amount"),
        F.sum("claim_amount_count").alias("claim_amount_count"),
        F.sum("total_premium_paid").alias("Total_Premium_Paid"),
    )

    # Mean over non-null claim amounts; stays null when there are none, which
    # is what avg() would return, and avoids dividing by zero.
    average_claim = F.when(
        F.col("claim_amount_count") > 0,
        F.col("Total_Claim_Amount") / F.col("claim_amount_count"),
    )
    return (
        per_customer_df
        .withColumn("Average_Claim_Amount", average_claim)
        .drop("claim_amount_count")
    )

# COMMAND ----------

def _add_custom_metrics(aggregated_df):
    """Add the derived Customer 360 columns to the per-customer aggregates.

    Adds Age, Claim_To_Premium_Ratio, Claims_Per_Policy and the three constant
    placeholder columns, then drops the helper inputs (Date_of_Birth,
    Total_Claim_Amount, Total_Premium_Paid) so the result contains only the
    aggregate and metric columns.
    """
    logger.info("Performing custom calculations...")

    # Completed years between birth date and today; months_between follows the
    # calendar, unlike dividing a day count by 365.
    age_years = F.floor(
        F.months_between(F.current_date(), F.col("Date_of_Birth")) / 12
    ).cast(IntegerType())

    # Customer-level ratio: total claimed amount over total premium paid.
    # A zero or null premium total yields 0 instead of a division error.
    claim_to_premium = F.when(
        F.col("Total_Premium_Paid") != 0,
        F.col("Total_Claim_Amount") / F.col("Total_Premium_Paid"),
    ).otherwise(F.lit(0.0))

    claims_per_policy = F.when(
        F.col("Policy_Count") != 0,
        F.col("Total_Claims") / F.col("Policy_Count"),
    ).otherwise(F.lit(0.0))

    return (
        aggregated_df
        .withColumn("Age", age_years)
        .withColumn("Claim_To_Premium_Ratio", claim_to_premium)
        .withColumn("Claims_Per_Policy", claims_per_policy)
        # Constant placeholder values, identical for every customer.
        .withColumn("Retention_Rate", F.lit(0.85))
        .withColumn("Cross_Sell_Opportunities", F.lit("Multi-Policy Discount, Home Coverage Add-on"))
        .withColumn("Upsell_Potential", F.lit("Premium Vehicle Coverage"))
        .drop("Date_of_Birth", "Total_Claim_Amount", "Total_Premium_Paid")
    )

# COMMAND ----------

# Run the ETL end to end. Intermediate DataFrames keep their original
# top-level names so any later cells can still refer to them.
try:
    policy_df, claims_df, demographics_df, scores_df, aiml_insights_df = _load_source_tables()
    policy_df, claims_df, demographics_df = _convert_types(policy_df, claims_df, demographics_df)

    integrated_df = _integrate(demographics_df, policy_df, claims_df)
    aggregated_df = _aggregate_per_customer(integrated_df)
    final_df = _add_custom_metrics(aggregated_df)

    # Join with AI/ML Insights and Scores
    # NOTE(review): a name-based join keeps every non-key column of both
    # tables; if scores and aiml_insights share a non-key column name the
    # write below will fail on duplicate columns - confirm the schemas.
    logger.info("Joining with AI/ML insights and scores...")
    final_df = (
        final_df
        .join(scores_df, "Customer_ID", "inner")
        .join(aiml_insights_df, "Customer_ID", "inner")
    )

    # Write to Unity Catalog target table (replaces existing contents)
    logger.info("Writing final data to Unity Catalog target table...")
    final_df.write.format("delta").mode("overwrite").saveAsTable("catalog.target_db.customer_360")

    logger.info("ETL process completed successfully.")

except Exception as e:
    # logger.exception records the full traceback; re-raise so the notebook
    # run is still marked as failed.
    logger.exception("An error occurred during the ETL process: %s", e)
    raise
