In [None]:
# Databricks notebook source
# MAGIC %md
# MAGIC # ETL Process for Customer 360 View
# MAGIC This notebook performs an ETL process to create a comprehensive customer 360 view by integrating data from various sources.

# COMMAND ----------

import logging
from pyspark.sql import functions as F
from pyspark.sql.types import DoubleType, DateType

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# COMMAND ----------

# NOTE: every cell below is syntactically self-contained. A single `try:`
# must not span `# COMMAND ----------` separators, because each separator
# starts a new notebook cell that is compiled on its own.

SOURCE_SCHEMA = "catalog.source_db"
TARGET_TABLE = "catalog.target_db.customer_360_view"


def build_customer_360(spark):
    """Build the customer 360 DataFrame from the Unity Catalog source tables.

    Reads the policy, claims, demographics, scores and AI/ML insight tables,
    aggregates claims and premiums per customer, derives the ratio columns and
    joins the score and insight columns on ``Customer_ID``.

    Args:
        spark: The active SparkSession used to read the source tables.

    Returns:
        A DataFrame keyed by ``Customer_ID``. Only customers that have at
        least one claim and are present in the demographics, scores and
        insights tables are returned (all joins are inner joins).
    """
    # Load data from Unity Catalog tables
    logger.info("Loading data from Unity Catalog tables...")
    policy_df = spark.table(f"{SOURCE_SCHEMA}.policy_data")
    claims_df = spark.table(f"{SOURCE_SCHEMA}.claims_data")
    demographics_df = spark.table(f"{SOURCE_SCHEMA}.demographics_data")
    scores_df = spark.table(f"{SOURCE_SCHEMA}.scores_data")
    aiml_insights_df = spark.table(f"{SOURCE_SCHEMA}.aiml_insights_data")

    # Data Selection and Filtering
    logger.info("Selecting necessary fields from each data source...")
    # The join keys are aliased to one spelling per key. Spark resolves column
    # names case-insensitively by default, so keeping both `customer_id` and
    # `Customer_ID` (or `policy_id` and `Policy_ID`) after a join makes every
    # later reference to the key ambiguous.
    policy_df = policy_df.select(
        "policy_id",
        F.col("customer_id").alias("Customer_ID"),
        "policy_type",
        "policy_status",
        "policy_start_date",
        "policy_end_date",
        "policy_term",
        "policy_premium",
        "total_premium_paid",
        "renewal_status",
        "policy_addons",
    )

    claims_df = claims_df.select(
        "Claim_ID",
        F.col("Policy_ID").alias("policy_id"),
        "Claim_Date",
        "Claim_Type",
        "Claim_Status",
        "Claim_Amount",
        "Claim_Payout",
    )

    demographics_df = demographics_df.select(
        "Customer_ID", "Customer_Name", "Email", "Phone_Number",
        "Address", "City", "State", "Postal_Code",
        "Date_of_Birth", "Gender", "Marital_Status",
        "Occupation", "Income_Level", "Customer_Segment",
    )

    scores_df = scores_df.select(
        "Customer_ID", "Credit_Score", "Fraud_Score", "Customer_Risk_Score"
    )

    aiml_insights_df = aiml_insights_df.select(
        "Customer_ID", "Churn_Probability", "Next_Best_Offer",
        "Claims_Fraud_Probability", "Revenue_Potential",
    )

    # Data Integration
    logger.info("Joining datasets on common identifiers...")
    # Joining on the column *name* keeps a single copy of the key column.
    policy_claims_df = policy_df.select("policy_id", "Customer_ID").join(
        claims_df, "policy_id", "inner"
    )

    # Data Aggregation
    logger.info("Aggregating claims data...")
    # Policy_Count counts the distinct policies that have at least one claim,
    # because it is computed over the policy/claims inner join.
    claims_agg_df = policy_claims_df.groupBy("Customer_ID").agg(
        F.count("Claim_ID").alias("Total_Claims"),
        F.countDistinct("policy_id").alias("Policy_Count"),
        F.max("Claim_Date").alias("Recent_Claim_Date"),
        F.avg("Claim_Amount").alias("Average_Claim_Amount"),
    )
    # Premiums are summed from the policy table directly, not from the
    # policy/claims join, so a policy with several claims is counted once.
    # NOTE(review): assumes one row per policy in policy_data - confirm.
    premium_agg_df = policy_df.groupBy("Customer_ID").agg(
        F.sum("total_premium_paid").alias("total_premium_paid")
    )

    # Bring the demographic attributes (including Date_of_Birth) and the
    # per-customer premium total back next to the aggregates; the derived
    # columns below need both.
    customer_df = demographics_df.join(
        claims_agg_df, "Customer_ID", "inner"
    ).join(premium_agg_df, "Customer_ID", "inner")

    # Custom Calculations
    logger.info("Performing custom calculations...")
    age_expr = (
        F.datediff(F.current_date(), F.to_date("Date_of_Birth", "yyyy-MM-dd"))
        / 365
    )
    # A NULL or zero denominator falls through to otherwise(0).
    claim_to_premium_expr = F.when(
        F.col("total_premium_paid") != 0,
        F.col("Average_Claim_Amount") / F.col("total_premium_paid"),
    ).otherwise(0)
    claims_per_policy_expr = F.when(
        F.col("Policy_Count") != 0,
        F.col("Total_Claims") / F.col("Policy_Count"),
    ).otherwise(0)

    final_df = (
        customer_df.withColumn("Age", age_expr)
        .withColumn("Claim_To_Premium_Ratio", claim_to_premium_expr)
        .withColumn("Claims_Per_Policy", claims_per_policy_expr)
        # The three columns below are fixed placeholder values.
        .withColumn("Retention_Rate", F.lit(0.85))
        .withColumn(
            "Cross_Sell_Opportunities",
            F.lit("Multi-Policy Discount, Home Coverage Add-on"),
        )
        .withColumn("Upsell_Potential", F.lit("Premium Vehicle Coverage"))
    )

    # Join with additional insights
    logger.info("Joining with additional insights...")
    return final_df.join(scores_df, "Customer_ID", "inner").join(
        aiml_insights_df, "Customer_ID", "inner"
    )


def run_customer_360_etl(spark):
    """Run the ETL end to end and overwrite the target Delta table.

    Args:
        spark: The active SparkSession.

    Raises:
        Exception: Any error raised while building or writing the view is
            logged with its traceback and re-raised, so the notebook or job
            run is reported as failed instead of silently succeeding.
    """
    try:
        final_df = build_customer_360(spark)

        # Output Configuration
        logger.info("Writing the final DataFrame to Unity Catalog table...")
        final_df.write.format("delta").mode("overwrite").saveAsTable(TARGET_TABLE)

        logger.info("ETL process completed successfully.")
    except Exception:
        logger.exception("An error occurred during the ETL process")
        raise

# COMMAND ----------

# `spark` is the session object provided by the Databricks notebook runtime.
run_customer_360_etl(spark)
