In [None]:
# COMMAND ----------
import logging
from pyspark.sql import functions as F
from pyspark.sql.types import DoubleType, IntegerType
from datetime import datetime

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# COMMAND ----------
# Helper function to calculate age
def calculate_age(birth_date):
    """Return the age in completed years for ``birth_date`` as of today.

    Args:
        birth_date: Object exposing ``year``, ``month`` and ``day``
            attributes (for example ``datetime.date`` or
            ``datetime.datetime``), or ``None``.

    Returns:
        The number of completed years between ``birth_date`` and the current
        local date, or ``None`` when ``birth_date`` is ``None``.
    """
    # A NULL column value reaches a Python UDF as None; propagate it as NULL
    # instead of failing the whole task with AttributeError.
    if birth_date is None:
        return None

    today = datetime.today()
    # The tuple comparison is True (counts as 1) while this year's birthday
    # is still ahead, which removes the not-yet-completed year.
    birthday_pending = (today.month, today.day) < (birth_date.month, birth_date.day)
    return today.year - birth_date.year - birthday_pending

# Spark UDF wrapper around calculate_age, declared to produce an integer column
calculate_age_udf = F.udf(f=calculate_age, returnType=IntegerType())

def claim_to_premium_ratio(claim_amount, total_premium_paid):
    """Return ``claim_amount / total_premium_paid`` as a float.

    Args:
        claim_amount: Numeric claim amount (int, float or Decimal), or None.
        total_premium_paid: Numeric premium amount (int, float or Decimal),
            or None.

    Returns:
        The ratio as a ``float``; ``0.0`` when ``total_premium_paid`` is zero;
        ``None`` when either input is ``None``.
    """
    # NULL column values arrive as None; return NULL rather than raising
    # TypeError on the division.
    if claim_amount is None or total_premium_paid is None:
        return None
    if total_premium_paid == 0:
        # Must be a float: this function is registered with DoubleType, and a
        # Python int returned from such a UDF may be stored as NULL.
        return 0.0
    # Converting both operands keeps the result a float for Decimal inputs
    # and allows mixed Decimal/float arguments.
    return float(claim_amount) / float(total_premium_paid)

# Spark UDF wrapper around claim_to_premium_ratio, declared to produce a double column
claim_to_premium_ratio_udf = F.udf(f=claim_to_premium_ratio, returnType=DoubleType())

def claims_per_policy(total_claims, policy_count):
    """Return the average number of claims per policy as a float.

    Args:
        total_claims: Numeric claim count, or None.
        policy_count: Numeric policy count, or None.

    Returns:
        ``total_claims / policy_count`` as a ``float``; ``0.0`` when
        ``policy_count`` is zero; ``None`` when either input is ``None``.
    """
    # NULL column values arrive as None; return NULL rather than raising
    # TypeError on the division.
    if total_claims is None or policy_count is None:
        return None
    if policy_count == 0:
        # Must be a float: this function is registered with DoubleType, and a
        # Python int returned from such a UDF may be stored as NULL.
        return 0.0
    return float(total_claims) / float(policy_count)

# Spark UDF wrapper around claims_per_policy, declared to produce a double column
claims_per_policy_udf = F.udf(f=claims_per_policy, returnType=DoubleType())

# COMMAND ----------
# Load data from Unity Catalog tables
try:
    # Resolve the five source tables in listing order and bind each one to
    # its DataFrame name.
    policy_df, claims_df, demographics_df, scores_df, aiml_insights_df = [
        spark.table(table_name)
        for table_name in (
            "catalog.source_db.policy_data",
            "catalog.source_db.claims_data",
            "catalog.source_db.demographics_data",
            "catalog.source_db.scores_data",
            "catalog.source_db.aiml_insights_data",
        )
    ]
    logger.info("Data loaded successfully from Unity Catalog tables.")
except Exception as load_error:
    # Log for the notebook output, then re-raise so the run stops here.
    logger.error(f"Error loading data from Unity Catalog tables: {load_error}")
    raise

# COMMAND ----------
# Data Selection and Filtering
try:
    # Keep only the customer attributes listed here; every other
    # demographics column is discarded.
    demographics_df = demographics_df.select(
        [
            "Customer_ID",
            "Customer_Name",
            "Email",
            "Phone_Number",
            "Address",
            "City",
            "State",
            "Postal_Code",
            "Date_of_Birth",
            "Gender",
            "Marital_Status",
            "Occupation",
            "Income_Level",
            "Customer_Segment",
        ]
    )
    logger.info("Demographics data selected successfully.")
except Exception as select_error:
    # Log for the notebook output, then re-raise so the run stops here.
    logger.error(f"Error selecting demographics data: {select_error}")
    raise

# COMMAND ----------
# Data Integration
try:
    # Inner join: only customers with at least one policy row are kept.
    # The join condition leaves both key columns in the result
    # (demographics Customer_ID and policy customer_id). Spark resolves
    # column names case-insensitively by default, so keeping both makes any
    # later reference to "Customer_ID" ambiguous. Drop the policy-side copy.
    joined_df = demographics_df.join(
        policy_df,
        demographics_df.Customer_ID == policy_df.customer_id,
        "inner",
    ).drop(policy_df.customer_id)
    logger.info("Data joined successfully between demographics and policy data.")
except Exception as e:
    # Log for the notebook output, then re-raise so the run stops here.
    logger.error(f"Error joining demographics and policy data: {e}")
    raise

# COMMAND ----------
# Further join with claims data
try:
    # Inner join: only policies with at least one claim row are kept.
    # The join condition leaves both key columns in the result (policy_id
    # and the claims-side Policy_ID). Spark resolves column names
    # case-insensitively by default, so keeping both makes any later
    # reference to "policy_id" ambiguous. Drop the claims-side copy.
    # NOTE(review): if claims_data carries other columns that also exist on
    # the left side (e.g. a customer id), they need the same treatment.
    joined_df = joined_df.join(
        claims_df,
        joined_df.policy_id == claims_df.Policy_ID,
        "inner",
    ).drop(claims_df.Policy_ID)
    logger.info("Data joined successfully with claims data.")
except Exception as e:
    # Log for the notebook output, then re-raise so the run stops here.
    logger.error(f"Error joining with claims data: {e}")
    raise

# COMMAND ----------
# Data Aggregation
try:
    # Address the grouping keys through the DataFrames they originate from.
    # The joined frame can carry the join key from both sides (Customer_ID /
    # customer_id, policy_id / Policy_ID) and Spark matches column names
    # case-insensitively by default, so bare names may be ambiguous.
    customer_key = demographics_df["Customer_ID"]
    policy_key = policy_df["policy_id"]

    claim_summary_df = joined_df.groupBy(customer_key).agg(
        F.count("Claim_ID").alias("Total_Claims"),
        F.countDistinct(policy_key).alias("Policy_Count"),
        F.max("Claim_Date").alias("Recent_Claim_Date"),
        F.avg("Claim_Amount").alias("Average_Claim_Amount"),
        # Inputs for the custom calculations further down. A grouped frame
        # only keeps the grouping key and the aggregates, so anything the
        # calculations read has to be aggregated here.
        F.sum("Claim_Amount").alias("Total_Claim_Amount"),
        F.first("Date_of_Birth", ignorenulls=True).alias("Date_of_Birth"),
    )

    # NOTE(review): total_premium_paid is assumed to be a single amount per
    # policy that repeats on every claim row of that policy - confirm against
    # the policy_data schema. Under that assumption it is collapsed to one
    # value per (customer, policy) before summing so that policies with
    # several claims are not counted more than once.
    premium_df = (
        joined_df.groupBy(customer_key, policy_key)
        .agg(F.first("total_premium_paid", ignorenulls=True).alias("Policy_Premium"))
        .groupBy("Customer_ID")
        .agg(F.sum("Policy_Premium").alias("Total_Premium_Paid"))
    )

    # Both frames are grouped from the same rows, so every customer has a
    # match; the left join only guards against losing summary rows.
    aggregated_df = claim_summary_df.join(premium_df, "Customer_ID", "left")
    logger.info("Data aggregated successfully.")
except Exception as e:
    # Log for the notebook output, then re-raise so the run stops here.
    logger.error(f"Error aggregating data: {e}")
    raise

# COMMAND ----------
# Custom Calculations
try:
    final_df = (
        aggregated_df
        .withColumn("Age", calculate_age_udf(F.col("Date_of_Birth")))
        # Customer-level ratio: total claimed amount over total premium of
        # the customer's policies that have claims.
        .withColumn(
            "Claim_To_Premium_Ratio",
            claim_to_premium_ratio_udf(
                F.col("Total_Claim_Amount"), F.col("Total_Premium_Paid")
            ),
        )
        .withColumn(
            "Claims_Per_Policy",
            claims_per_policy_udf(F.col("Total_Claims"), F.col("Policy_Count")),
        )
        # The three columns below are constants applied to every customer.
        .withColumn("Retention_Rate", F.lit(0.85))
        .withColumn("Cross_Sell_Opportunities", F.lit("Multi-Policy Discount, Home Coverage Add-on"))
        .withColumn("Upsell_Potential", F.lit("Premium Vehicle Coverage"))
    )
    logger.info("Custom calculations applied successfully.")
except Exception as e:
    # Log for the notebook output, then re-raise so the run stops here.
    logger.error(f"Error applying custom calculations: {e}")
    raise

# COMMAND ----------
# Comprehensive Data Assembly
try:
    # Attach the scores first, then the AI/ML insights. Both are inner joins
    # on Customer_ID, so only customers present in all three inputs remain.
    comprehensive_df = (
        final_df
        .join(scores_df, on="Customer_ID", how="inner")
        .join(aiml_insights_df, on="Customer_ID", how="inner")
    )
    logger.info("Comprehensive data assembly completed successfully.")
except Exception as assembly_error:
    # Log for the notebook output, then re-raise so the run stops here.
    logger.error(f"Error in comprehensive data assembly: {assembly_error}")
    raise

# COMMAND ----------
# Output Data
try:
    # Replace the contents of the target table with this run's result,
    # stored in Delta format.
    (
        comprehensive_df.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable("catalog.target_db.customer_360")
    )
    logger.info("Data written successfully to Unity Catalog target table.")
except Exception as write_error:
    # Log for the notebook output, then re-raise so the run stops here.
    logger.error(f"Error writing data to Unity Catalog target table: {write_error}")
    raise
