In [None]:
# Databricks notebook source
import logging
from pyspark.sql import functions as F
from pyspark.sql import DataFrame
from pyspark.sql import SparkSession

# Configure logging: request INFO-level output on the root logger and create
# the module-level logger used by the functions below.
# NOTE: logging.basicConfig() does nothing if the root logger already has
# handlers installed (e.g. by the hosting runtime), so the effective level
# may be decided elsewhere.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# COMMAND ----------

def load_data_from_unity_catalog(table_name: str) -> DataFrame:
    """Read a catalog table into a DataFrame via ``spark.table``.

    Relies on a ``spark`` session object being available in the enclosing
    scope; it is not created in this function.

    Args:
        table_name: Name of the table to read, passed unchanged to
            ``spark.table``.

    Returns:
        The DataFrame produced by ``spark.table(table_name)``.

    Raises:
        Exception: Any error raised during the lookup is logged and then
            re-raised unchanged.
    """
    try:
        logger.info(f"Loading data from table: {table_name}")
        table_df = spark.table(table_name)
    except Exception as load_error:
        # Log with the table name for context, then let the caller decide.
        logger.error(f"Error loading data from table {table_name}: {load_error}")
        raise
    else:
        return table_df

# COMMAND ----------

def calculate_total_compensation(df: DataFrame) -> DataFrame:
    """Append a ``Compensation`` column to ``df``.

    The value is ``Base_Salary`` plus the commission amount
    (``Commission_Percentage`` / 100 of ``Base_Salary``) plus ``Bonus``.

    Args:
        df: DataFrame exposing ``Base_Salary``, ``Commission_Percentage``
            and ``Bonus`` columns.

    Returns:
        A DataFrame with the additional ``Compensation`` column.

    Raises:
        Exception: Any error raised while building the column is logged and
            then re-raised unchanged.
    """
    try:
        logger.info("Calculating total compensation")
        base_salary = df["Base_Salary"]
        # Commission is stored as a percentage, so scale it to a fraction
        # before applying it to the base salary.
        commission_amount = (df["Commission_Percentage"] / 100) * df["Base_Salary"]
        total_compensation = base_salary + commission_amount + df["Bonus"]
        enriched_df = df.withColumn("Compensation", total_compensation)
    except Exception as calc_error:
        logger.error(f"Error calculating total compensation: {calc_error}")
        raise
    else:
        return enriched_df

# COMMAND ----------

def calculate_projected_sales_growth_rate(df: DataFrame) -> DataFrame:
    """Append a ``Projected_Sales_Growth_Rate`` column to ``df``.

    The new column starts from ``Projected_Growth_Rate`` and adds a multiple
    of one hundredth of that rate depending on ``Year``:

    * 2024: rate + 1 * (rate / 100)
    * 2025: rate + 2 * (rate / 100)
    * 2026: rate + 3 * (rate / 100)
    * any other year: rate unchanged

    Args:
        df: DataFrame exposing ``Year`` and ``Projected_Growth_Rate`` columns.

    Returns:
        A DataFrame with the additional ``Projected_Sales_Growth_Rate`` column.

    Raises:
        Exception: Any error raised while building the column is logged and
            then re-raised unchanged.
    """
    try:
        logger.info("Calculating projected sales growth rate")
        target_year = df["Year"]
        base_rate = df["Projected_Growth_Rate"]
        hundredth_of_rate = base_rate / 100

        adjusted_rate = (
            F.when(target_year == 2024, base_rate + hundredth_of_rate)
            .when(target_year == 2025, base_rate + 2 * hundredth_of_rate)
            .when(target_year == 2026, base_rate + 3 * hundredth_of_rate)
            .otherwise(base_rate)
        )
        result_df = df.withColumn("Projected_Sales_Growth_Rate", adjusted_rate)
    except Exception as calc_error:
        logger.error(f"Error calculating projected sales growth rate: {calc_error}")
        raise
    else:
        return result_df

# COMMAND ----------

def main():
    """Run the ETL: load source tables, join them, and write ``Target_sales``.

    Steps, in order:

    1. Load five tables from the ``genai_demo.cardinal_health`` schema.
    2. Inner-join employment details with compensation guidelines on
       ``Associate_ID`` and compute the ``Compensation`` column.
    3. Inner-join hospital sales assignments on ``Associate_ID`` and hospital
       stats on ``Hospital_ID``.
    4. Compute the projected sales growth rate on the historical sales data.
    5. Keep rows with ``Year`` greater than 2023, project the output columns,
       and overwrite the ``genai_demo.cardinal_health.Target_sales`` table
       in Delta format.

    Raises:
        Exception: Any error from the steps above is logged and re-raised
            unchanged.
    """
    try:
        # Load data from Unity Catalog tables
        employment_details_df = load_data_from_unity_catalog("genai_demo.cardinal_health.SalesAssociates_EmploymentDetails")
        compensation_guidelines_df = load_data_from_unity_catalog("genai_demo.cardinal_health.Compensation_Guidelines")
        historical_sales_df = load_data_from_unity_catalog("genai_demo.cardinal_health.Historical_Sales")
        hospital_sales_assignments_df = load_data_from_unity_catalog("genai_demo.cardinal_health.HospitalSales_Assignments")
        hospital_stats_df = load_data_from_unity_catalog("genai_demo.cardinal_health.hospital_stats_north_america")

        # Join employment details with compensation guidelines
        logger.info("Joining employment details with compensation guidelines")
        joined_df = employment_details_df.join(compensation_guidelines_df, "Associate_ID", "inner")

        # Calculate total compensation
        compensation_df = calculate_total_compensation(joined_df)

        # Join with hospital sales assignments
        logger.info("Joining with hospital sales assignments")
        sales_assignments_df = compensation_df.join(hospital_sales_assignments_df, "Associate_ID", "inner")

        # Join with hospital stats
        logger.info("Joining with hospital stats")
        final_df = sales_assignments_df.join(hospital_stats_df, "Hospital_ID", "inner")

        # Calculate projected sales growth rate
        # NOTE(review): this result is not joined into the output below. The
        # key linking Historical_Sales to the other tables is not visible
        # here, so the call is kept as-is -- confirm how it should be used.
        projected_growth_df = calculate_projected_sales_growth_rate(historical_sales_df)

        # Filter data BEFORE projecting: the output projection does not keep
        # "Year", so filtering the projected frame on "Year" cannot resolve
        # the column.
        # NOTE(review): assumes "Year" is present on the joined frame; if it
        # only exists in Historical_Sales, that table must be joined in first.
        logger.info("Filtering data for years greater than 2023")
        filtered_df = final_df.filter(final_df["Year"] > 2023)

        # Select relevant fields
        logger.info("Selecting relevant fields")
        selected_df = filtered_df.select(
            "Hospital_ID", "Director_Name", "Manager_Name", "Associate_ID", "Associate_Name", "Compensation"
        )

        # Write to Delta table (overwrite replaces any existing contents)
        logger.info("Writing final data to Delta table")
        selected_df.write.format("delta").mode("overwrite").saveAsTable("genai_demo.cardinal_health.Target_sales")

    except Exception as e:
        logger.error(f"Error in ETL process: {e}")
        raise

# COMMAND ----------

# Entry point: run the ETL only when this file is executed as the main
# program, not when it is imported as a module.
if __name__ == "__main__":
    main()
