In [None]:
# Databricks notebook source
# Import necessary libraries
import logging
from pyspark.sql import functions as F
from pyspark.sql import DataFrame

# COMMAND ----------

# Configure logging.
# logging.basicConfig() does nothing when the root logger already has
# handlers (a hosted notebook runtime may have installed some before this
# cell runs), so the level is also set on this module's logger to keep the
# INFO messages below enabled in either case.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# COMMAND ----------

# Function to load data from Unity Catalog tables
def load_data(table_name: str) -> DataFrame:
    """Return the table ``table_name`` as a DataFrame via ``spark.table``.

    Relies on a global ``spark`` session that this file does not define
    itself (presumably injected by the notebook runtime -- confirm before
    running this module anywhere else).

    Args:
        table_name: Name of the table to read, passed unchanged to
            ``spark.table``.

    Returns:
        The DataFrame produced by ``spark.table(table_name)``.

    Raises:
        Exception: Whatever ``spark.table`` raises; it is logged and then
            re-raised unchanged.
    """
    logger.info("Loading data from table: %s", table_name)
    try:
        return spark.table(table_name)
    except Exception as e:
        # logger.exception keeps the original message text and also records
        # the traceback, which logger.error would drop.
        logger.exception("Error loading data from table %s: %s", table_name, e)
        raise

# COMMAND ----------

# Load every source table from the genai_demo.cardinal_health schema.
# The target names on the left are matched positionally with the table names
# on the right, so the two sequences must stay in the same order.
# NOTE(review): growth_opportunities_df, logistics_channels_df,
# third_party_trends_df and company_goals_df are not referenced again in the
# source shown here -- confirm they are still needed.
(
    associates_df,
    compensation_df,
    hospital_assignments_df,
    hospitals_df,
    historical_sales_df,
    growth_opportunities_df,
    logistics_channels_df,
    third_party_trends_df,
    company_goals_df,
) = [
    load_data(f"genai_demo.cardinal_health.{table}")
    for table in (
        "associates_employment",
        "compensation_guidelines",
        "hospital_assignments",
        "hospitals_stats",
        "historical_sales_trending",
        "growth_opportunities",
        "logistics_channels",
        "third_party_trends",
        "company_goals_1",
    )
]

# COMMAND ----------

# Data Integration: join associates with compensation on Associate_ID.
# The join type is spelled out; "inner" is also DataFrame.join's default, so
# rows without a matching Associate_ID on either side are dropped.
logger.info("Joining associates with compensation data")
try:
    associates_compensation_df = associates_df.join(
        compensation_df, on="Associate_ID", how="inner"
    )
except Exception as e:
    # DataFrame transformations are evaluated lazily, so only errors raised
    # while building the join can be caught here; data-level failures show
    # up when an action runs. logger.exception also records the traceback.
    logger.exception("Error joining associates with compensation: %s", e)
    raise

# COMMAND ----------

# Data Integration: join hospital assignments with hospital stats on the
# (Hospital_ID, Hospital_Name) pair. The join type is spelled out; "inner"
# is also DataFrame.join's default.
# The result is bound back to hospital_assignments_df, so re-running this
# cell without reloading the table joins the already-joined frame again.
logger.info("Joining hospital data with assignments")
try:
    hospital_assignments_df = hospital_assignments_df.join(
        hospitals_df, on=["Hospital_ID", "Hospital_Name"], how="inner"
    )
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception("Error joining hospital data with assignments: %s", e)
    raise

# COMMAND ----------

# Custom Calculations: add Total_Compensation =
#   Base_Salary + Commission_Percentage * Base_Salary + Bonus
# NOTE(review): Commission_Percentage is multiplied in directly, which
# assumes it is stored as a fraction (0.05 for 5%). Projected_Sales_Growth_Rate
# in this file is divided by 100 before use -- confirm the two columns really
# use different scales.
# NOTE(review): presumably a null in any of the three inputs makes the total
# null (standard SQL null arithmetic) -- confirm that is acceptable.
logger.info("Calculating total compensation for associates")
try:
    associates_compensation_df = associates_compensation_df.withColumn(
        "Total_Compensation",
        F.col("Base_Salary")
        + (F.col("Commission_Percentage") * F.col("Base_Salary"))
        + F.col("Bonus"),
    )
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception("Error calculating total compensation: %s", e)
    raise

# COMMAND ----------

# Custom Calculations: add Projected_Revenue, chosen by Target_Year:
#   2024         -> Sales_Revenue * (Projected_Sales_Growth_Rate / 100)
#   2025 or 2026 -> Sales_Revenue * (1 + Projected_Sales_Growth_Rate / 100)
#   anything else -> Sales_Revenue unchanged
# NOTE(review): the 2024 branch yields only the growth increment, while the
# 2025/2026 branch yields revenue plus growth. This looks like a missing
# "1 +" but the formula is left as-is -- confirm against the business rule.
logger.info("Calculating projected revenue")
try:
    historical_sales_df = historical_sales_df.withColumn(
        "Projected_Revenue",
        F.when(
            F.col("Target_Year") == 2024,
            F.col("Sales_Revenue") * (F.col("Projected_Sales_Growth_Rate") / 100),
        )
        .when(
            F.col("Target_Year").isin([2025, 2026]),
            F.col("Sales_Revenue") * (1 + F.col("Projected_Sales_Growth_Rate") / 100),
        )
        .otherwise(F.col("Sales_Revenue")),
    )
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception("Error calculating projected revenue: %s", e)
    raise

# COMMAND ----------

# Data Filtering and Sorting: keep rows with Target_Year after 2023, then
# order them by Target_Year (ascending, orderBy's default).
# NOTE(review): the only later uses of sorted_df in the source shown here are
# dropDuplicates and a table write, neither of which is documented to keep
# row order -- confirm the sort is actually needed.
logger.info("Filtering and sorting data")
try:
    filtered_df = historical_sales_df.filter(F.col("Target_Year") > 2023)
    sorted_df = filtered_df.orderBy("Target_Year")
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception("Error filtering and sorting data: %s", e)
    raise

# COMMAND ----------

# Data Validation and Uniqueness: keep one row per
# (Channel_ID, Channel_Type, Hospital_ID) combination.
# NOTE(review): Target_Year is not part of the key, so rows for different
# years that share a key collapse into one, and dropDuplicates does not
# document which of them survives -- confirm this is the intended grain.
logger.info("Ensuring data uniqueness")
try:
    unique_df = sorted_df.dropDuplicates(
        ["Channel_ID", "Channel_Type", "Hospital_ID"]
    )
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception("Error ensuring data uniqueness: %s", e)
    raise

# COMMAND ----------

# Output Data Storage: replace the contents of
# genai_demo.cardinal_health.hospitals_output with unique_df, as a Delta table.
# This is the first action in the source shown here, so failures originating
# in the earlier lazy transformations can surface at this step.
# NOTE(review): only the sales-derived unique_df is persisted;
# associates_compensation_df and the joined hospital_assignments_df are not
# written anywhere in the source shown -- confirm that is intended.
logger.info("Writing processed data to Unity Catalog table")
try:
    (
        unique_df.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable("genai_demo.cardinal_health.hospitals_output")
    )
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception("Error writing data to Unity Catalog table: %s", e)
    raise

# COMMAND ----------

# Reached only when every step above ran without raising: each handler in
# this file re-raises after logging.
logger.info("ETL process completed successfully")
