In [None]:
# Databricks notebook source
# MAGIC %md
# MAGIC # ETL Process with PySpark
# MAGIC This notebook demonstrates an ETL process using PySpark, including data loading, transformation, and writing to a Unity Catalog target table.

# COMMAND ----------

import logging
from pyspark.sql import functions as F
from pyspark.sql.utils import AnalysisException

# Configure logging
# Request INFO-level output on the root logger so the progress messages
# emitted by the ETL steps in this notebook are shown.
# NOTE(review): basicConfig does nothing if the root logger already has
# handlers installed - confirm this takes effect on the target runtime.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every function defined in this notebook.
logger = logging.getLogger(__name__)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Function Definitions
# MAGIC Define functions for column validation, data loading, transformation, custom calculations, and data writing.

# COMMAND ----------

def validate_columns(df, required_columns):
    """Raise ``ValueError`` unless every required column is present in ``df``.

    Args:
        df: Object exposing a ``columns`` collection (for example a Spark
            DataFrame).
        required_columns: Iterable of column names that must be present.

    Raises:
        ValueError: If one or more names are absent. The message lists the
            absent names in the order they were requested.
    """
    absent = []
    for name in required_columns:
        if name in df.columns:
            continue
        absent.append(name)

    # Nothing missing: the DataFrame satisfies the contract.
    if not absent:
        return
    raise ValueError(f"Missing required columns: {absent}")

# COMMAND ----------

def load_data(table_name="catalog.source_db.source_table"):
    """Load the source table as a DataFrame.

    Relies on a ``spark`` session object being available as a global; it is
    not defined in this section of the notebook.

    Args:
        table_name: Name of the table to read. Defaults to the source table
            this notebook was originally hard-wired to, so existing
            zero-argument calls behave as before.

    Returns:
        The result of ``spark.table(table_name)``.
    """
    logger.info("Loading data from Unity Catalog source table.")
    return spark.table(table_name)

# COMMAND ----------

def transform_data(df):
    """Filter the source rows and derive ``new_column``.

    Keeps only rows whose ``column_name`` is greater than zero, then adds
    ``new_column`` holding ``existing_column`` multiplied by two.

    Args:
        df: DataFrame containing ``column_name`` and ``existing_column``.

    Returns:
        The filtered DataFrame with ``new_column`` added.

    Raises:
        ValueError: If either required column is missing from ``df``.
    """
    logger.info("Applying transformations to the source data.")
    validate_columns(df, ['column_name', 'existing_column'])

    kept_rows = df.filter(F.col('column_name') > 0)
    doubled = F.col('existing_column') * 2
    return kept_rows.withColumn('new_column', doubled)

# COMMAND ----------

def apply_custom_calculations(df):
    """Add ``custom_metric`` (``value`` divided by ``total``) to the DataFrame.

    Rows whose ``total`` is zero or NULL get a NULL ``custom_metric``. The
    division is guarded explicitly so the outcome does not depend on the
    session's ANSI setting: with ``spark.sql.ansi.enabled=true`` an unguarded
    division by zero fails the query instead of yielding NULL.

    Args:
        df: DataFrame containing ``value`` and ``total`` columns.

    Returns:
        A new DataFrame with the extra ``custom_metric`` column.

    Raises:
        ValueError: If ``value`` or ``total`` is missing from ``df``.
    """
    logger.info("Applying custom calculations.")
    validate_columns(df, ['value', 'total'])

    total = F.col('total')
    # No ``otherwise`` branch on purpose: rows that fail the guard
    # (total == 0 or total IS NULL) fall through to NULL.
    custom_metric = F.when(total != 0, F.col('value') / total)
    return df.withColumn('custom_metric', custom_metric)

# COMMAND ----------

def write_data(df, table_name="catalog.target_db.target_table", mode="overwrite"):
    """Write the DataFrame to a Delta table.

    Args:
        df: DataFrame to persist.
        table_name: Name of the destination table. Defaults to the target
            table this notebook was originally hard-wired to.
        mode: Save mode passed to ``DataFrameWriter.mode``. Defaults to
            ``"overwrite"``, the original behavior, so existing
            ``write_data(df)`` calls are unaffected.
    """
    logger.info("Writing transformed data to Unity Catalog target table.")
    df.write.format("delta").mode(mode).saveAsTable(table_name)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Main ETL Process
# MAGIC Execute the ETL process by loading data, transforming it, applying custom calculations, and writing the results.

# COMMAND ----------

def main():
    """Run the ETL pipeline end to end: load, transform, calculate, write.

    Every failure is logged with its traceback and then re-raised, so the
    caller (or whatever runs this notebook) sees the run as failed rather
    than as a silent success with nothing written.

    Raises:
        AnalysisException: Re-raised after logging when a pipeline step
            raises it.
        ValueError: Re-raised after logging, e.g. when ``validate_columns``
            reports a missing required column.
        Exception: Any other error from a pipeline step, re-raised after
            logging.
    """
    try:
        # Load data
        source_df = load_data()

        # Transform data
        transformed_df = transform_data(source_df)

        # Apply custom calculations
        calculated_df = apply_custom_calculations(transformed_df)

        # Write data
        write_data(calculated_df)

    except AnalysisException as ae:
        logger.error("Analysis error during the ETL process: %s", ae, exc_info=True)
        raise
    except ValueError as ve:
        logger.error("Validation error: %s", ve, exc_info=True)
        raise
    except Exception:
        logger.error("An unexpected error occurred during the ETL process.", exc_info=True)
        raise
    else:
        # Only reached when every step above finished without raising.
        logger.info("ETL process completed successfully.")


if __name__ == "__main__":
    main()
