# Databricks notebook source
# MAGIC %md
# MAGIC # ETL Process for Global Indicators
# MAGIC This notebook performs an ETL process on data from Unity Catalog tables, applying various transformations and writing the results to a Delta table.

# COMMAND ----------

import logging
from pyspark.sql import functions as F

# Logging setup: grab this module's logger, then configure the root logger
# to emit records at INFO level and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# COMMAND ----------

# ETL pipeline: load two Unity Catalog tables, clean and filter the global
# indicators, join them with the consumer price indices on the country key,
# and overwrite the target Delta table.
#
# The whole pipeline is wrapped in one try/except so that any failure is
# logged with its traceback and then re-raised. Because a try statement cannot
# span notebook cells, no "# COMMAND ----------" separators may appear inside
# this block.
try:
    # --- Extract -----------------------------------------------------------
    logger.info("Loading data from Unity Catalog tables.")
    global_indicators_df = spark.table("catalog.source_db.global_world_indicators_2000")
    consumer_price_indices_df = spark.table("catalog.source_db.ConsumerPriceIndices")

    # --- Transformation: Remove Columns ------------------------------------
    logger.info("Removing unnecessary columns.")
    columns_to_drop = ["UnnecessaryColumn1", "UnnecessaryColumn2"]  # Replace with actual column names
    global_indicators_df = global_indicators_df.drop(*columns_to_drop)

    # --- Transformation: Change Column Type --------------------------------
    logger.info("Changing column types.")
    if "GDP" in global_indicators_df.columns:
        global_indicators_df = global_indicators_df.withColumn("GDP", F.col("GDP").cast("double"))

    # --- Transformation: Remap ---------------------------------------------
    logger.info("Remapping column values.")
    global_indicators_df = global_indicators_df.withColumn(
        "Ease of Business",
        F.when(F.col("Ease of Business") == "OldValue", "NewValue").otherwise(F.col("Ease of Business"))
    )

    # --- Transformation: Add Column ----------------------------------------
    logger.info("Adding new column.")
    if "ExistingColumn" in global_indicators_df.columns:
        global_indicators_df = global_indicators_df.withColumn("NewColumn", F.col("ExistingColumn") * 2)

    # --- Transformation: Rename Column -------------------------------------
    logger.info("Renaming columns.")
    if "OldColumnName" in global_indicators_df.columns:
        global_indicators_df = global_indicators_df.withColumnRenamed("OldColumnName", "NewColumnName")

    # --- Transformation: Filter Operation ----------------------------------
    logger.info("Applying filter operation.")
    if "Year" in global_indicators_df.columns:
        global_indicators_df = global_indicators_df.filter(F.col("Year").cast("int") > 2000)

    # --- Transformation: Range Filter --------------------------------------
    # NOTE(review): unlike the cast above, this step (and the aggregation
    # below) assumes "GDP" exists and will raise if it does not.
    logger.info("Applying range filter.")
    global_indicators_df = global_indicators_df.filter((F.col("GDP") > 1000) & (F.col("GDP") < 5000))

    # --- Transformation: Aggregate -----------------------------------------
    # NOTE(review): aggregated_df is not consumed by any later step yet; it is
    # only the intended input of the unpivot placeholder below.
    logger.info("Performing aggregation.")
    aggregated_df = global_indicators_df.groupBy("Country/Region").agg(F.sum("GDP").alias("Total_GDP"))

    # --- Transformation: Unpivot -------------------------------------------
    # Implement unpivot logic here
    logger.info("Unpivoting data.")
    # Example unpivot logic (to be replaced with actual logic)
    # unpivoted_df = custom_unpivot_function(aggregated_df)

    # --- Join ----------------------------------------------------------------
    logger.info("Joining dataframes.")
    join_key = "Country/Region"

    # Drop every right-hand column that also exists on the left (except the
    # join key) so the joined frame has no duplicate names. The comparison is
    # case-insensitive because Spark (by default) and Delta resolve column
    # names case-insensitively, so "year" and "Year" would still collide.
    left_names = {name.lower() for name in global_indicators_df.columns}
    overlapping_columns = [
        name
        for name in consumer_price_indices_df.columns
        if name.lower() in left_names and name.lower() != join_key.lower()
    ]
    consumer_price_indices_df = consumer_price_indices_df.drop(*overlapping_columns)

    # Joining on the column *name* (not an equality expression) keeps a single
    # copy of the key in the result. An expression join would keep both
    # "Country/Region" columns and the Delta write below would fail on the
    # duplicate column name.
    final_df = global_indicators_df.join(
        consumer_price_indices_df,
        on=join_key,
        how="inner"
    )

    # --- Custom Calculation: Extract Year ----------------------------------
    # Keeps the first four characters of the stringified value, so "Year"
    # becomes a string column from here on.
    # NOTE(review): assumes "Year" is present; the earlier filter treats it as
    # optional - confirm which is intended.
    logger.info("Performing custom calculation to extract year.")
    final_df = final_df.withColumn("Year", F.substring(F.col("Year").cast("string"), 1, 4))

    # --- Load ----------------------------------------------------------------
    # NOTE(review): "Ease of Business" contains spaces; confirm the target
    # table accepts such column names (e.g. Delta column mapping enabled).
    logger.info("Writing final dataframe to Delta table.")
    final_df.write.format("delta").mode("overwrite").saveAsTable("catalog.target_db.Global_Indicators")

    logger.info("ETL process completed successfully.")

except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, which a
    # plain logger.error of str(e) would lose. Re-raise so the job still fails.
    logger.exception("An error occurred during the ETL process: %s", e)
    raise
