# Databricks notebook source
# MAGIC %md
# MAGIC # ETL Process for Superstore Sales Data
# MAGIC This notebook performs an ETL process on superstore sales data using PySpark in Databricks.

# COMMAND ----------

import logging
from pyspark.sql import functions as F
from pyspark.sql.types import DateType

# Configure logging: request INFO-level output on the root logger and create
# the module-level logger that the ETL functions in this notebook write to.
# NOTE(review): logging.basicConfig() does nothing when the root logger already
# has handlers — confirm INFO messages actually show up on the target cluster.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Load Data
# MAGIC Load data from Unity Catalog tables.

# COMMAND ----------

def load_data():
    """Read the six source tables and return them as DataFrames.

    Returns:
        A 6-tuple in this order: central orders, west orders, east orders,
        south orders, quota, returns.

    Raises:
        Exception: Any error raised while resolving a table is logged and
            re-raised unchanged.
    """
    # Order matters: callers unpack the result positionally.
    source_tables = (
        "catalog.db.orders_central",
        "catalog.db.orders_west",
        "catalog.db.orders_east",
        "catalog.db.orders_south",
        "catalog.db.quota",
        "catalog.db.returns",
    )
    try:
        logger.info("Loading data from Unity Catalog tables...")
        frames = tuple(spark.table(table_name) for table_name in source_tables)
        logger.info("Data loaded successfully.")
        return frames
    except Exception as e:
        logger.error(f"Error loading data: {e}")
        raise

# COMMAND ----------

# MAGIC %md
# MAGIC ## Transform Data
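# MAGIC Build order and ship dates for the Central orders, add the "Days to Ship" and "Returned?" fields, standardize state names, union all regional orders, and unpivot the quota table.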

# COMMAND ----------

def transform_data(orders_central_df, orders_west_df, orders_east_df, orders_south_df, quota_df, returns_df):
    """Clean the Central orders, union all regional orders, and unpivot quotas.

    Args:
        orders_central_df: Central-region orders. Must provide the columns
            "Order Day", "Order Month", "Order Year", "Ship Day", "Ship Month",
            "Ship Year", "Order ID", "Return Reason" and "State".
        orders_west_df: West-region orders, appended by column name.
        orders_east_df: East-region orders, appended by column name.
        orders_south_df: South-region orders, appended by column name.
        quota_df: Wide quota table with a "Region" column and one column per
            year, "2015" through "2018".
        returns_df: Accepted for interface compatibility; not used by this
            function.

    Returns:
        A tuple ``(all_orders_df, quota_unpivot_df)``: the unioned orders and
        the quota table reshaped to one row per (Region, Year).

    Raises:
        Exception: Any error raised while building the transformations is
            logged and re-raised unchanged.
    """
    try:
        logger.info("Starting data transformation...")

        # Build real dates from the separate day/month/year parts. A plain cast
        # to DateType only understands ISO-style "yyyy-MM-dd" strings, so the
        # "d/M/yyyy" string assembled here must be parsed with an explicit
        # pattern; otherwise every date (and "Days to Ship") ends up NULL.
        # NOTE(review): assumes the parts are numeric day/month/year values —
        # confirm against the orders_central schema.
        date_pattern = "d/M/yyyy"
        orders_central_df = orders_central_df.withColumn(
            "Order Date",
            F.to_date(
                F.concat_ws("/", F.col("Order Day"), F.col("Order Month"), F.col("Order Year")),
                date_pattern,
            ),
        ).withColumn(
            "Ship Date",
            F.to_date(
                F.concat_ws("/", F.col("Ship Day"), F.col("Ship Month"), F.col("Ship Year")),
                date_pattern,
            ),
        )

        # Exclude null Order IDs
        orders_central_df = orders_central_df.filter(F.col("Order ID").isNotNull())

        # Add calculated fields
        orders_central_df = orders_central_df.withColumn(
            "Days to Ship",
            F.datediff(F.col("Ship Date"), F.col("Order Date"))
        ).withColumn(
            "Returned?",
            F.when(F.col("Return Reason").isNotNull(), "Yes").otherwise("No")
        )

        # Standardize state names
        state_mapping = {"California": "CA", "New York": "NY"}  # Example mapping
        orders_central_df = orders_central_df.replace(state_mapping, subset=["State"])

        # Union all orders.
        # NOTE(review): unionByName requires every regional DataFrame to expose
        # the same column names as the transformed Central orders — verify the
        # West/East/South schemas include the columns added above.
        all_orders_df = (
            orders_central_df
            .unionByName(orders_west_df)
            .unionByName(orders_east_df)
            .unionByName(orders_south_df)
        )

        # Unpivot quota data: one output row per (Region, Year) pair.
        quota_unpivot_df = quota_df.selectExpr(
            "Region",
            "stack(4, '2015', `2015`, '2016', `2016`, '2017', `2017`, '2018', `2018`) as (Year, Quota)"
        )

        logger.info("Data transformation completed successfully.")
        return all_orders_df, quota_unpivot_df
    except Exception as e:
        logger.error(f"Error during data transformation: {e}")
        raise

# COMMAND ----------

# MAGIC %md
# MAGIC ## Write Data
# MAGIC Write the transformed data back to Unity Catalog tables.

# COMMAND ----------

def write_data(all_orders_df, quota_unpivot_df):
    """Persist both result DataFrames as Delta tables, replacing existing data.

    Args:
        all_orders_df: Unioned orders, saved to "catalog.db.superstore_sales".
        quota_unpivot_df: Unpivoted quotas, saved to "catalog.db.sales_quota".

    Raises:
        Exception: Any error raised by a write is logged and re-raised
            unchanged.
    """
    # (DataFrame, destination table) pairs, written in this order.
    outputs = (
        (all_orders_df, "catalog.db.superstore_sales"),
        (quota_unpivot_df, "catalog.db.sales_quota"),
    )
    try:
        logger.info("Writing data to Unity Catalog tables...")
        for frame, destination in outputs:
            delta_writer = frame.write.format("delta").mode("overwrite")
            delta_writer.saveAsTable(destination)
        logger.info("Data written successfully.")
    except Exception as e:
        logger.error(f"Error writing data: {e}")
        raise

# COMMAND ----------

# MAGIC %md
# MAGIC ## Main ETL Process
# MAGIC Execute the main ETL process.

# COMMAND ----------

def main():
    """Run the ETL pipeline end to end: load, transform, then write.

    Raises:
        Exception: Any failure from a stage is logged and then re-raised, so
            the run is reported as failed instead of ending silently.
    """
    try:
        # Load data
        orders_central_df, orders_west_df, orders_east_df, orders_south_df, quota_df, returns_df = load_data()

        # Transform data
        all_orders_df, quota_unpivot_df = transform_data(
            orders_central_df, orders_west_df, orders_east_df, orders_south_df, quota_df, returns_df
        )

        # Write data
        write_data(all_orders_df, quota_unpivot_df)

    except Exception as e:
        logger.error(f"ETL process failed: {e}")
        # Propagate the failure, as each stage function does; swallowing it
        # here would let a broken run finish as if it had succeeded.
        raise
# Execute the main process only when this module runs as `__main__`
# (i.e. it is executed directly rather than imported by another module).
if __name__ == "__main__":
    main()
