In [None]:
# Databricks notebook source
# MAGIC %md
# MAGIC # Data Transformation and Loading in Databricks
# MAGIC This notebook performs data transformation and loading using PySpark in Databricks.

# COMMAND ----------

import logging
from pyspark.sql import functions as F
from pyspark.sql.types import DateType, DoubleType

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# COMMAND ----------

# The whole pipeline lives in ONE cell on purpose: a try/except statement
# cannot span "# COMMAND ----------" separators, because each cell is executed
# as its own unit of code.


def _date_from_parts(prefix):
    """Build a date column from the "<prefix> Day/Month/Year" columns.

    The parts are joined as "day/month/year" and parsed with an explicit
    pattern. A plain cast to DateType only understands ISO-like
    "yyyy-MM-dd" strings, so it would turn every "d/M/yyyy" value into NULL.
    Assumes four-digit years -- TODO confirm against the source table.
    """
    parts = [F.col(f"{prefix} {unit}") for unit in ("Day", "Month", "Year")]
    return F.to_date(F.concat_ws("/", *parts), "d/M/yyyy")


try:
    # --- Load data from Unity Catalog tables ---
    orders_central_df = spark.table("catalog.db.orders_central")
    orders_west_df = spark.table("catalog.db.orders_west")
    orders_east_df = spark.table("catalog.db.orders_east")
    orders_south_df = spark.table("catalog.db.orders_south")
    quota_df = spark.table("catalog.db.quota")
    returns_df = spark.table("catalog.db.returns")

    # --- Orders Central: standardize date fields ---
    orders_central_df = (
        orders_central_df
        .withColumn("Order Date", _date_from_parts("Order"))
        .withColumn("Ship Date", _date_from_parts("Ship"))
    )

    # Exclude null Order IDs
    orders_central_df = orders_central_df.filter(F.col("Order ID").isNotNull())

    # Calculate Days to Ship (ship date minus order date, in days)
    orders_central_df = orders_central_df.withColumn(
        "Days to Ship", F.datediff(F.col("Ship Date"), F.col("Order Date"))
    )

    # --- Returns: flag rows that carry a return reason ---
    returns_df = returns_df.withColumn(
        "Returned?",
        F.when(F.col("Return Reason").isNotNull(), "Yes").otherwise("No"),
    )

    # Add Year of Sale
    orders_central_df = orders_central_df.withColumn(
        "Year of Sale", F.year(F.col("Order Date"))
    )

    # --- Rename columns for consistency across regions ---
    orders_central_df = (
        orders_central_df
        .withColumnRenamed("Discounts", "Discount")
        .withColumnRenamed("Product", "Product Name")
    )
    orders_west_df = orders_west_df.withColumnRenamed("Product", "Product Name")
    orders_east_df = orders_east_df.withColumnRenamed("Product", "Product Name")
    orders_south_df = orders_south_df.withColumnRenamed("Product", "Product Name")

    # --- Clean Sales: strip everything but digits and "." before casting ---
    orders_central_df = orders_central_df.withColumn(
        "Sales",
        F.regexp_replace(F.col("Sales"), "[^0-9.]", "").cast(DoubleType()),
    )

    # --- Union all orders ---
    # NOTE(review): unionByName matches columns by name, so the West/East/South
    # tables must already expose the same column names as the transformed
    # Central frame (including "Days to Ship" and "Year of Sale") -- confirm.
    all_orders_df = (
        orders_central_df
        .unionByName(orders_west_df)
        .unionByName(orders_east_df)
        .unionByName(orders_south_df)
    )

    # --- Join with returns ---
    # Drop columns that would otherwise duplicate the order-side columns.
    returns_df = returns_df.drop("Order Date", "Sub-Category")
    all_orders_with_returns_df = all_orders_df.join(
        returns_df, ["Order ID", "Product ID"], "left"
    )
    # Orders without a matching return row come out of the left join with a
    # NULL flag; report them as "No" so the column is always Yes/No.
    all_orders_with_returns_df = all_orders_with_returns_df.withColumn(
        "Returned?", F.coalesce(F.col("Returned?"), F.lit("No"))
    )

    # --- Aggregate data for the Annual Regional Performance Report ---
    annual_performance_df = all_orders_with_returns_df.groupBy(
        "Region", "Year of Sale"
    ).agg(
        F.sum("Profit").alias("Total Profit"),
        F.sum("Sales").alias("Total Sales"),
        F.sum("Quantity").alias("Total Quantity"),
        F.avg("Discount").alias("Average Discount"),
    )

    # --- Drop existing tables if they exist ---
    spark.sql("DROP TABLE IF EXISTS catalog.db.superstore_sales")
    spark.sql("DROP TABLE IF EXISTS catalog.db.annual_regional_performance")

    # --- Save the transformed data to Unity Catalog tables ---
    all_orders_with_returns_df.write.format("delta").mode("overwrite").saveAsTable(
        "catalog.db.superstore_sales"
    )
    annual_performance_df.write.format("delta").mode("overwrite").saveAsTable(
        "catalog.db.annual_regional_performance"
    )

    logger.info("Data transformation and loading completed successfully.")

except Exception as e:
    # Top-level boundary: record the failure with its traceback, then re-raise
    # so the notebook run is still marked as failed.
    logger.exception("An error occurred: %s", e)
    raise
