### 3. Create an enriched table which has order information, including:
- Profit rounded to 2 decimal places
- Customer name and country
- Product category and sub category


In [0]:
# Read the three standardized silver tables in one pass; the unpacking order
# matches the order of the table names below.
customer_df, product_df, order_df = (
    spark.read.table(table_name)
    for table_name in (
        "pei_adb_proj.silver.customer_stan_silver",
        "pei_adb_proj.silver.product_stan_silver",
        "pei_adb_proj.silver.order_stan_silver",
    )
)

In [0]:
from pyspark.sql.functions import round

# Build the enriched table at order grain: start from the orders and left-join
# customer and product attributes onto them, so an order is kept even when its
# customer or product has no match (the joined-in columns are then null).
# Starting from customer_df instead would drop orders without a matching
# customer and add null-order rows for customers that never ordered.
# NOTE: `round` is pyspark.sql.functions.round (imported above); that import
# shadows Python's builtin round.
order_details_enrich = (
    order_df
    .join(customer_df, order_df.Customer_ID == customer_df.Customer_ID, "left")
    .join(product_df, order_df.Product_ID == product_df.Product_ID, "left")
    .select(
        # Taken from the customer side on purpose: it is null exactly when the
        # order has no matching customer row.
        customer_df.Customer_ID.alias("Customer_ID"),
        "Country",
        "Category",
        "Sub_Category",
        round("Profit", 2).alias("Profit"),
        "Order_Date",
        "Order_ID",
        "Customer_Name",
    )
)

display(order_details_enrich)

In [0]:
# Test Case 1: every order ID present in order_df must also appear in the
# enriched table. Null Order_ID values are dropped before counting, because
# distinct() keeps null as a value of its own, so a null Order_ID row in the
# enriched table could otherwise mask one missing order.
enriched_order_count = (
    order_details_enrich.select("Order_ID").dropna().distinct().count()
)
source_order_count = order_df.select("Order_ID").dropna().distinct().count()
assert enriched_order_count == source_order_count, \
    "Test Case 1 Failed: Missing orders after join."
print("OK Test Case 1 Passed: All orders are preserved after join.")

In [0]:
# Check if customer details exist for each order.
# An order counts as linked only if it appears in the enriched table with a
# non-null Customer_ID. Anti-joining the source orders against those linked
# orders also catches orders that are absent from the enriched table
# altogether, which a filter on the enriched table alone cannot see.
from pyspark.sql.functions import col
linked_orders = (
    order_details_enrich
    .filter(col("Customer_ID").isNotNull() & col("Order_ID").isNotNull())
    .select("Order_ID")
    .distinct()
)
# Source orders (non-null IDs) that have no linked counterpart.
missing_customers = (
    order_df.select("Order_ID").dropna().distinct()
    .join(linked_orders, on="Order_ID", how="left_anti")
)
assert missing_customers.count() == 0, "Test Case 2 Failed: Some orders are not linked to customers."
print("OK Test Case 2 Passed: All orders are linked to customers.")


In [0]:
# Remove any previous version of the enriched table before it is rewritten.
drop_statement = "DROP TABLE IF EXISTS pei_adb_proj.silver.enriched_silver_tb"
spark.sql(drop_statement)

In [0]:
# Persist the enriched DataFrame as a Delta table using overwrite mode.
enriched_writer = order_details_enrich.write.format("delta").mode("overwrite")
enriched_writer.saveAsTable("pei_adb_proj.silver.enriched_silver_tb")