In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when

# Reuse the active Spark session when one already exists; otherwise create one
# under this application name.
spark = SparkSession.builder.appName("Week4_ETL_from_CSV").getOrCreate()

product_csv_path = "/FileStore/tables/products1.csv"
stock_csv_path = "/FileStore/tables/stocks.csv"

# Explicit DDL schemas replace inferSchema=True. Inference costs an extra scan
# of each file and lets one malformed value silently turn a numeric column into
# a string, which would corrupt the numeric stock-vs-threshold comparison made
# later in the notebook.
products_schema = (
    "product_id INT, product_name STRING, category STRING, reorder_threshold INT"
)
stock_schema = "product_id INT, warehouse_id STRING, current_stock INT"

# A user-supplied schema is applied by column position. enforceSchema=False
# makes Spark check the CSV header names against the schema instead of
# ignoring them, so a reordered or renamed column fails loudly rather than
# being mislabeled.
products_df = spark.read.csv(
    product_csv_path,
    header=True,
    schema=products_schema,
    enforceSchema=False,
)
stock_df = spark.read.csv(
    stock_csv_path,
    header=True,
    schema=stock_schema,
    enforceSchema=False,
)

print("Product and Stock DataFrames loaded from CSV files.")
products_df.show()
stock_df.show()


Product and Stock DataFrames loaded from CSV files.
+----------+------------+-----------+-----------------+
|product_id|product_name|   category|reorder_threshold|
+----------+------------+-----------+-----------------+
|       101|      Laptop|Electronics|               20|
|       102|       Mouse|Electronics|               50|
|       103|  Desk Chair|  Furniture|               15|
|       104|    Keyboard|Electronics|               30|
+----------+------------+-----------+-----------------+

+----------+------------+-------------+
|product_id|warehouse_id|current_stock|
+----------+------------+-------------+
|       101|     W_NORTH|           15|
|       102|     W_NORTH|          100|
|       103|     W_SOUTH|           25|
|       101|     W_SOUTH|           50|
|       104|     W_NORTH|           10|
+----------+------------+-------------+



In [0]:
# Build the combined inventory view: every stock record enriched with its
# product attributes. Stock is the left side of the join, so a stock row is
# kept even when no product row shares its product_id (the product columns
# are null in that case).
join_key = "product_id"
inventory_df = stock_df.join(products_df, join_key, "left")

print("Joined Master Inventory DataFrame:")
inventory_df.show()

Joined Master Inventory DataFrame:
+----------+------------+-------------+------------+-----------+-----------------+
|product_id|warehouse_id|current_stock|product_name|   category|reorder_threshold|
+----------+------------+-------------+------------+-----------+-----------------+
|       101|     W_SOUTH|           50|      Laptop|Electronics|               20|
|       103|     W_SOUTH|           25|  Desk Chair|  Furniture|               15|
|       102|     W_NORTH|          100|       Mouse|Electronics|               50|
|       104|     W_NORTH|           10|    Keyboard|Electronics|               30|
|       101|     W_NORTH|           15|      Laptop|Electronics|               20|
+----------+------------+-------------+------------+-----------+-----------------+



In [0]:
# Add a boolean reorder_flag column: True when the stock on hand is strictly
# below the product's reorder threshold.
below_threshold = col("current_stock") < col("reorder_threshold")

# when/otherwise (rather than the bare comparison) keeps the flag non-null:
# if either operand is null the comparison is null, the when() branch does not
# match, and the row falls through to False.
reorder_flag = when(below_threshold, True).otherwise(False)

master_inventory_view = inventory_df.withColumn("reorder_flag", reorder_flag)

print("Master Inventory View with Reorder Flag:")
master_inventory_view.show()

Master Inventory View with Reorder Flag:
+----------+------------+-------------+------------+-----------+-----------------+------------+
|product_id|warehouse_id|current_stock|product_name|   category|reorder_threshold|reorder_flag|
+----------+------------+-------------+------------+-----------+-----------------+------------+
|       101|     W_SOUTH|           50|      Laptop|Electronics|               20|       false|
|       103|     W_SOUTH|           25|  Desk Chair|  Furniture|               15|       false|
|       102|     W_NORTH|          100|       Mouse|Electronics|               50|       false|
|       104|     W_NORTH|           10|    Keyboard|Electronics|               30|        true|
|       101|     W_NORTH|           15|      Laptop|Electronics|               20|        true|
+----------+------------+-------------+------------+-----------+-----------------+------------+



In [0]:
# Persist the flagged inventory in two formats. Both writes use overwrite
# mode, so re-running this cell replaces whatever is already at each path.

# 1) Delta table (recommended in Databricks).
delta_path = "/mnt/inventory/master_inventory_delta"
master_inventory_view.write.save(delta_path, format="delta", mode="overwrite")
print(f"Successfully saved final inventory to Delta table at {delta_path}")

# 2) CSV export with a header row.
csv_path = "/mnt/inventory/master_inventory_csv"
master_inventory_view.write.csv(csv_path, header=True, mode="overwrite")
print(f"Successfully saved final inventory to CSV at {csv_path}")

Successfully saved final inventory to Delta table at /mnt/inventory/master_inventory_delta
Successfully saved final inventory to CSV at /mnt/inventory/master_inventory_csv
