In [5]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, sum as spark_sum

# Start (or reuse) the Spark session used by every cell below.
spark = (
    SparkSession.builder
    .appName("WarehouseStockAnalysis")
    .getOrCreate()
)

# Load the stock movement records. The first CSV row supplies the column
# names, and column types are inferred from the data.
df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("stock_movements.csv")
)


In [7]:
# Total quantity per (warehouse, product) pair.
# NOTE(review): `quantity` is summed as-is. If the movement data distinguishes
# inbound from outbound rows, the sum may need to be signed — confirm against
# the CSV schema.
warehouse_stock = df.groupBy("warehouse_id", "product_id").agg(
    spark_sum(col("quantity")).alias("total_quantity")
)

In [8]:
# Inclusive stock-level cut-offs, named so they can be tuned in one place
# instead of being buried in the filter expressions.
UNDERSTOCK_MAX_QUANTITY = 10   # at or below this total -> understocked
OVERSTOCK_MIN_QUANTITY = 100   # at or above this total -> overstocked

# Warehouse/product pairs whose total quantity is at or below the lower cut-off.
understocked = warehouse_stock.filter(
    col("total_quantity") <= UNDERSTOCK_MAX_QUANTITY
)

# Warehouse/product pairs whose total quantity is at or above the upper cut-off.
overstocked = warehouse_stock.filter(
    col("total_quantity") >= OVERSTOCK_MIN_QUANTITY
)

In [9]:
# Save each result set as CSV, replacing any output from a previous run.
# header=True writes the column names as the first row of every part file;
# without it the files contain bare values and the column meaning is lost.
warehouse_stock.write.mode("overwrite").csv(
    "output/warehouse_stock_summary", header=True
)
understocked.write.mode("overwrite").csv(
    "output/understocked_items", header=True
)
overstocked.write.mode("overwrite").csv(
    "output/overstocked_items", header=True
)

In [10]:
# Preview the first 10 rows of each result set under its own banner.
for heading, frame in (
    ("=== Warehouse Stock Summary ===", warehouse_stock),
    ("=== Understocked Items ===", understocked),
    ("=== Overstocked Items ===", overstocked),
):
    print(heading)
    frame.show(10)

=== Warehouse Stock Summary ===
+------------+----------+--------------+
|warehouse_id|product_id|total_quantity|
+------------+----------+--------------+
|          W2|       104|             5|
|          W2|       101|           120|
|          W1|       105|           300|
|          W1|       102|             8|
|          W1|       104|            50|
|          W1|       101|             5|
|          W3|       105|             9|
|          W3|       102|            15|
|          W3|       103|             2|
|          W2|       103|           200|
+------------+----------+--------------+

=== Understocked Items ===
+------------+----------+--------------+
|warehouse_id|product_id|total_quantity|
+------------+----------+--------------+
|          W2|       104|             5|
|          W1|       102|             8|
|          W1|       101|             5|
|          W3|       105|             9|
|          W3|       103|             2|
+------------+----------+-------------