# 📊 Food Delivery - Analysis Notebook

In [None]:

# Read the orders CSV into a DataFrame: the first row supplies the column
# names and Spark infers each column's type from the data.
df = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("/FileStore/tables/24MBMA47_order.csv")
)

# Quick sanity check: preview five rows and print the inferred schema.
df.show(5)
df.printSchema()

# Register the DataFrame as the temp view "orders" so it can be queried
# through spark.sql.
df.createOrReplaceTempView("orders")


## 🔍 Analysis Queries

In [None]:

# Total order value per customer: sum order_amount for each customer_id
# and list the highest spenders first.
total_spent_query = """
SELECT customer_id, SUM(order_amount) AS total_spent
FROM orders
GROUP BY customer_id
ORDER BY total_spent DESC
"""
total_spent_per_customer = spark.sql(total_spent_query)
total_spent_per_customer.show()


In [None]:

# Average delivery time per partner.
from pyspark.sql.functions import avg, unix_timestamp

# Delivery duration is the gap between the two timestamps; unix_timestamp
# yields epoch seconds, so the difference is expressed in seconds.
elapsed = unix_timestamp("delivery_time") - unix_timestamp("order_time")
df2 = df.withColumn("delivery_duration", elapsed)

# Mean duration for each delivery partner, reported as avg_delivery_time.
mean_duration = avg("delivery_duration").alias("avg_delivery_time")
per_partner = df2.groupBy("delivery_partner_id").agg(mean_duration)
per_partner.show()


In [None]:

# Customers with frequent cancellations: count the orders whose
# order_status is exactly 'Cancelled' per customer and keep only the
# customers with more than one such order.
repeat_cancellations_query = """
SELECT customer_id, COUNT(*) AS cancellations
FROM orders
WHERE order_status = 'Cancelled'
GROUP BY customer_id
HAVING COUNT(*) > 1
"""
repeat_cancellers = spark.sql(repeat_cancellations_query)
repeat_cancellers.show()


In [None]:

# Peak order hours: bucket orders by the hour component of order_time and
# list the busiest hours first.
peak_hours_query = """
SELECT HOUR(order_time) AS order_hour, COUNT(*) AS total_orders
FROM orders
GROUP BY HOUR(order_time)
ORDER BY total_orders DESC
"""
orders_by_hour = spark.sql(peak_hours_query)
orders_by_hour.show()
