In [0]:
spark

<pyspark.sql.connect.session.SparkSession at 0x7fe2957e8b50>

In [0]:
from pyspark.sql.functions import *
productsdf = spark.read.csv("dbfs:/FileStore/tables/products.csv", header=True, inferSchema=True)
salesdf = spark.read.csv("dbfs:/FileStore/tables/sales.csv", header=True, inferSchema=True)
productsdf.show()
salesdf.show()

# Join product and sales data
joined = salesdf.join(productsdf, on="product_id", how="inner")

# Transform - calculate total revenue, total cost, profit margin
from pyspark.sql.functions import col, expr, round

metricsdf = joined.withColumn("revenue", col("quantity") * col("selling_price")) \
                      .withColumn("cost", col("quantity") * col("cost_price")) \
                      .withColumn("profit", col("revenue") - col("cost")) \
                      .withColumn("profit_margin", round(col("profit") / col("revenue") * 100, 2))

# Aggregate - profit margin by category
category_metrics = metricsdf.groupBy("category") \
                             .agg(
                                 expr("sum(revenue) as total_revenue"),
                                 expr("sum(cost) as total_cost"),
                                 expr("sum(profit) as total_profit"),
                                 round(expr("sum(profit) / sum(revenue) * 100"), 2).alias("profit_margin_percent")
                             )


display(category_metrics)

#Save Delta table and csv
delta_path = "dbfs:/FileStore/output/category_metrics_delta"
csv_path = "/dbfs/FileStore/output/category_metrics.csv"

(category_metrics.write.format("delta").mode("overwrite").save(delta_path))
category_metrics.write \
    .format("csv") \
    .option("header", "true") \
    .mode("overwrite") \
    .save("dbfs:/FileStore/output/category_metrics_csv")

display(dbutils.fs.ls("dbfs:/FileStore/output/"))

metricsdf.createOrReplaceTempView("product_sales_metrics")

result = spark.sql(
    """
    SELECT 
        name AS product_name, 
        SUM(quantity) AS total_quantity_sold, 
        SUM(revenue) AS total_revenue 
    FROM 
        product_sales_metrics 
    GROUP BY 
        name 
    ORDER BY 
        total_quantity_sold DESC 
    LIMIT 3
""")
result.show()


+----------+-----------------+-----------+----------+-------------+
|product_id|             name|   category|cost_price|selling_price|
+----------+-----------------+-----------+----------+-------------+
|         1|   Wireless Mouse|Electronics|        10|           15|
|         2|Bluetooth Speaker|Electronics|        20|           30|
|         3|     Office Chair|  Furniture|        50|           80|
|         4|    Standing Desk|  Furniture|       120|          180|
|         5|         Yoga Mat|     Sports|         8|           15|
|         6|    Running Shoes|     Sports|        45|           70|
|         7|      LED Monitor|Electronics|        90|          130|
|         8|     Water Bottle|     Sports|         5|           10|
|         9|     Laptop Stand|Accessories|        12|           20|
|        10|       Phone Case|Accessories|         3|            8|
|        11|        USB Cable|Electronics|         2|            5|
|        12|       Headphones|Electronics|      

category,total_revenue,total_cost,total_profit,profit_margin_percent
Accessories,136,69,67,49.26
Furniture,260,170,90,34.62
Stationery,70,36,34,48.57
Sports,315,180,135,42.86
Electronics,505,327,178,35.25


path,name,size,modificationTime
dbfs:/FileStore/output/category_metrics_csv/,category_metrics_csv/,0,1750008888000
dbfs:/FileStore/output/category_metrics_delta/,category_metrics_delta/,0,1750008887000


+------------+-------------------+-------------+
|product_name|total_quantity_sold|total_revenue|
+------------+-------------------+-------------+
|    Notebook|                 10|           30|
|Water Bottle|                 10|          100|
|     Pen Set|                  8|           40|
+------------+-------------------+-------------+

