In [None]:
# 📓 Microsoft Fabric Spark Notebook: SaaS KPI Aggregations

from pyspark.sql import functions as F
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Read the two source Lakehouse tables. `payments` is loaded here but is not
# referenced by the aggregations visible in this cell.
subscriptions, payments = (
    spark.read.table(table_name)
    for table_name in ("subscriptions", "payments")
)

# --------------------------------------
# 1. MRR and ARR by Month
# --------------------------------------
# Total_MRR: sum of `monthly_price` over all subscription rows, bucketed by the
# "yyyy-MM" month of `start_date` and rounded to 2 decimals.
# Spark functions are called through the `F` namespace so the expression does
# not depend on the wildcard import having replaced Python's built-in
# `sum` / `round`.
# NOTE(review): each row is counted only in the month it starts, not in every
# month it stays active - confirm this is the intended MRR definition.
mrr = (
    subscriptions
    .withColumn("Month", F.date_format("start_date", "yyyy-MM"))
    .groupBy("Month")
    .agg(F.round(F.sum("monthly_price"), 2).alias("Total_MRR"))
)

# Total_ARR: annualised price (`monthly_price` * 12) summed over rows whose
# `subscription_type` is "initial", grouped by the calendar year of
# `start_date` and rounded to 2 decimals. Unlike `mrr`, rows of any other
# subscription type are excluded here.
# Spark functions are called through the `F` namespace so the expression does
# not depend on the wildcard import having replaced Python's built-in
# `sum` / `round`.
arr = (
    subscriptions
    .filter(F.col("subscription_type") == "initial")
    .withColumn("Year", F.year("start_date"))
    .groupBy("Year")
    .agg(F.round(F.sum(F.col("monthly_price") * 12), 2).alias("Total_ARR"))
)

# --------------------------------------
# 2. Churn Analysis
# --------------------------------------
# Churned_Customers: number of distinct `customer_id` values among rows that
# have an `end_date`, bucketed by the "yyyy-MM" month of that `end_date`.
# Spark functions are called through the `F` namespace rather than relying on
# names injected by the wildcard import.
# NOTE(review): every row with a non-null `end_date` counts as churn, whatever
# its `subscription_type` - verify that ended rows always mean the customer
# actually left (e.g. not a row superseded by an upgrade).
churn = (
    subscriptions
    .filter(F.col("end_date").isNotNull())
    .withColumn("Churn_Month", F.date_format("end_date", "yyyy-MM"))
    .groupBy("Churn_Month")
    .agg(F.countDistinct("customer_id").alias("Churned_Customers"))
)

# --------------------------------------
# 3. Expansion Revenue (Upgrades + Seat Expansions)
# --------------------------------------
# Expansion_MRR: sum of `monthly_price` for rows whose `subscription_type` is
# "plan_upgrade" or "seat_expansion", grouped by the "yyyy-MM" month of
# `start_date` and by subscription type, rounded to 2 decimals.
# Spark functions are called through the `F` namespace so the expression does
# not depend on the wildcard import having replaced Python's built-in
# `sum` / `round`.
# NOTE(review): the full `monthly_price` of each row is summed - presumably
# that column holds the incremental amount for expansion rows; confirm.
expansion = (
    subscriptions
    .filter(F.col("subscription_type").isin("plan_upgrade", "seat_expansion"))
    .withColumn("Month", F.date_format("start_date", "yyyy-MM"))
    .groupBy("Month", "subscription_type")
    .agg(F.round(F.sum("monthly_price"), 2).alias("Expansion_MRR"))
)

# --------------------------------------
# 4. Write to Lakehouse Tables
# --------------------------------------
# Persist each aggregate under its target table name, in the listed order.
# Mode "overwrite" is passed on every write.
for _frame, _target_table in (
    (mrr, "agg_mrr"),
    (arr, "agg_arr"),
    (churn, "agg_churn"),
    (expansion, "agg_expansion"),
):
    _frame.write.mode("overwrite").saveAsTable(_target_table)

print("✅ Aggregated SaaS metrics saved to Lakehouse.")
