In [None]:
# 📓 Microsoft Fabric Spark Notebook: SaaS Unit Economics
# Analyze customer-level profitability and efficiency metrics

from pyspark.sql.functions import *
from pyspark.sql.types import *

# Read the five source tables by name; each section below starts from one of
# these DataFrames.
subscriptions, customers, payments, support, usage = (
    spark.read.table(table_name)
    for table_name in (
        "subscriptions",
        "customers",
        "payments",
        "support",
        "usage_events",
    )
)

# --------------------------------------
# 1. CAC: Customer Acquisition Cost
# --------------------------------------
# Collapse `customers` to one row per customer_id: the first acquisition_cost
# (rounded to 2 decimals) as CAC, plus the industry and company_size
# attributes used later for segmentation.
cac = (
    customers
    .groupBy("customer_id")
    .agg(
        round(first("acquisition_cost"), 2).alias("CAC"),
        first("industry").alias("industry"),
        first("company_size").alias("company_size"),
    )
)

# --------------------------------------
# 2. LTV: Lifetime Value and Average Monthly Revenue
# --------------------------------------
# Per subscription:
#   Months_Active   = months from start_date to end_date, or to today when
#                     end_date is NULL
#   Monthly_Revenue = monthly_price * seats
#   Estimated_LTV   = monthly_price * seats * Months_Active (2 decimals)
# Per customer:
#   LTV             = sum of Estimated_LTV
#   Monthly_Revenue = average Monthly_Revenue across the customer's
#                     subscriptions (CAC payback is derived from it in the
#                     merge section)
#
# plan_name and billing_frequency are taken together from the subscription
# with the latest start_date. Using first() on each column after a groupBy
# depends on row order after the shuffle, so a customer with several
# subscriptions could be assigned a different plan (and therefore a different
# segment) from one run to the next. max() over a struct compares fields left
# to right, so start_date decides and the remaining fields only break ties.
ltv = (
    subscriptions
    .withColumn(
        "Months_Active",
        when(
            col("end_date").isNotNull(),
            months_between(col("end_date"), col("start_date")),
        ).otherwise(months_between(current_date(), col("start_date"))),
    )
    .withColumn(
        "Estimated_LTV",
        round(col("monthly_price") * col("seats") * col("Months_Active"), 2),
    )
    .withColumn("Monthly_Revenue", col("monthly_price") * col("seats"))
    .groupBy("customer_id")
    .agg(
        round(sum("Estimated_LTV"), 2).alias("LTV"),
        round(avg("Monthly_Revenue"), 2).alias("Monthly_Revenue"),
        max(struct("start_date", "plan_name", "billing_frequency")).alias("latest_subscription"),
    )
    # Unpack the struct so the output columns match the previous schema:
    # customer_id, LTV, Monthly_Revenue, plan_name, billing_frequency.
    .select(
        "customer_id",
        "LTV",
        "Monthly_Revenue",
        col("latest_subscription").getField("plan_name").alias("plan_name"),
        col("latest_subscription").getField("billing_frequency").alias("billing_frequency"),
    )
)

# --------------------------------------
# 3. Gross Revenue per Customer
# --------------------------------------
# Total of all payment amounts per customer, rounded to 2 decimals.
gross_revenue = (
    payments
    .groupBy("customer_id")
    .agg(round(sum("amount"), 2).alias("Total_Revenue"))
)

# --------------------------------------
# 4. Support Sentiment Scoring
# --------------------------------------
# Map each support row to a numeric value (Positive -> 1, Neutral -> 0, any
# other non-null label -> -1) and average it per customer.
#
# Rows whose sentiment is NULL are kept as NULL so avg() skips them. Without
# the outer guard a NULL fails both equality checks and falls through to
# otherwise(-1), i.e. missing data is scored as negative, even though a
# customer with no support rows at all is treated as neutral (0) when the
# health score is computed.
sentiment_score = (
    support
    .withColumn(
        "sentiment_value",
        when(
            col("sentiment").isNotNull(),
            when(col("sentiment") == "Positive", 1)
            .when(col("sentiment") == "Neutral", 0)
            .otherwise(-1),
        ),
    )
    .groupBy("customer_id")
    .agg(round(avg("sentiment_value"), 2).alias("Avg_Sentiment_Score"))
)

# --------------------------------------
# 5. Product Usage Scoring
# --------------------------------------
# Per customer:
#   Active_Days  = number of distinct event_date values
#   Usage_Events = number of non-null event_id values
#   Usage_Score  = Usage_Events / Active_Days (2 decimals)
#
# The per-row "Month" column that used to be derived here was never used by
# the aggregation, so it is no longer computed.
# NOTE(review): if event_date is a timestamp rather than a date, Active_Days
# counts distinct instants, not calendar days — confirm the column type.
usage_score = (
    usage
    .groupBy("customer_id")
    .agg(
        countDistinct("event_date").alias("Active_Days"),
        count("event_id").alias("Usage_Events"),
    )
    .withColumn(
        "Usage_Score",
        # Active_Days is 0 when every event_date in the group is NULL; the
        # guard leaves the score NULL instead of dividing by zero.
        when(
            col("Active_Days") > 0,
            round(col("Usage_Events") / col("Active_Days"), 2),
        ),
    )
)

# --------------------------------------
# 6. Churn Flag
# --------------------------------------
# Churned = 1 only when every subscription of the customer has a non-null
# end_date strictly before today; otherwise 0.
#
# The flag is aggregated per subscription (min over a 0/1 "ended" indicator)
# rather than derived from latest_end: max("end_date") ignores NULLs, so a
# customer with one ended subscription and one still-open subscription
# (end_date NULL) would otherwise be flagged as churned. This matches the LTV
# section, which also treats a NULL end_date as "active until today".
#
# latest_end is still the latest non-null end_date (NULL if there is none).
churn_flag = (
    subscriptions
    .groupBy("customer_id")
    .agg(
        max("end_date").alias("latest_end"),
        min(
            when(
                col("end_date").isNotNull() & (col("end_date") < current_date()),
                1,
            ).otherwise(0)
        ).alias("Churned"),
    )
)

# --------------------------------------
# 7. Merge all unit economics per customer
# --------------------------------------
# Join order and types:
#   cac INNER ltv INNER gross_revenue  -> only customers that have both a
#                                         subscription and at least one payment
#   LEFT sentiment_score / usage_score / churn_flag -> optional signals
#
# Derived columns:
#   LTV_CAC_Ratio         = LTV / CAC                      (2 decimals)
#   CAC_Payback_Months    = CAC / Monthly_Revenue          (1 decimal)
#   Customer_Health_Score = 0.3 * sentiment + 0.5 * usage + 0.2 * (1 - Churned)
#                           with missing sentiment/usage counted as 0
#
# Both ratios are guarded so a zero (or NULL) denominator yields NULL. With
# ANSI SQL mode enabled an unguarded division by zero raises an error and
# would fail the whole job; with it disabled the result is NULL either way.
#
# NOTE(review): Usage_Score is "events per active day" and has no upper bound,
# while the sentiment term lies in [-1, 1] and the churn term is 0 or 1, so
# usage can dominate the health score — confirm whether it should be
# normalised before weighting.
unit_economics = (
    cac
    .join(ltv, "customer_id", "inner")
    .join(gross_revenue, "customer_id", "inner")
    .join(sentiment_score, "customer_id", "left")
    .join(usage_score, "customer_id", "left")
    .join(churn_flag, "customer_id", "left")
    .withColumn(
        "LTV_CAC_Ratio",
        when(col("CAC") != 0, round(col("LTV") / col("CAC"), 2)),
    )
    .withColumn(
        "CAC_Payback_Months",
        when(
            col("Monthly_Revenue") != 0,
            round(col("CAC") / col("Monthly_Revenue"), 1),
        ),
    )
    .withColumn(
        "Customer_Health_Score",
        round(
            coalesce(col("Avg_Sentiment_Score"), lit(0)) * 0.3
            + coalesce(col("Usage_Score"), lit(0)) * 0.5
            + (1 - col("Churned")) * 0.2,
            2,
        ),
    )
)

# --------------------------------------
# 8. Write per-customer unit economics table
# --------------------------------------
# Replace any existing agg_unit_economics table with the fresh result.
unit_economics.write.saveAsTable("agg_unit_economics", mode="overwrite")

# --------------------------------------
# 9. Segment-level Aggregation
# --------------------------------------
# Average the per-customer metrics within each
# (industry, company_size, plan_name, billing_frequency) segment and count
# the distinct customers it contains.
unit_segment = (
    unit_economics
    .groupBy("industry", "company_size", "plan_name", "billing_frequency")
    .agg(
        round(avg("CAC"), 2).alias("Avg_CAC"),
        round(avg("LTV"), 2).alias("Avg_LTV"),
        round(avg("LTV_CAC_Ratio"), 2).alias("Avg_LTV_CAC"),
        round(avg("CAC_Payback_Months"), 1).alias("Avg_Payback_Months"),
        round(avg("Customer_Health_Score"), 2).alias("Avg_Health_Score"),
        countDistinct("customer_id").alias("Customer_Count"),
    )
)

# Replace any existing segment table with the fresh result.
unit_segment.write.saveAsTable("agg_unit_economics_by_segment", mode="overwrite")

print("✅ Customer-level and segment-level unit economics saved to Lakehouse.")
