In [None]:
import os
import sys
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Make the project root importable so project-local modules can be imported
# from this notebook. The root is taken to be two levels above the current
# working directory (presumably the notebook is launched from a folder two
# levels below the root -- TODO confirm the project layout).
PROJECT_ROOT = Path.cwd().parents[1]

# Register the path only once: re-running this cell would otherwise keep
# appending duplicate entries to sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

print("Project root:", PROJECT_ROOT)


: 

In [None]:
# Location of the telecom churn CSV (reused from the churn project).
# The path can be overridden through the TELECOM_CHURN_CSV environment
# variable so the notebook also runs on machines where the dataset lives
# elsewhere; when the variable is unset the original location is used.
DATA_PATH = Path(
    os.environ.get(
        "TELECOM_CHURN_CSV",
        r"D:\Machine Learning Analyst Norquest MLAD\Data_Projects\telecom_churn_webapp\data\raw\telecom_customer_churn.csv",
    )
)

# Fail early with an actionable message instead of a bare read error.
if not DATA_PATH.is_file():
    raise FileNotFoundError(
        f"Dataset not found at {DATA_PATH}. "
        "Set the TELECOM_CHURN_CSV environment variable to the CSV location."
    )

df = pd.read_csv(DATA_PATH)

# (rows, columns) of the raw dataset
df.shape


In [None]:
# Trim stray whitespace from the header names before any column lookup.
df.columns = [name.strip() for name in df.columns]

# Normalise the status text (string cast, trimmed, lower-cased) so the
# comparison below is insensitive to case and surrounding whitespace.
status_normalised = df["Customer Status"].astype(str).str.strip().str.lower()

# Binary churn flag: 1 when the normalised status is "churned", else 0.
df["churn"] = (status_normalised == "churned").astype(int)

# Share of customers in each churn class.
df["churn"].value_counts(normalize=True)


In [None]:
# Columns summarised below as the revenue-related inputs.
revenue_cols = ["Monthly Charge", "Tenure in Months"]

# Negative monthly charges are treated as invalid and replaced with NaN.
# Report how many rows are affected so the cleaning step is visible.
negative_charge_mask = df["Monthly Charge"] < 0
print("Negative monthly charges set to NaN:", int(negative_charge_mask.sum()))
df.loc[negative_charge_mask, "Monthly Charge"] = np.nan

# The summary goes last so the notebook actually renders it: only a cell's
# final expression is displayed, and previously this result was discarded.
df[revenue_cols].describe()


In [None]:
# Share of customers flagged as churned (mean of the 0/1 churn column).
overall_churn_rate = df["churn"].mean()

# A zero (or undefined) churn rate would make the lifetime estimate infinite
# or NaN and silently carry into every downstream CLV figure, so stop here.
if not overall_churn_rate > 0:
    raise ValueError(
        f"Cannot estimate customer lifetime: churn rate is {overall_churn_rate!r}"
    )

# Expected lifetime is modelled as the reciprocal of the churn rate.
# NOTE(review): labelling this in months presumes the churn flag represents a
# per-month churn probability -- confirm against the dataset's observation window.
expected_lifetime_months = 1 / overall_churn_rate
expected_lifetime_months


In [None]:
# Every customer receives the same portfolio-level lifetime estimate.
df["expected_lifetime_months"] = expected_lifetime_months

# Estimated CLV: monthly charge multiplied by the expected lifetime.
df["clv"] = df["Monthly Charge"].mul(df["expected_lifetime_months"])

# Preview the inputs next to the resulting CLV.
preview_columns = ["Monthly Charge", "Tenure in Months", "clv"]
df[preview_columns].head()


In [None]:
# Histogram (with KDE overlay) of the estimated CLV; rows without a CLV
# value are left out of the plot.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df["clv"].dropna(), bins=50, kde=True, ax=ax)
ax.set_title("Customer Lifetime Value (CLV) Distribution")
ax.set_xlabel("Estimated CLV")
ax.set_ylabel("Customer Count")
fig.tight_layout()
plt.show()


In [None]:
# Rank customers from highest to lowest CLV. Rows without a CLV are excluded
# first: they contribute no revenue but would otherwise still count towards
# the customer share, leaving the curve undefined at its tail.
df_sorted = df.dropna(subset=["clv"]).sort_values("clv", ascending=False)

# Running share of total CLV captured by the top-ranked customers.
df_sorted["cum_revenue_pct"] = (
    df_sorted["clv"].cumsum() / df_sorted["clv"].sum()
)

# Running share of the (CLV-bearing) customer base covered so far.
df_sorted["cum_customers_pct"] = (
    np.arange(1, len(df_sorted) + 1) / len(df_sorted)
)

# Concentration curve with the classic 80/20 reference lines.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df_sorted["cum_customers_pct"], df_sorted["cum_revenue_pct"])
ax.axhline(0.8, color="red", linestyle="--", label="80% revenue")
ax.axvline(0.2, color="green", linestyle="--", label="20% customers")
ax.set_xlabel("Cumulative % of Customers")
ax.set_ylabel("Cumulative % of Revenue (CLV)")
ax.set_title("Revenue Concentration (Pareto Analysis)")
ax.legend()
fig.tight_layout()
plt.show()
