# Phase 3 - Retention Insights

Blend model predictions with customer attributes to surface actionable churn intelligence.

## Load predictions and customer context

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

PROJECT_ROOT = Path.cwd()
if PROJECT_ROOT.name == "notebooks":
    PROJECT_ROOT = PROJECT_ROOT.parent

PREDICTIONS_PATH = PROJECT_ROOT / "data" / "processed" / "churn_predictions.csv"
RAW_DIR = PROJECT_ROOT / "data" / "raw"
REPORTS_DIR = PROJECT_ROOT / "reports"

predictions = pd.read_csv(PREDICTIONS_PATH)
train_raw = pd.read_csv(RAW_DIR / "customer_churn_dataset-training-master.csv")
test_raw = pd.read_csv(RAW_DIR / "customer_churn_dataset-testing-master.csv")

full_raw = pd.concat([train_raw, test_raw], ignore_index=True, sort=False)
rename_map = {
    "CustomerID": "customerID",
    "Usage Frequency": "UsageFrequency",
    "Support Calls": "SupportCalls",
    "Payment Delay": "PaymentDelay",
    "Subscription Type": "SubscriptionType",
    "Contract Length": "ContractType",
    "Total Spend": "TotalSpend",
    "Last Interaction": "LastInteraction"
}
full_raw = full_raw.rename(columns=rename_map)
full_raw["customerID"] = full_raw["customerID"].round().astype("Int64")

insights_df = predictions.merge(full_raw, on="customerID", how="left")
if "Churn_x" in insights_df.columns:
    insights_df = insights_df.rename(columns={"Churn_x": "Churn"})
if "Churn_y" in insights_df.columns:
    insights_df = insights_df.rename(columns={"Churn_y": "ChurnRaw"})
insights_df.head()


Unnamed: 0,customerID,dataset,churn_probability,Churn,churn_prediction,Age,Gender,Tenure,UsageFrequency,SupportCalls,PaymentDelay,SubscriptionType,ContractType,TotalSpend,LastInteraction,ChurnRaw
0,2.0,train,0.999907,1,1,30.0,Female,39.0,14.0,5.0,18.0,Standard,Annual,932.0,17.0,1.0
1,2.0,train,0.999907,1,1,41.0,Female,28.0,28.0,7.0,13.0,Standard,Monthly,584.0,20.0,0.0
2,3.0,train,0.999999,1,1,65.0,Female,49.0,1.0,10.0,8.0,Basic,Monthly,557.0,6.0,1.0
3,3.0,train,0.999999,1,1,47.0,Male,27.0,10.0,2.0,29.0,Premium,Annual,757.0,21.0,0.0
4,4.0,train,1.0,1,1,55.0,Female,14.0,4.0,6.0,18.0,Basic,Quarterly,185.0,3.0,1.0


## Top high-risk customers

In [2]:
high_risk = (
    insights_df
    .loc[insights_df["churn_probability"] > 0.8]
    .sort_values("churn_probability", ascending=False)
    .head(10)
    [[
        "customerID",
        "churn_probability",
        "Churn",
        "Tenure",
        "UsageFrequency",
        "SupportCalls",
        "PaymentDelay",
        "SubscriptionType",
        "ContractType",
        "TotalSpend"
    ]]
    .reset_index(drop=True)
)
high_risk

Unnamed: 0,customerID,churn_probability,Churn,Tenure,UsageFrequency,SupportCalls,PaymentDelay,SubscriptionType,ContractType,TotalSpend
0,115599.0,1.0,1,1.0,15.0,2.0,21.0,Basic,Monthly,464.0
1,222888.0,1.0,1,12.0,6.0,6.0,9.0,Premium,Annual,558.57
2,40750.0,1.0,0,20.0,5.0,10.0,17.0,Premium,Annual,493.0
3,40750.0,1.0,0,19.0,28.0,10.0,3.0,Standard,Annual,964.0
4,46402.0,1.0,1,18.0,1.0,6.0,2.0,Standard,Annual,435.0
5,35546.0,1.0,1,11.0,16.0,8.0,30.0,Premium,Monthly,564.0
6,35546.0,1.0,1,16.0,4.0,4.0,29.0,Basic,Quarterly,226.0
7,259566.0,1.0,1,5.0,29.0,0.0,24.0,Basic,Monthly,513.12
8,53746.0,1.0,1,22.0,4.0,8.0,23.0,Standard,Monthly,805.0
9,168087.0,1.0,1,13.0,4.0,5.0,24.0,Premium,Quarterly,350.0


## Churn rate by contract and subscription

In [3]:
train_insights = insights_df[insights_df["dataset"] == "train"].copy()
train_insights["churn_flag"] = train_insights["Churn"].fillna(0)

contract_summary = (
    train_insights.groupby("ContractType")
    .agg(customers=("customerID", "count"), churn_rate=("churn_flag", "mean"))
    .reset_index()
    .sort_values("churn_rate", ascending=False)
)

subscription_summary = (
    train_insights.groupby("SubscriptionType")
    .agg(customers=("customerID", "count"), churn_rate=("churn_flag", "mean"))
    .reset_index()
    .sort_values("churn_rate", ascending=False)
)
contract_summary, subscription_summary

(  ContractType  customers  churn_rate
 1      Monthly     108750    0.996101
 0       Annual     198132    0.515454
 2    Quarterly     196945    0.513910,
   SubscriptionType  customers  churn_rate
 0            Basic     164037    0.632760
 2         Standard     170169    0.612379
 1          Premium     169621    0.611139)

## Export retention summary

In [4]:
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

contract_summary = contract_summary.assign(segment="contract_type", category=contract_summary["ContractType"])
subscription_summary = subscription_summary.assign(segment="subscription_type", category=subscription_summary["SubscriptionType"])

summary_export = (
    pd.concat([contract_summary, subscription_summary], ignore_index=True)
    [["segment", "category", "customers", "churn_rate"]]
)
summary_export["churn_rate"] = summary_export["churn_rate"].round(4)

summary_path = REPORTS_DIR / "churn_summary.csv"
summary_export.to_csv(summary_path, index=False)
summary_path

WindowsPath('D:/Portfolio Projects/Customer Lifetime Value & Retention Intelligence Platform/reports/churn_summary.csv')