In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from IPython.display import display

In [25]:
data = {
    'CustomerID': range(1, 101),
    'MonthlyUsage': np.random.uniform(50, 300, 100),
    'BillAmount': np.random.uniform(20, 100, 100),
    'CustomerAge': np.random.randint(18, 70, 100),
    'TenureMonths': np.random.randint(1, 36, 100),
    'ServiceCalls': np.random.randint(0, 10, 100),
    'ContractType': np.random.choice(['Monthly', 'Yearly'], 100, p=[0.7, 0.3]),
    'Churn': np.random.choice([0, 1], 100, p=[0.8, 0.2])  # 0 = Not Churned, 1 = Churned
}

df = pd.DataFrame(data)
df.head(10)

Unnamed: 0,CustomerID,MonthlyUsage,BillAmount,CustomerAge,TenureMonths,ServiceCalls,ContractType,Churn
0,1,182.409042,89.248009,60,17,4,Monthly,0
1,2,244.438061,98.390929,58,25,4,Yearly,0
2,3,92.342375,76.549162,54,3,5,Yearly,0
3,4,131.793511,66.622103,21,12,6,Monthly,1
4,5,275.282835,42.892766,62,14,1,Monthly,0
5,6,94.399594,76.820559,53,6,2,Monthly,0
6,7,188.18272,65.844911,50,33,5,Monthly,1
7,8,250.967864,87.002432,21,18,1,Monthly,0
8,9,58.05521,50.42337,28,30,4,Monthly,1
9,10,194.222968,56.329309,19,1,5,Monthly,0


### Feature Engineering


#### Onehot encoding (yearly or monthly)
#### calculate average monthly spending
#### separate training input and output.

In [26]:
# One-hot encode categorical features
df = pd.get_dummies(df, columns=['ContractType'], drop_first=True)
print('-------after one hot encoding-------')
display(df)
# 2. Feature Engineering
df['AvgMonthlySpend'] = df['BillAmount'] / df['TenureMonths']
df.fillna(0, inplace=True)  # Handle division by zero
print('-------average monthly spending-------')
display(df)

# 3. Prepare data for training
X = df.drop(['CustomerID', 'Churn'], axis=1)
y = df['Churn']



-------after one hot encoding-------


Unnamed: 0,CustomerID,MonthlyUsage,BillAmount,CustomerAge,TenureMonths,ServiceCalls,Churn,ContractType_Yearly
0,1,182.409042,89.248009,60,17,4,0,False
1,2,244.438061,98.390929,58,25,4,0,True
2,3,92.342375,76.549162,54,3,5,0,True
3,4,131.793511,66.622103,21,12,6,1,False
4,5,275.282835,42.892766,62,14,1,0,False
...,...,...,...,...,...,...,...,...
95,96,287.187774,80.275417,28,5,9,0,True
96,97,200.875625,50.414737,33,24,3,0,True
97,98,168.861665,96.804260,40,15,7,0,False
98,99,257.882177,32.020408,40,11,5,0,True


-------average monthly spending-------


Unnamed: 0,CustomerID,MonthlyUsage,BillAmount,CustomerAge,TenureMonths,ServiceCalls,Churn,ContractType_Yearly,AvgMonthlySpend
0,1,182.409042,89.248009,60,17,4,0,False,5.249883
1,2,244.438061,98.390929,58,25,4,0,True,3.935637
2,3,92.342375,76.549162,54,3,5,0,True,25.516387
3,4,131.793511,66.622103,21,12,6,1,False,5.551842
4,5,275.282835,42.892766,62,14,1,0,False,3.063769
...,...,...,...,...,...,...,...,...,...
95,96,287.187774,80.275417,28,5,9,0,True,16.055083
96,97,200.875625,50.414737,33,24,3,0,True,2.100614
97,98,168.861665,96.804260,40,15,7,0,False,6.453617
98,99,257.882177,32.020408,40,11,5,0,True,2.910946


#### Normalization

In [27]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


### Apply Machine Learning
#### Train the RandomForestClassifier

In [28]:
# 4. Train a prediction model
model = RandomForestClassifier(random_state=42, n_estimators=30)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

In [29]:
# Evaluate the model
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.94      0.89        17
           1       0.00      0.00      0.00         3

    accuracy                           0.80        20
   macro avg       0.42      0.47      0.44        20
weighted avg       0.72      0.80      0.76        20

Accuracy: 0.8


### Intelligent Targeting
#### Target customers with high churn probability

In [30]:
X_test_df = pd.DataFrame(X_test, columns=X.columns)

X_test_df['ChurnProbability'] = y_proba
X_test_df['ActualChurn'] = y_test.values

display(X_test_df[:10])

Unnamed: 0,MonthlyUsage,BillAmount,CustomerAge,TenureMonths,ServiceCalls,ContractType_Yearly,AvgMonthlySpend,ChurnProbability,ActualChurn
0,0.256449,-0.300675,-0.764229,0.926273,0.470477,-0.546536,-0.446078,0.133333,0
1,-0.641555,-0.47998,-0.169241,-0.672472,0.097083,-0.546536,-0.210914,0.433333,0
2,-1.141539,1.604652,-1.226998,-1.272001,1.59066,-0.546536,1.119614,0.4,0
3,0.080884,-1.346836,-1.160888,1.725645,0.470477,-0.546536,-0.547451,0.3,0
4,-1.503885,1.276397,-1.226998,-0.772394,0.097083,1.829707,0.179647,0.866667,0
5,1.5454,-0.232669,0.227418,-1.671687,-0.649706,1.829707,4.470856,0.2,0
6,1.010624,0.63549,0.227418,0.926273,0.470477,-0.546536,-0.37725,0.233333,0
7,-1.304673,0.659853,1.086845,0.226822,0.470477,1.829707,-0.28637,0.2,1
8,-1.124531,-0.86244,-0.235351,-1.471844,1.217266,-0.546536,0.653574,0.3,1
9,0.034987,1.172271,1.020735,-0.072943,-0.276312,-0.546536,-0.165895,0.2,0


In [32]:
# Target customers with a churn probability > 0.6
target_customers = X_test_df[X_test_df['ChurnProbability'] > 0.6]
print("\nHigh-Risk Customers for Retention Campaign:")
print(target_customers)


High-Risk Customers for Retention Campaign:
   MonthlyUsage  BillAmount  CustomerAge  TenureMonths  ServiceCalls  \
4     -1.503885    1.276397    -1.226998     -0.772394      0.097083   

   ContractType_Yearly  AvgMonthlySpend  ChurnProbability  ActualChurn  
4             1.829707         0.179647          0.866667            0  
