In [49]:
# Build a synthetic customer table for the churn analysis.

import pandas as pd
import numpy as np

# Seed the legacy global NumPy generator so every run draws the same customers.
np.random.seed(42)

# Number of simulated customers.
n = 500

# The columns are drawn one after another from the seeded global generator,
# so reordering the entries below would change the sampled values.
df = pd.DataFrame(
    {
        "Age": np.random.randint(18, 70, n),  # upper bound exclusive: 18..69
        "ContractMonth": np.random.randint(1, 48, n),  # 1..47
        "MonthlySpend": np.random.randint(30, 300, n),  # 30..299
        "SupportCalls": np.random.randint(0, 10, n),  # 0..9
        "PremiumPlan": np.random.choice([0, 1], size=n),  # 0/1 flag
    }
)

# Target variable: a customer is labelled as churned (1) when ANY rule holds:
#   * ContractMonth is below 6,
#   * SupportCalls is below 5,
#   * PremiumPlan is 0 AND MonthlySpend is above 200.
# `&` binds tighter than `|` in Python, so the last rule is grouped with
# explicit parentheses to make the evaluation order visible.
# NOTE(review): "fewer than 5 support calls => churn" is unusual for a churn
# rule -- confirm the comparison direction is intended.
df["Churn"] = (
    df["ContractMonth"].lt(6)
    | df["SupportCalls"].lt(5)
    | (df["PremiumPlan"].eq(0) & df["MonthlySpend"].gt(200))
).astype(int)

# Summary statistics for every column, shown as the cell output.
df.describe()

Unnamed: 0,Age,ContractMonth,MonthlySpend,SupportCalls,PremiumPlan,Churn
count,500.0,500.0,500.0,500.0,500.0,500.0
mean,44.22,23.97,162.774,4.468,0.516,0.62
std,15.036082,13.381451,78.68434,2.905488,0.500244,0.485873
min,18.0,1.0,30.0,0.0,0.0,0.0
25%,32.0,12.0,92.0,2.0,0.0,0.0
50%,45.0,24.5,163.0,4.0,1.0,1.0
75%,57.0,35.0,229.0,7.0,1.0,1.0
max,69.0,47.0,299.0,9.0,1.0,1.0


In [51]:
import plotly.express as px

# Customer counts for each PremiumPlan value (0/1), coloured by the Churn
# label. The title lets the figure stand on its own when the notebook is skimmed.
fig = px.histogram(
    df,
    x="PremiumPlan",
    color="Churn",
    template="plotly_dark",
    title="Customer count by PremiumPlan, split by Churn",
)
fig.show()

In [14]:
# Distribution of MonthlySpend, coloured by the Churn label. The title lets
# the figure stand on its own when the notebook is skimmed.
fig = px.histogram(
    df,
    x="MonthlySpend",
    color="Churn",
    template="plotly_dark",
    title="MonthlySpend distribution, split by Churn",
)
fig.show()

In [50]:
# Distribution of ContractMonth, coloured by the Churn label. The title lets
# the figure stand on its own when the notebook is skimmed.
fig = px.histogram(
    df,
    x="ContractMonth",
    color="Churn",
    template="plotly_dark",
    title="ContractMonth distribution, split by Churn",
)
fig.show()

In [48]:
# Distribution of SupportCalls, coloured by the Churn label. The title lets
# the figure stand on its own when the notebook is skimmed.
fig = px.histogram(
    df,
    x="SupportCalls",
    color="Churn",
    template="plotly_dark",
    title="SupportCalls distribution, split by Churn",
)
fig.show()

In [54]:
from sklearn import linear_model
from sklearn import model_selection
from sklearn import metrics

# Feature matrix (the five customer attributes) and the binary churn target.
X = df[["Age","ContractMonth","MonthlySpend","SupportCalls","PremiumPlan"]]
y = df["Churn"]

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Logistic regression with the solver's iteration cap set to 1000, fitted on
# the training split only.
reg = linear_model.LogisticRegression(max_iter=1000)
reg.fit(X_train, y_train)

# Hard class predictions for the held-out rows, plus the second column of
# predict_proba (the probability assigned to class 1, i.e. Churn == 1).
y_pred = reg.predict(X_test)
y_prob = reg.predict_proba(X_test)[:, 1]

# Report accuracy, the confusion matrix and the per-class
# precision/recall/F1 table for the held-out rows.
print("Accuracy: \n", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))
print("Confusion Matrix: \n", metrics.confusion_matrix(y_true=y_test, y_pred=y_pred))
print("Classification Report: \n", metrics.classification_report(y_true=y_test, y_pred=y_pred))


Accuracy: 
 0.8333333333333334
Confusion Matrix: 
 [[42 18]
 [ 7 83]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.86      0.70      0.77        60
           1       0.82      0.92      0.87        90

    accuracy                           0.83       150
   macro avg       0.84      0.81      0.82       150
weighted avg       0.84      0.83      0.83       150



In [76]:
# Coefficient table for the fitted logistic regression, largest coefficient
# first. reg.coef_[0] is the model's single row of per-feature coefficients;
# exponentiating a coefficient gives the multiplicative change in the odds of
# Churn == 1 for a one-unit increase in that feature. The features are not
# rescaled before fitting, so magnitudes are per raw unit of each column.
coefs = (
    pd.DataFrame({"Features": X.columns, "Coeficients": reg.coef_[0]})
    .assign(**{"Odds Ratio": lambda table: np.exp(table["Coeficients"])})
    .sort_values(by="Coeficients", ascending=False)
)

coefs

Unnamed: 0,Features,Coeficients,Odds Ratio
2,MonthlySpend,0.007764,1.007794
0,Age,-0.006198,0.993821
1,ContractMonth,-0.074682,0.928039
3,SupportCalls,-0.90246,0.405571
4,PremiumPlan,-1.062089,0.345733
