In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [2]:
# Simulated customer churn dataset.
# The values come from a synthetic classification generator, so the business
# names below are only labels attached to the generated columns.
feature_names = [
    "monthly_charges",
    "tenure",
    "num_products",
    "support_calls",
    "contract_length",
    "online_security",
    "payment_score",
    "usage_score",
    "satisfaction_score",
    "discount_level",
]

# 2000 rows, 10 features (6 informative, 2 redundant), binary target.
# The fixed random_state makes the generated data reproducible.
X, y = make_classification(
    n_samples=2000, n_features=10, n_informative=6, n_redundant=2,
    n_classes=2, random_state=42,
)

# Build the frame from the feature matrix and append the target as "churn".
df = pd.DataFrame(X, columns=feature_names).assign(churn=y)

# Preview the first rows as the cell output.
df.head()


Unnamed: 0,monthly_charges,tenure,num_products,support_calls,contract_length,online_security,payment_score,usage_score,satisfaction_score,discount_level,churn
0,-3.230485,1.044975,0.695293,4.547094,0.709948,-2.87327,1.622875,1.197423,-1.870208,7.149389,1
1,-0.969457,-0.027843,0.491814,-0.904605,0.095751,1.047343,-0.838283,-0.857172,-0.848319,0.039613,0
2,-0.233425,-1.991787,-0.457817,-0.40707,-2.221362,-2.598014,0.857989,-0.579129,-1.251154,0.165376,0
3,-1.746842,1.005994,-1.276618,-2.529162,-1.537575,-2.303886,-0.133311,-0.748383,0.509019,2.254463,0
4,-1.003851,-1.531813,1.791782,0.729542,0.189583,-0.549143,1.422158,-0.000132,1.236738,-0.529772,1


In [3]:
# Separate the predictor columns from the churn label.
X, y = df[feature_names], df["churn"]

# Hold out 20% of the rows for evaluation. The labels are passed as `stratify`
# so the split is stratified on churn, and the seed keeps it reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Show the shapes of both partitions as the cell output.
(X_train.shape, X_test.shape)


((1600, 10), (400, 10))

In [6]:
# Baseline classifier: logistic regression trained on the training split.
# max_iter=1000 is the iteration limit handed to the solver; fit() returns
# the estimator itself, so construction and training can be chained.
log_reg = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predict labels for the held-out rows and score them against the true labels.
y_pred = log_reg.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Logistic Regression accuracy:", round(accuracy, 3))


Logistic Regression accuracy: 0.845


In [7]:
# Confusion matrix of the held-out predictions
# (rows: true class, columns: predicted class).
cm = confusion_matrix(y_test, y_pred)

# sep="\n" places the matrix on its own line. With the default separator,
# print() would insert a space between the heading and the matrix, shifting
# the first row one column out of alignment with the rows below it.
print("Confusion Matrix:", cm, sep="\n")

# Per-class precision / recall / F1 plus the overall averages.
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))


Confusion Matrix:
 [[173  27]
 [ 35 165]]

Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.86      0.85       200
           1       0.86      0.82      0.84       200

    accuracy                           0.84       400
   macro avg       0.85      0.84      0.84       400
weighted avg       0.85      0.84      0.84       400



In [8]:
# Closing summary: report the test-set accuracy as a percentage (rounded to
# two decimals) followed by a note on how this model is meant to be used.
print(
    f"Model achieved {round(accuracy*100, 2)}% accuracy on the test set.",
    "This simple model can be a baseline for further churn prediction improvements.",
    sep="\n",
)


Model achieved 84.5% accuracy on the test set.
This simple model can be a baseline for further churn prediction improvements.
