In [1]:
import pandas as pd

dataset = pd.read_csv("customer_churn.csv")
x = dataset.iloc[:,:-1]
y = dataset["churn"]

This notebook compares multiple classification models
to understand biasâ€“variance tradeoffs and model suitability.


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

x_train,x_test, y_train,y_test = train_test_split(
    x, y, test_size = 0.2, random_state = 42
)

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

log_model = LogisticRegression()
log_model.fit(x_train_scaled,y_train)

y_log_pred = log_model.predict(x_test_scaled)

In [5]:
from sklearn.tree import DecisionTreeClassifier

tree_model = DecisionTreeClassifier(random_state=42)
tree_model.fit(x_train,y_train)

y_tree_pred = tree_model.predict(x_test)

In [6]:
print("Logistic Regression")
print(classification_report(y_test, y_log_pred))

print("Decision Tree")
print(classification_report(y_test, y_tree_pred))

Logistic Regression
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Decision Tree
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



Logistic regression shows higher bias but lower variance,
resulting in stable generalization.

Decision Tree classification has lower bias but higher variance,
making it prone to overfitting on small datasets.


In [11]:
tree_limited = DecisionTreeClassifier(max_depth=4, random_state=42)
tree_limited.fit(x_train,y_train)

y_tree_limited = tree_limited.predict(x_test)

print("Controlled Decision Tree")
print(classification_report(y_test, y_tree_limited))

Controlled Decision Tree
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



# Conclusion:
Model selection should balance bias and variance.

For this dataset, logistic regression provides better generalization,
while decision trees require careful tuning to avoid overfitting.
