In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report
)

import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

In [None]:
df = pd.read_csv("../data/raw/telco_churn.csv")
df.head()

In [None]:
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.dropna()

In [None]:
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

In [None]:
categorical_cols = df.select_dtypes(include='object').columns

le = LabelEncoder()

for col in categorical_cols:
    df[col] = le.fit_transform(df[col])

In [None]:
X = df.drop('Churn', axis=1)
y = df['Churn']

print("Feature shape:", X.shape)
print("Target shape:", y.shape)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

In [None]:
model = LogisticRegression(max_iter=1000)

model.fit(X_train, y_train)


In [None]:
y_pred = model.predict(X_test)

In [None]:
print("Accuracy :", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall   :", recall_score(y_test, y_pred))
print("F1 Score :", f1_score(y_test, y_pred))

In [None]:
print(classification_report(y_test, y_pred))

In [None]:
cm = confusion_matrix(y_test, y_pred)

sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix â€“ Baseline Model")
plt.show()

Logistic Regression provides a reasonable starting point
Accuracy is misleading due to class imbalance
Recall for churned customers is relatively low
Label encoding for categorical variables is suboptimal
No feature scaling was applied
Model struggles to capture complex patterns

Assumes linear relationships
Treats categorical variables improperly
Ignores feature scaling
No class imbalance handling
Default hyperparameters