In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Path to the raw churn CSV.
DATA_PATH = "/content/sample_data/WA_Fn-UseC_-Telco-Customer-Churn.csv"

# Load the file, discard the identifier column, coerce TotalCharges to a number
# (values that cannot be parsed become NaN) and drop every row holding a NaN.
df = (
    pd.read_csv(DATA_PATH)
    .drop(columns='customerID')
    .assign(TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce'))
    .dropna()
)

# Encode the remaining object-dtype columns:
#   * exactly two distinct values -> integer codes via LabelEncoder
#   * anything else               -> one-hot columns via get_dummies
text_columns = list(df.select_dtypes(include='object').columns)
binary_columns = [name for name in text_columns if df[name].nunique() == 2]
multi_columns = [name for name in text_columns if name not in binary_columns]

for name in binary_columns:
    df[name] = LabelEncoder().fit_transform(df[name])

if multi_columns:
    # One call appends the dummy groups in column order, matching what a
    # column-by-column get_dummies loop produces.
    df = pd.get_dummies(df, columns=multi_columns)

# Separate the target from the feature matrix
y = df['Churn']
X = df.drop(columns='Churn')


In [2]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)


In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, roc_auc_score

X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Logistic Regression
log_model = LogisticRegression(max_iter=1000)
log_model.fit(X_train, y_train)
log_pred = log_model.predict(X_test)

# Decision Tree
tree_model = DecisionTreeClassifier()
tree_model.fit(X_train, y_train)
tree_pred = tree_model.predict(X_test)

print("Logistic Regression:")
print(classification_report(y_test, log_pred))
print("AUC:", roc_auc_score(y_test, log_model.predict_proba(X_test)[:,1]))

print("\nDecision Tree:")
print(classification_report(y_test, tree_pred))
print("AUC:", roc_auc_score(y_test, tree_model.predict_proba(X_test)[:,1]))


Logistic Regression:
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      1037
           1       0.85      0.83      0.84      1029

    accuracy                           0.84      2066
   macro avg       0.84      0.84      0.84      2066
weighted avg       0.84      0.84      0.84      2066

AUC: 0.9292733486837357

Decision Tree:
              precision    recall  f1-score   support

           0       0.79      0.82      0.81      1037
           1       0.81      0.79      0.80      1029

    accuracy                           0.80      2066
   macro avg       0.80      0.80      0.80      2066
weighted avg       0.80      0.80      0.80      2066

AUC: 0.8051614088258254


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


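| Metric    | Description                                                          |
| --------- | -------------------------------------------------------------------- |
| Accuracy  | Fraction of all predictions that are correct: (TP + TN) / total      |
| Precision | TP / (TP + FP): of the samples predicted positive, how many are      |
| Recall    | TP / (TP + FN): of the truly positive samples, how many are found    |
| F1-Score  | Harmonic mean of precision and recall                                |
| AUC-ROC   | Ability to rank positives above negatives across all thresholds      |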
