In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.utils.class_weight import compute_class_weight

import pickle


In [2]:
df = pd.read_excel("Telco_customer_churn.xlsx")
scaler = StandardScaler()

In [3]:
columns_to_drop = [
    "CustomerID",
    "Churn Score",
    "Churn Reason",
    "Lat Long",
    "Country", "State", "City", "Zip Code", "Latitude", "Longitude"
]

df = df.drop(columns=columns_to_drop)

In [4]:
df = df.drop(columns=["CLTV"])

In [5]:
df["Total Charges"] = pd.to_numeric(df["Total Charges"], errors="coerce")

In [6]:
df["Total Charges"].fillna(df["Total Charges"].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Total Charges"].fillna(df["Total Charges"].median(), inplace=True)


In [7]:
X = df.drop(columns=["Churn Value", "Churn Label"])
y = df["Churn Value"]

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

In [9]:
X = pd.get_dummies(X, drop_first=True)
print(X.dtypes.unique())

[dtype('int64') dtype('float64') dtype('bool')]


In [10]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

In [11]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [12]:
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train),
    y=y_train
)

class_weight_dict = {
    0: class_weights[0],
    1: class_weights[1]
}

In [13]:
log_model = LogisticRegression(
    max_iter=1000,
    class_weight=class_weight_dict,
    solver="lbfgs"
)

log_model.fit(X_train_scaled, y_train)


In [14]:
y_pred = log_model.predict(X_test_scaled)
y_proba = log_model.predict_proba(X_test_scaled)[:, 1]

print("Classification Report:\n")
print(classification_report(y_test, y_pred))

print("ROC-AUC Score:", roc_auc_score(y_test, y_proba))

Classification Report:

              precision    recall  f1-score   support

           0       0.90      0.73      0.81      1035
           1       0.51      0.78      0.62       374

    accuracy                           0.74      1409
   macro avg       0.71      0.76      0.71      1409
weighted avg       0.80      0.74      0.76      1409

ROC-AUC Score: 0.8486011005192591


In [15]:
model_package = {
    "model": log_model,
    "scaler": scaler,
    "columns": X.columns
}

with open("logistic_churn_model.pkl", "wb") as f:
    pickle.dump(model_package, f)

print("Model saved successfully.")


Model saved successfully.


In [16]:
coef_df = pd.DataFrame({
    "Feature": X.columns,
    "Coefficient": log_model.coef_[0]
}).sort_values(by="Coefficient", ascending=False)

print(coef_df.head(10))
print(coef_df.tail(10))

                            Feature  Coefficient
11     Internet Service_Fiber optic     0.691924
3                     Total Charges     0.496175
24             Streaming Movies_Yes     0.249412
22                 Streaming TV_Yes     0.238110
10               Multiple Lines_Yes     0.194495
29  Payment Method_Electronic check     0.186322
27            Paperless Billing_Yes     0.156499
6                       Partner_Yes     0.140241
18            Device Protection_Yes     0.036268
30      Payment Method_Mailed check     0.014987
                                  Feature  Coefficient
19       Tech Support_No internet service    -0.085369
17  Device Protection_No internet service    -0.085369
23   Streaming Movies_No internet service    -0.085369
20                       Tech Support_Yes    -0.095406
14                    Online Security_Yes    -0.112140
25                      Contract_One year    -0.322299
26                      Contract_Two year    -0.609402
7                    