In [2]:
import pandas as pd

# Read the Telco customer-churn export; the path is relative to the notebook's
# working directory. Printing the frame gives a first look at its columns.
data = pd.read_csv(filepath_or_buffer="WA_Fn-UseC_-Telco-Customer-Churn.csv")
print(data)

      customerID  gender  SeniorCitizen Partner Dependents  tenure  \
0     7590-VHVEG  Female              0     Yes         No       1   
1     5575-GNVDE    Male              0      No         No      34   
2     3668-QPYBK    Male              0      No         No       2   
3     7795-CFOCW    Male              0      No         No      45   
4     9237-HQITU  Female              0      No         No       2   
...          ...     ...            ...     ...        ...     ...   
7038  6840-RESVB    Male              0     Yes        Yes      24   
7039  2234-XADUH  Female              0     Yes        Yes      72   
7040  4801-JZAZL  Female              0     Yes        Yes      11   
7041  8361-LTMKD    Male              1     Yes         No       4   
7042  3186-AJIEK    Male              0      No         No      66   

     PhoneService     MultipleLines InternetService OnlineSecurity  ...  \
0              No  No phone service             DSL             No  ...   
1        

In [8]:
# CUSTOMER CHURN PREDICTION
# MODEL: LOGISTIC REGRESSION
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report


# 1. LOAD DATASET
# The path is relative to the notebook's working directory.
data = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")


# 2. SELECT IMPORTANT FEATURES ONLY
# .copy() gives an independent frame, so the column assignment in step 3 is
# not a write into a selection of the raw frame (avoids SettingWithCopyWarning).
data = data[
    ['tenure', 'MonthlyCharges', 'Contract', 'PaymentMethod', 'TechSupport', 'Churn']
].copy()


# 3. ENCODE TARGET VARIABLE
# Series.map turns every label that is not a key of the dict (including
# missing values) into NaN. Fail here with a clear message instead of letting
# NaN targets reach the stratified split / model fit further down.
churn_encoded = data['Churn'].map({'Yes': 1, 'No': 0})
unmapped_labels = data.loc[churn_encoded.isna(), 'Churn']
if not unmapped_labels.empty:
    raise ValueError(
        "Unexpected values in 'Churn' (expected 'Yes' or 'No'): "
        f"{sorted(unmapped_labels.astype(str).unique())}"
    )
data['Churn'] = churn_encoded


# 4. ONE-HOT ENCODE CATEGORICAL FEATURES
# drop_first=True drops the first level of each categorical column, so that
# level is represented by all of the column's remaining dummies being 0.
data = pd.get_dummies(
    data,
    columns=['Contract', 'PaymentMethod', 'TechSupport'],
    drop_first=True
)


# 5. SPLIT FEATURES AND TARGET
# X keeps every column except the target; its column order is what the
# scaler and the model are fitted on.
X = data.drop(columns='Churn')
y = data['Churn']


# 6. TRAIN-TEST SPLIT
# Hold out 20% of the rows for evaluation. The split is stratified on y and
# uses a fixed seed so that it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


# 7. FEATURE SCALING
# The scaler is fitted on the training rows only; the same fitted scaler is
# then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


# 8. TRAIN LOGISTIC REGRESSION MODEL
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)


# 9. MODEL EVALUATION (OPTIONAL)
# Score the held-out rows and print the per-class report.
y_pred = model.predict(X_test_scaled)
print("MODEL PERFORMANCE:\n")
print(classification_report(y_test, y_pred))


# ================================
# NEW CUSTOMER PREDICTION
# ================================

# 10. CREATE NEW CUSTOMER INPUT
# A single row written directly in the one-hot layout produced by the
# training-time encoding. Leaving every dummy of a group at 0 selects the
# level that drop_first=True removed for that group.
new_customer = pd.DataFrame({
    'tenure': [3],
    'MonthlyCharges': [85],
    'Contract_One year': [0],
    'Contract_Two year': [0],
    'PaymentMethod_Credit card (automatic)': [0],
    'PaymentMethod_Electronic check': [1],
    'PaymentMethod_Mailed check': [0],
    'TechSupport_Yes': [0]
})


# 11. MATCH TRAINING COLUMNS (FIXES KEYERROR)
# reindex fills training columns that are missing here with 0, but it also
# silently DROPS any column that is not in X.columns. A misspelled dummy name
# would therefore vanish and be replaced by zeros, so reject unknown names
# before aligning.
unknown_columns = new_customer.columns.difference(X.columns)
if len(unknown_columns) > 0:
    raise ValueError(
        f"new_customer has columns not seen during training: {list(unknown_columns)}"
    )
new_customer = new_customer.reindex(columns=X.columns, fill_value=0)


# 12. SCALE NEW CUSTOMER DATA
# Reuse the scaler fitted on the training rows; do not refit on a single row.
new_customer_scaled = scaler.transform(new_customer)


# 13. PREDICT CHURN
# predict_proba columns follow model.classes_; with targets encoded as 0/1,
# column 1 is the probability of class 1 (churn).
prediction = model.predict(new_customer_scaled)
probability = model.predict_proba(new_customer_scaled)[0][1]


# 14. FINAL OUTPUT
# Both branches report the same number: the probability of churn.
if prediction[0] == 1:
    print(f"Customer WILL churn (Probability: {probability:.2f})")
else:
    print(f"Customer will NOT churn (Probability: {probability:.2f})")


MODEL PERFORMANCE:

              precision    recall  f1-score   support

           0       0.83      0.89      0.86      1035
           1       0.61      0.50      0.55       374

    accuracy                           0.78      1409
   macro avg       0.72      0.69      0.70      1409
weighted avg       0.77      0.78      0.78      1409

Customer WILL churn (Probability: 0.73)
