In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report


In [23]:
data = pd.read_csv("Telco-Customer-Churn.csv")
data.drop("customerID",axis=1,inplace=True)

In [29]:
print(data.dtypes)
data["TotalCharges"] = pd.to_numeric(data["TotalCharges"], errors='coerce')
data["TotalCharges"].fillna(data['TotalCharges'].median(),inplace=True)
data.isnull().any()
data['gender'].unique()
# data['TotalCharges'].fillna(data['TotalCharges'].median(), inplace=True)

gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges        float64
Churn                object
dtype: object


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data["TotalCharges"].fillna(data['TotalCharges'].median(),inplace=True)


array(['Female', 'Male'], dtype=object)

In [30]:
from sklearn.preprocessing import LabelEncoder
# for col in data.select_dtypes(include='object').columns:
#     if data[col].nunique() == 2:
#         # Binary columns: Yes/No ‚Üí 1/0
#         data[col] = data[col].map({'Yes': 1, 'No': 0})
#     else:
#         # Non-binary categorical columns ‚Üí Label Encoding
#         data[col] = LabelEncoder().fit_transform(data[col])

for col in data.select_dtypes(include='object').columns:
    data[col] = LabelEncoder().fit_transform(data[col])

In [31]:

X = data.drop('Churn', axis=1)
y = data['Churn']

In [32]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [33]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------------------
# 4Ô∏è‚É£ Model Building
# -------------------------------

# K-Nearest Neighbors
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Support Vector Machine
svm = SVC(kernel='rbf', random_state=42)
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Decision Tree
dt = DecisionTreeClassifier(max_depth=5, random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# -------------------------------
# 5Ô∏è‚É£ Model Evaluation
# -------------------------------

def evaluate_model(y_test, y_pred, model_name):
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f"--- {model_name} ---")
    print(f"Accuracy : {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"F1 Score : {f1:.4f}\n")
    return acc, prec, rec, f1

results = {}

results['KNN'] = evaluate_model(y_test, y_pred_knn, "KNN Classifier")
results['SVM'] = evaluate_model(y_test, y_pred_svm, "SVM Classifier")
results['Decision Tree'] = evaluate_model(y_test, y_pred_dt, "Decision Tree Classifier")

# -------------------------------
# 6Ô∏è‚É£ Model Comparison
# -------------------------------

comparison_df = pd.DataFrame(results, index=['Accuracy', 'Precision', 'Recall', 'F1-Score']).T
print("üìä Model Performance Comparison:\n")
print(comparison_df)

--- KNN Classifier ---
Accuracy : 0.7410
Precision: 0.5123
Recall   : 0.5027
F1 Score : 0.5074

--- SVM Classifier ---
Accuracy : 0.7935
Precision: 0.6509
Recall   : 0.4786
F1 Score : 0.5516

--- Decision Tree Classifier ---
Accuracy : 0.7850
Precision: 0.6035
Recall   : 0.5535
F1 Score : 0.5774

üìä Model Performance Comparison:

               Accuracy  Precision    Recall  F1-Score
KNN            0.740951   0.512262  0.502674  0.507422
SVM            0.793471   0.650909  0.478610  0.551618
Decision Tree  0.784954   0.603499  0.553476  0.577406
