In [1]:
import pandas as pd
df = pd.read_csv("churn_customer.csv")

In [2]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Separate features and target
X = df.drop('Churn', axis=1)
y = df['Churn']

# Encode target (Yes/No → 1/0)
le = LabelEncoder()
y = le.fit_transform(y)

# Select categorical columns
categorical_cols = X.select_dtypes(include=['object']).columns

# One-hot encode categorical features
X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)


In [3]:
from sklearn.preprocessing import StandardScaler

# Identify numeric columns
numeric_cols = X.select_dtypes(include=['int64','float64']).columns

# Scale them
scaler = StandardScaler()
X[numeric_cols] = scaler.fit_transform(X[numeric_cols])


In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)


Train shape: (5634, 13601)
Test shape: (1409, 13601)


In [None]:
!pip install xgboost

from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
# Define model
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)

# Hyperparameter grid
xgb_param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 1]
}

# GridSearch
xgb_grid = GridSearchCV(
    estimator=xgb,
    param_grid=xgb_param_grid,
    cv=3,
    scoring='roc_auc',
    verbose=1,
    n_jobs=-1
)

xgb_grid.fit(X_train, y_train)




[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting xgboost
  Downloading xgboost-3.0.5-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.5-py3-none-win_amd64.whl (56.8 MB)
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.3/56.8 MB ? eta -:--:--
    --------------------------------------- 0.8/56.8 MB 1.9 MB/s eta 0:00:29
    --------------------------------------- 1.0/56.8 MB 2.1 MB/s eta 0:00:27
   - -------------------------------------- 1.6/56.8 MB 2.1 MB/s eta 0:00:26
   - -------------------------------------- 2.1/56.8 MB 2.0 MB/s eta 0:00:28
   - -------------------------------------- 2.4/56.8 MB 2.1 MB/s eta 0:00:27
   -- ------------------------------------- 2.9/56.8 MB 2.1 MB/s eta 0:00:26
   -- ------------------------------------- 3.4/56.8 MB 2.1 MB/s eta 0:00:26
   -- ------------------------------------- 3.7/56.8 MB 2.0 MB/s eta 0:00:27
   -- ------------------------------------- 4.2/56.8 MB 2.0 MB/s eta 0:00:26
   --- -------------

In [None]:
print("🔹 Best XGBoost Params:", xgb_grid.best_params_)
print("Best ROC-AUC:", xgb_grid.best_score_)

# Evaluate on test
y_pred_xgb = xgb_grid.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_xgb))
print(classification_report(y_test, y_pred_xgb))
print("ROC-AUC:", roc_auc_score(y_test, xgb_grid.predict_proba(X_test)[:,1]))
