### Install Required Libraries

In [None]:
!pip install lightgbm   # for lightgbm model

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


Defaulting to user installation because normal site-packages is not writeable


ERROR: Invalid requirement: '#'


### Load and process the Data

In [11]:
# Load the raw customer table.
data = pd.read_csv('Databel - Data.csv')

# Columns that must be present; a missing one raises KeyError in `astype` below.
required_categoricals = ['State', 'Churn Category', 'Churn Reason']

# Columns that are cast to categorical only when the file actually contains them.
optional_categoricals = ['Intl Plan', 'Intl Active', 'Unlimited Data Plan', 'Gender', 'Under 30', 'Senior',
                         'Group', 'Device Protection & Online Backup', 'Contract Type', 'Payment Method', 'Churn']

# Cast every selected column to pandas `category` dtype in a single pass.
categorical_dtypes = {col: 'category' for col in required_categoricals}
categorical_dtypes.update(
    {col: 'category' for col in optional_categoricals if col in data.columns}
)
data = data.astype(categorical_dtypes)

# Set 'Churn Category' and 'Churn Reason' aside before training: they are kept
# (indexed like `data`) so they can be re-attached to the results later, but they
# are not used as model features.
churn_info = data[['Churn Category', 'Churn Reason']]
data = data.drop(columns=['Churn Category', 'Churn Reason'])

# Features: everything except the target and the two identifier columns.
X = data.drop(['Churn', 'Customer ID', 'Phone Number'], axis=1)

# Target: 'No' -> 0, 'Yes' -> 1.
# 'Churn' is categorical at this point, so the mapped result would otherwise stay
# categorical; casting to int yields plain 0/1 labels. The cast also fails
# immediately if any label is not exactly 'No' or 'Yes' (such values map to NaN),
# instead of letting missing targets flow into the split and the model.
y = data['Churn'].map({'No': 0, 'Yes': 1}).astype(int)

# Hold out 30% of the rows for testing; stratify so both splits keep the same churn rate.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Initialize the LightGBM classifier (hyperparameters unchanged).
# NOTE(review): per the LightGBM parameter docs, `subsample` (bagging_fraction) only
# takes effect when `subsample_freq` (bagging_freq) is > 0; it is left at its default
# here, so row subsampling is presumably inactive. Confirm whether bagging was intended.
model = LGBMClassifier(
    boosting_type='gbdt', learning_rate=0.1, n_estimators=200, max_depth=5, subsample=0.8, colsample_bytree=0.8
)



### Train the LightGBM Model

In [12]:
# Fit the classifier on the training split
model.fit(X_train, y_train)

# Score the held-out split: per-class probabilities, then hard class labels
y_pred_proba_test = model.predict_proba(X_test)
y_pred_test = model.predict(X_test)

[LightGBM] [Info] Number of positive: 1257, number of negative: 3423
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000973 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1932
[LightGBM] [Info] Number of data points in the train set: 4680, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.268590 -> initscore=-1.001789
[LightGBM] [Info] Start training from score -1.001789


### Attach Predictions to the Test Set and Save

In [13]:
# Start from the customer identifier plus exactly the feature columns of the test split.
# 'Customer ID' was dropped from X before training and the CSV below is written
# without the index, so without this column a saved prediction could not be traced
# back to a customer.
test_results = data.loc[X_test.index, ['Customer ID'] + list(X_test.columns)].copy()

# True and predicted labels (0 = 'No', 1 = 'Yes'); the true label is stored as a plain
# integer regardless of the dtype `y_test` carries.
test_results['Actual_Churn'] = y_test.astype(int).values
test_results['Predicted_Churn'] = y_pred_test

# Column 0 / column 1 of predict_proba are taken as P(Churn = 0) / P(Churn = 1).
# NOTE(review): this assumes model.classes_ is ordered [0, 1] — confirm.
test_results['False_Prediction_Probability'] = y_pred_proba_test[:, 0]
test_results['True_Prediction_Probability'] = y_pred_proba_test[:, 1]

# Re-attach 'Churn Category' and 'Churn Reason' by row index.
test_results = test_results.merge(churn_info, left_index=True, right_index=True)

# Save the test dataset with results (the row index is intentionally not written).
test_results.to_csv('predicted_dataset.csv', index=False)



### Evaluate the Model: Accuracy, Confusion Matrix, and Classification Report

In [14]:
# Compute each metric for the held-out predictions once, then report them
test_accuracy = accuracy_score(y_test, y_pred_test)
test_confusion = confusion_matrix(y_test, y_pred_test)
test_report = classification_report(y_test, y_pred_test)

print("Accuracy:", test_accuracy)
print("Confusion Matrix:\n", test_confusion)
print("Classification Report:\n", test_report)

Accuracy: 0.9013452914798207
Confusion Matrix:
 [[1381   87]
 [ 111  428]]
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.94      0.93      1468
           1       0.83      0.79      0.81       539

    accuracy                           0.90      2007
   macro avg       0.88      0.87      0.87      2007
weighted avg       0.90      0.90      0.90      2007

