In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF
from sklearn.preprocessing import StandardScaler,OrdinalEncoder
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier,plot_tree
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier,BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,cohen_kappa_score,roc_auc_score,roc_curve
from sklearn.metrics import precision_score,recall_score,f1_score
from sklearn.feature_selection import SequentialFeatureSelector as sfs,RFE
from xgboost import XGBClassifier
import warnings
# Notebook-wide display settings: silence all warnings and use large default figures.
warnings.filterwarnings(action='ignore')
plt.rcParams.update({'figure.figsize': [20, 10]})

In [2]:
# Load the modelling table from the working directory.
DATA_PATH = 'data1.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first five rows as loaded (before any column is removed).
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,0,1,0,0,1,0,0,2,0,0,0,0,0,1,2,-1.165523,-1.285566,0
1,1,1,0,0,0,1,0,0,2,0,2,0,0,0,1,0,3,-0.264071,0.060346,0
2,2,1,0,0,0,1,0,0,2,2,0,0,0,0,0,1,3,-0.367189,-1.244781,1
3,3,1,0,0,0,0,1,0,2,0,2,2,0,0,1,0,0,-0.751387,0.508983,0
4,4,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,2,0.193308,-1.244781,1


In [4]:
# The CSV carries two saved-index columns ('Unnamed: 0.1' and 'Unnamed: 0', both
# visible in the df.head() preview). Dropping only 'Unnamed: 0' leaves the other
# one in the feature matrix as a row-number "feature" (19 columns instead of the
# 18 recorded for X_train), and an in-place drop raises KeyError when the cell
# is re-run. Remove every 'Unnamed*' column by reassignment so the cell is
# idempotent and only the real predictors plus 'Churn' remain.
unnamed_cols = [col for col in df.columns if str(col).startswith('Unnamed')]
df = df.drop(columns=unnamed_cols)

In [5]:
# Separate the target column from the predictor columns.
TARGET = 'Churn'
X = df.drop(columns=[TARGET])
y = df[TARGET]

In [7]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on y — the reports below show roughly
# a 73/27 class balance, so consider stratify=y; confirm before changing results.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [8]:
# Report the dimensions of each split, one line per array.
for label, part in (
    ('X_train: ', X_train),
    ('X_test: ', X_test),
    ('y_train: ', y_train),
    ('y_test: ', y_test),
):
    print(label, part.shape)

X_train:  (4907, 18)
X_test:  (2103, 18)
y_train:  (4907,)
y_test:  (2103,)


## AdaBoost

In [10]:
# AdaBoost with the library-default settings. The seed is pinned to the same
# value used for the train/test split and the gradient-boosting model so that
# refitting the notebook gives repeatable results.
ada = AdaBoostClassifier(random_state=42)

In [11]:
# Fit AdaBoost on the training split and keep a handle on what fit() returns.
ada_model = ada.fit(X=X_train, y=y_train)

#### Performance on train set

In [12]:
# Predict class labels for the training rows with the fitted AdaBoost model.
y_pred = ada_model.predict(X_train)

In [13]:
# Per-class precision / recall / F1 on the training split.
train_report = classification_report(y_true=y_train, y_pred=y_pred)
print(train_report)

              precision    recall  f1-score   support

           0       0.84      0.90      0.87      3589
           1       0.66      0.54      0.59      1318

    accuracy                           0.80      4907
   macro avg       0.75      0.72      0.73      4907
weighted avg       0.79      0.80      0.79      4907



#### Performance on test set

In [14]:
# Predict class labels for the held-out test rows with the fitted AdaBoost model.
y_test_pred = ada_model.predict(X_test)

In [15]:
# Per-class precision / recall / F1 on the held-out test split.
test_report = classification_report(y_true=y_test, y_pred=y_test_pred)
print(test_report)

              precision    recall  f1-score   support

           0       0.85      0.91      0.88      1564
           1       0.67      0.54      0.59       539

    accuracy                           0.81      2103
   macro avg       0.76      0.72      0.74      2103
weighted avg       0.80      0.81      0.81      2103



## Gradient Boosting

In [16]:
# Gradient-boosted trees: hyper-parameters are spelled out and the seed is fixed.
gbm_params = {'learning_rate': 0.1, 'n_estimators': 100, 'random_state': 42}
gbm = GradientBoostingClassifier(**gbm_params)

In [17]:
# Fit gradient boosting on the training split and keep a handle on what fit() returns.
gbm_model = gbm.fit(X=X_train, y=y_train)

#### Performance on train set

In [18]:
# Predict class labels for the training rows with the fitted gradient-boosting model.
y_pred = gbm_model.predict(X_train)

In [19]:
# Per-class precision / recall / F1 on the training split.
train_report = classification_report(y_true=y_train, y_pred=y_pred)
print(train_report)

              precision    recall  f1-score   support

           0       0.85      0.92      0.88      3589
           1       0.72      0.57      0.64      1318

    accuracy                           0.82      4907
   macro avg       0.78      0.74      0.76      4907
weighted avg       0.82      0.82      0.82      4907



#### Performance on test set

In [20]:
# Predict class labels for the held-out test rows with the fitted gradient-boosting model.
y_test_pred = gbm_model.predict(X_test)

In [21]:
# Per-class precision / recall / F1 on the held-out test split.
test_report = classification_report(y_true=y_test, y_pred=y_test_pred)
print(test_report)

              precision    recall  f1-score   support

           0       0.85      0.91      0.88      1564
           1       0.68      0.54      0.60       539

    accuracy                           0.82      2103
   macro avg       0.76      0.72      0.74      2103
weighted avg       0.81      0.82      0.81      2103



## XGBoost

In [22]:
# XGBoost with 100 shallow (depth-2) trees. The seed is stated explicitly, using
# the same value as the train/test split and the gradient-boosting model, so the
# fit stays repeatable even if row/column subsampling is enabled later.
xgb = XGBClassifier(n_estimators=100, max_depth=2, random_state=42)

In [23]:
# Fit XGBoost on the training split and keep a handle on what fit() returns.
xgb_model = xgb.fit(X=X_train, y=y_train)

#### Performance on train set

In [24]:
# Predict class labels for the training rows with the fitted XGBoost model.
y_pred = xgb_model.predict(X_train)

In [25]:
# Per-class precision / recall / F1 on the training split.
train_report = classification_report(y_true=y_train, y_pred=y_pred)
print(train_report)

              precision    recall  f1-score   support

           0       0.85      0.91      0.88      3589
           1       0.70      0.57      0.63      1318

    accuracy                           0.82      4907
   macro avg       0.77      0.74      0.75      4907
weighted avg       0.81      0.82      0.81      4907



#### Performance on test set

In [26]:
# Predict class labels for the held-out test rows with the fitted XGBoost model.
y_test_pred = xgb_model.predict(X_test)

In [27]:
# Per-class precision / recall / F1 on the held-out test split.
test_report = classification_report(y_true=y_test, y_pred=y_test_pred)
print(test_report)

              precision    recall  f1-score   support

           0       0.85      0.91      0.88      1564
           1       0.68      0.55      0.61       539

    accuracy                           0.82      2103
   macro avg       0.77      0.73      0.75      2103
weighted avg       0.81      0.82      0.81      2103

