In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import gradient_boosting
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# Location of the bank data CSV, relative to the notebook's working directory.
# Forward slashes are used because they are accepted as a separator on Windows,
# Linux and macOS alike, whereas a backslash only separates paths on Windows.
path = "Data/bank_data.csv"

In [3]:
# Load the CSV referenced by `path` into a DataFrame.
data = pd.read_csv(filepath_or_buffer=path)

In [4]:
# Separate the "deposit" column (target) from the remaining columns (features).
y = data["deposit"]
X = data.drop(columns=["deposit"])

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [5]:
# A depth-1 decision tree is trained as the weak baseline classifier.
dt_clf = DecisionTreeClassifier(random_state=0, max_depth=1).fit(X_train, y_train)

# Score the baseline on the held-out split and report it.
dt_score = dt_clf.score(X_test, y_test)
print("Decision Tree(weak classifier) score : ", dt_score)

Decision Tree(weak classifier) score :  0.7094655120931621


In [6]:
# Boost the depth-1 tree (`dt_clf`) with AdaBoost.
# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4. The first positional argument is the base learner under both
# names, so this call works regardless of the installed version.
ada_clf = AdaBoostClassifier(dt_clf, random_state=0)
ada_clf.fit(X_train, y_train)

# Score the boosted model on the held-out split and report it.
ada_score = ada_clf.score(X_test, y_test)
print("AdaBoost score : ", ada_score)

AdaBoost score :  0.8244252015527023


In [7]:
# Gradient boosting with library defaults; only the seed is fixed.
gb_clf = GradientBoostingClassifier(random_state=0).fit(X_train, y_train)

# Score the model on the held-out split and report it.
gb_score = gb_clf.score(X_test, y_test)
print("Gradient Boost score : ", gb_score)

Gradient Boost score :  0.8462227530606151


In [8]:
from xgboost import XGBClassifier

In [9]:
# XGBoost with library defaults; only the seed is fixed.
# XGBClassifier grows its own trees and has no `base_estimator` parameter, so
# the decision stump is not passed here: an unknown keyword would only be
# forwarded to the booster as an unused parameter.
xgb_clf = XGBClassifier(random_state=0)
xgb_clf.fit(X_train, y_train)

# Score the model on the held-out split and report it.
xgb_score = xgb_clf.score(X_test, y_test)
print("XGBoost score : ", xgb_score)

XGBoost score :  0.8438339802926247


# Assignment

In [63]:
# Location of the telecom churn CSV, relative to the notebook's working directory.
# Forward slashes are used because they are accepted as a separator on Windows,
# Linux and macOS alike, whereas a backslash only separates paths on Windows.
path = "Data/telecom_churn.csv"

In [72]:
# Load the CSV referenced by `path` into a DataFrame.
df = pd.read_csv(filepath_or_buffer=path)

In [73]:
# "Churn" is the target; "customerID" is dropped from the features along with it.
y = df["Churn"]
X = df.drop(columns=["customerID", "Churn"])

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [66]:
from sklearn.preprocessing import LabelEncoder

In [74]:
# Cells of TotalCharges that contain a single-space string are turned into NaN;
# every other value is kept unchanged.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
X_train["TotalCharges"] = X_train["TotalCharges"].apply(
    lambda value: np.nan if value == " " else value
)
X_test["TotalCharges"] = X_test["TotalCharges"].apply(
    lambda value: np.nan if value == " " else value
)

In [75]:
# Cast TotalCharges to a floating-point column in both splits.
X_train["TotalCharges"] = X_train["TotalCharges"].astype(float)
X_test["TotalCharges"] = X_test["TotalCharges"].astype(float)

In [86]:
# Fill missing TotalCharges values with the mean of the TRAINING split.
# - The mean is taken from the TotalCharges column only; averaging the whole
#   frame would also touch the object columns that are still present here.
# - The same training mean is applied to the test split so that no statistic
#   computed from the held-out rows leaks into preprocessing.
train_total_charges_mean = X_train["TotalCharges"].mean()
X_train["TotalCharges"] = X_train["TotalCharges"].fillna(train_total_charges_mean)
X_test["TotalCharges"] = X_test["TotalCharges"].fillna(train_total_charges_mean)

In [87]:
# Integer-encode every object-dtype feature column. Each column gets its own
# encoder, which learns its classes from the training split and is then applied
# to the test split.
dt_cat_cols = X_train.select_dtypes(include="object").columns
for feature in dt_cat_cols:
    encoder = LabelEncoder()
    X_train[feature] = encoder.fit_transform(X_train[feature])
    X_test[feature] = encoder.transform(X_test[feature])

In [88]:
# Map the target labels to integers: "Yes" -> 1, "No" -> 0.
churn_to_int = {"Yes": 1, "No": 0}
y_train = y_train.replace(churn_to_int)
y_test = y_test.replace(churn_to_int)

In [90]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [91]:
# Fit AdaBoost with library defaults (seed fixed) and predict the held-out split.
ada_model = AdaBoostClassifier(random_state=0).fit(X_train, y_train)
y_pred = ada_model.predict(X_test)

# Collect accuracy, confusion matrix and per-class report, then print them together.
ada_cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
ada_cr = classification_report(y_true=y_test, y_pred=y_pred)
ada_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(ada_score, ada_cm, ada_cr, end="\n\n")

0.795551348793185 [[1371  189]
 [ 243  310]]               precision    recall  f1-score   support

           0       0.85      0.88      0.86      1560
           1       0.62      0.56      0.59       553

    accuracy                           0.80      2113
   macro avg       0.74      0.72      0.73      2113
weighted avg       0.79      0.80      0.79      2113




In [92]:
from sklearn.model_selection import GridSearchCV

In [93]:
# Fit XGBoost with library defaults (seed fixed) and predict the held-out split.
xgb_model = XGBClassifier(random_state=0).fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)

# Collect accuracy, confusion matrix and per-class report, then print them together.
xgb_cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
xgb_cr = classification_report(y_true=y_test, y_pred=y_pred)
xgb_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(xgb_score, xgb_cm, xgb_cr, end="\n\n")

0.79649787032655 [[1388  172]
 [ 258  295]]               precision    recall  f1-score   support

           0       0.84      0.89      0.87      1560
           1       0.63      0.53      0.58       553

    accuracy                           0.80      2113
   macro avg       0.74      0.71      0.72      2113
weighted avg       0.79      0.80      0.79      2113




In [94]:
# Hyper-parameter grid for the search: five learning rates crossed with
# tree depths 1 and 2 (range(1, 3) excludes 3).
parameters = dict(
    learning_rate=[0.1, 0.15, 0.2, 0.25, 0.3],
    max_depth=range(1, 3),
)

In [96]:
# Grid-search the XGBoost model over `parameters` and predict the held-out split
# with the fitted search object.
clf_model = GridSearchCV(param_grid=parameters, estimator=xgb_model).fit(X_train, y_train)
y_pred = clf_model.predict(X_test)

# Collect accuracy, confusion matrix and per-class report, then print them together.
clf_cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
clf_cr = classification_report(y_true=y_test, y_pred=y_pred)
clf_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(clf_score, clf_cm, clf_cr, end="\n\n")

# Untuned XGBoost accuracy next to the grid-searched accuracy.
print(xgb_score, clf_score)

0.8017037387600567 [[1394  166]
 [ 253  300]]               precision    recall  f1-score   support

           0       0.85      0.89      0.87      1560
           1       0.64      0.54      0.59       553

    accuracy                           0.80      2113
   macro avg       0.75      0.72      0.73      2113
weighted avg       0.79      0.80      0.80      2113


0.79649787032655 0.8017037387600567
