# Boosting

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [3]:
df = pd.read_csv("bank.csv")
df.head()
df.shape

(11162, 17)

In [4]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,0,1,1,0,2343,1,0,2,5,8,1042,1,-1,0,3,1
1,56,0,1,1,0,45,0,0,2,5,8,1467,1,-1,0,3,1
2,41,9,1,1,0,1270,1,0,2,5,8,1389,1,-1,0,3,1
3,55,7,1,1,0,2476,1,0,2,5,8,579,1,-1,0,3,1
4,54,0,1,2,0,184,0,0,2,5,8,673,2,-1,0,3,1


In [5]:
df.deposit.value_counts()

0    5873
1    5289
Name: deposit, dtype: int64

In [6]:
x = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [7]:
y

0        1
1        1
2        1
3        1
4        1
        ..
11157    0
11158    0
11159    0
11160    0
11161    0
Name: deposit, Length: 11162, dtype: int64

In [8]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y, test_size=0.3, random_state=1)

# Boosting Classifiers

## Adaboost

In [9]:
from sklearn.ensemble import AdaBoostClassifier

ada_clf = AdaBoostClassifier(DecisionTreeClassifier(), n_estimators= 200,learning_rate=0.1)

In [10]:
ada_clf.fit(xtrain, ytrain)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(), learning_rate=0.1,
                   n_estimators=200)

In [11]:
y_pred = ada_clf.predict(xtest)
accuracy_score(ytest, y_pred)

0.786503433860854

In [12]:
print(classification_report(ytest, y_pred))

              precision    recall  f1-score   support

           0       0.79      0.81      0.80      1760
           1       0.78      0.76      0.77      1589

    accuracy                           0.79      3349
   macro avg       0.79      0.79      0.79      3349
weighted avg       0.79      0.79      0.79      3349



## GradientBoosting

In [13]:
from sklearn.ensemble import GradientBoostingClassifier
import time

In [14]:
gbc_clf = GradientBoostingClassifier()

In [15]:
gbc_clf.fit(xtrain, ytrain)

GradientBoostingClassifier()

In [16]:
# start = time.time()
# gbc_clf.fit(xtrain, ytrain)
# end = time.time()
# time_elapsed = end - start
# print(time_elapsed)


In [17]:
y_pred = gbc_clf.predict(xtest)
print('Accuracy : ',accuracy_score(y_pred, ytest))

Accuracy :  0.8378620483726485


In [18]:
print(classification_report(ytest, y_pred))

              precision    recall  f1-score   support

           0       0.87      0.82      0.84      1760
           1       0.81      0.86      0.83      1589

    accuracy                           0.84      3349
   macro avg       0.84      0.84      0.84      3349
weighted avg       0.84      0.84      0.84      3349



In [19]:
# Hyper parameters
params = {'n_estimators' : [10, 80, 100, 120],
          'learning_rate' : [0.1, 0.3, 1.0],    # rate at which model is learning, 0 to 1
          'max_depth' :  [3, 4, 5]}

In [20]:
from sklearn.model_selection import GridSearchCV

In [21]:
grd_gbc_clf = GridSearchCV(gbc_clf, params)

In [22]:
grd_gbc_clf.fit(xtrain, ytrain)

GridSearchCV(estimator=GradientBoostingClassifier(),
             param_grid={'learning_rate': [0.1, 0.3, 1.0],
                         'max_depth': [3, 4, 5],
                         'n_estimators': [10, 80, 100, 120]})

In [23]:
grd_gbc_clf.best_params_

{'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}

In [24]:
pred_using_GS = grd_gbc_clf.predict(xtest)

In [25]:
print('Accuracy : ',accuracy_score(pred_using_GS, ytest))
# print('Estimators : ',grd_gbc_clf.n_estimators_)

Accuracy :  0.8527918781725888


## Xgboost

In [26]:
import xgboost, time

In [27]:
xgb_clf = xgboost.XGBClassifier()

In [28]:
start = time.time()
xgb_clf.fit(xtrain, ytrain)
end = time.time()

time_elapsed = end - start
print(time_elapsed)


70.56235122680664


In [29]:
y_pred = xgb_clf.predict(xtest)

In [30]:
accuracy_score(y_pred, ytest)

0.8459241564646163

# Hyperparameter Tunning

In [32]:
# params = {'n_estimators':[100, 200, 400], 'max_depth':[1,2,3,6], 'learning_rate' :[0.1, 0.2, 0.3, 0.5]}
# grid_search = GridSearchCV(xgb_clf, params)
# grid_search.fit(xtrain, ytrain)

In [None]:
grid_search.best_params_

In [None]:
xgb_clf = xgboost.XGBClassifier(learning_rate = 0.1, max_depth = 3, n_estimators = 400)

In [None]:
xgb_clf.fit(xtrain, ytrain)

In [None]:
y_pred = xgb_clf.predict(xtest)
accuracy_score(y_pred, ytest)

In [None]:
print(classification_report(ytest, y_pred))

In [None]:
#!pip install xgboost             # use it if you get an error