In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the bank dataset; the path is relative, so bank.csv must sit in the
# notebook's working directory.
df = pd.read_csv("bank.csv")

In [3]:
# Preview the first rows to sanity-check the load.
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,0,1,1,0,2343,1,0,2,5,8,1042,1,-1,0,3,1
1,56,0,1,1,0,45,0,0,2,5,8,1467,1,-1,0,3,1
2,41,9,1,1,0,1270,1,0,2,5,8,1389,1,-1,0,3,1
3,55,7,1,1,0,2476,1,0,2,5,8,579,1,-1,0,3,1
4,54,0,1,2,0,184,0,0,2,5,8,673,2,-1,0,3,1


In [4]:
# (rows, columns) of the loaded frame.
df.shape

(11162, 17)

In [5]:
# Per-column dtype and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   age        11162 non-null  int64
 1   job        11162 non-null  int64
 2   marital    11162 non-null  int64
 3   education  11162 non-null  int64
 4   default    11162 non-null  int64
 5   balance    11162 non-null  int64
 6   housing    11162 non-null  int64
 7   loan       11162 non-null  int64
 8   contact    11162 non-null  int64
 9   day        11162 non-null  int64
 10  month      11162 non-null  int64
 11  duration   11162 non-null  int64
 12  campaign   11162 non-null  int64
 13  pdays      11162 non-null  int64
 14  previous   11162 non-null  int64
 15  poutcome   11162 non-null  int64
 16  deposit    11162 non-null  int64
dtypes: int64(17)
memory usage: 1.4 MB


In [6]:
# Count missing values in each column.
df.isnull().sum()

age          0
job          0
marital      0
education    0
default      0
balance      0
housing      0
loan         0
contact      0
day          0
month        0
duration     0
campaign     0
pdays        0
previous     0
poutcome     0
deposit      0
dtype: int64

In [7]:
# Features and target.
# The target is selected by name rather than by position, so the split does
# not silently pick the wrong column if bank.csv gains or reorders columns.
X = df.drop(columns="deposit")
y = df["deposit"]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

## Adaptive Boosting

In [8]:
# AdaBoost baseline: 100 estimators, seeded for reproducibility.
model = AdaBoostClassifier(
    random_state=1,
    n_estimators=100,
)
# Kept as the trailing expression so the notebook echoes the fitted estimator.
model.fit(X=X_train, y=y_train)

AdaBoostClassifier(n_estimators=100, random_state=1)

In [9]:
# Evaluate AdaBoost on the held-out split: predict labels, then print the
# per-class precision / recall / F1 table.
y_pred = model.predict(X_test)
ada_report = classification_report(y_true=y_test, y_pred=y_pred)
print(ada_report)

              precision    recall  f1-score   support

           0       0.83      0.86      0.84      1760
           1       0.83      0.80      0.82      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83      0.83      3349
weighted avg       0.83      0.83      0.83      3349



## Gradient Boosting

In [10]:
from sklearn.ensemble import GradientBoostingClassifier

In [11]:
# Gradient boosting: 150 estimators at learning rate 0.2, seeded.
model2 = GradientBoostingClassifier(
    n_estimators=150,
    learning_rate=0.2,
    random_state=1,
)
# Kept as the trailing expression so the notebook echoes the fitted estimator.
model2.fit(X=X_train, y=y_train)

GradientBoostingClassifier(learning_rate=0.2, n_estimators=150, random_state=1)

In [12]:
# Evaluate gradient boosting on the same held-out split.
# Note: y_pred is reused across models and now holds model2's predictions.
y_pred = model2.predict(X_test)
gb_report = classification_report(y_true=y_test, y_pred=y_pred)
print(gb_report)

              precision    recall  f1-score   support

           0       0.88      0.83      0.85      1760
           1       0.82      0.87      0.85      1589

    accuracy                           0.85      3349
   macro avg       0.85      0.85      0.85      3349
weighted avg       0.85      0.85      0.85      3349



## XGBoost

In [13]:
# Prerequisite: xgboost must be installed in the kernel's environment (e.g. run `%pip install xgboost` once).

In [14]:
# Re-apply the blanket warning filter before importing xgboost (the same
# filter is already installed by the first cell of the notebook).
import warnings

warnings.filterwarnings("ignore")

from xgboost import XGBClassifier

# XGBoost classifier with 200 estimators, seeded; every other setting is left
# at the library default.
xgb = XGBClassifier(
    random_state=1,
    n_estimators=200,
)

# Kept as the trailing expression so the notebook echoes the fitted estimator.
xgb.fit(X_train, y_train)




XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=200, n_jobs=4,
              num_parallel_tree=1, predictor='auto', random_state=1,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [15]:
# Evaluate XGBoost on the held-out split.
# Note: y_pred is reused across models and now holds xgb's predictions.
y_pred = xgb.predict(X_test)
xgb_report = classification_report(y_true=y_test, y_pred=y_pred)
print(xgb_report)



              precision    recall  f1-score   support

           0       0.87      0.82      0.85      1760
           1       0.82      0.86      0.84      1589

    accuracy                           0.84      3349
   macro avg       0.84      0.84      0.84      3349
weighted avg       0.84      0.84      0.84      3349



# Hyperparameter tuning using GridSearchCV

In [28]:
# Search space for the grid search: 4 x 3 x 2 x 3 = 72 candidate combinations.
param_grid = dict(
    n_estimators=[75, 100, 125, 150],
    learning_rate=[0.1, 0.2, 0.3],
    min_samples_leaf=[10, 20],
    max_depth=[2, 3, 4],
)

In [29]:
# Base estimator handed to the grid search; only the seed is fixed here, the
# tuned hyperparameters come from param_grid.
gbc = GradientBoostingClassifier(random_state=1)

In [30]:
# Exhaustive search over param_grid, scoring each candidate with 2-fold
# cross-validation.
from sklearn.model_selection import GridSearchCV

gs = GridSearchCV(
    estimator=gbc,
    param_grid=param_grid,
    cv=2,
)

In [31]:
# Run the search on the training split only, so the test split stays unseen
# during tuning.
gs.fit(X_train,y_train)

GridSearchCV(cv=2, estimator=GradientBoostingClassifier(random_state=1),
             param_grid={'learning_rate': [0.1, 0.2, 0.3],
                         'max_depth': [2, 3, 4], 'min_samples_leaf': [10, 20],
                         'n_estimators': [75, 100, 125, 150]})

In [32]:
# Hyperparameter combination the search selected.
gs.best_params_

{'learning_rate': 0.2,
 'max_depth': 3,
 'min_samples_leaf': 10,
 'n_estimators': 150}

In [33]:
# Refit a standalone model using the winning hyperparameters taken directly
# from the grid search. The previous hand-copied values omitted max_depth
# (relying on the library default) and would go stale whenever the grid, the
# data, or the CV setup changes. param_grid has no random_state key, so the
# explicit seed cannot collide with the unpacked parameters.
final_model = GradientBoostingClassifier(random_state=1, **gs.best_params_)
# Kept as the trailing expression so the notebook echoes the fitted estimator.
final_model.fit(X_train, y_train)

GradientBoostingClassifier(learning_rate=0.2, min_samples_leaf=10,
                           n_estimators=150, random_state=1)

In [34]:
# Predict on the held-out split with the tuned model (overwrites the earlier
# models' y_pred).
y_pred = final_model.predict(X_test)

In [35]:
# Per-class precision / recall / F1 for the tuned model's predictions in y_pred.
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.88      0.83      0.85      1760
           1       0.82      0.87      0.85      1589

    accuracy                           0.85      3349
   macro avg       0.85      0.85      0.85      3349
weighted avg       0.85      0.85      0.85      3349

