In [1]:
#importing necessary libriaries
import pandas as pd
import numpy as np 
import sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split # To split the data
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
np.random.seed(53)

In [2]:
#importing data set
bank_data= pd.read_csv("bank-additional-full.csv", sep=";")

In [3]:
# making some changes in data set
change= {"education":{"illiterate":0, "unknown":1, "basic.4y":2, "basic.6y":3, "basic.9y":4, "high.school":5,
                       "university.degree":6,"professional.course":7}  } 
bank_data = bank_data.replace(change)

In [4]:
out=["no", "yes"]
c = bank_data.drop(["month","day_of_week","y","contact"], axis=1)
model_data = pd.get_dummies(c, columns=["job","marital","default","housing","loan","poutcome"],
                           drop_first= True)

out_put = bank_data["y"]

In [5]:
features= list(model_data.columns)

In [6]:
(x_train, x_test, y_train, y_test) = train_test_split(model_data, out_put, train_size=0.75)

In [7]:
model = DecisionTreeClassifier(random_state=44, max_depth=5).fit(x_train,y_train)

In [8]:
score = model.score(x_test,y_test)
prediction = model.predict(x_test) 
score

0.914441099349325

In [9]:
from matplotlib import pyplot as plt 
from sklearn import tree
import graphviz

In [10]:
dot_data = tree.export_graphviz(model, out_file=None, 
                                feature_names=features,  
                                class_names=out,
                                filled=True)

# Draw graph
graph = graphviz.Source(dot_data, format="jpg") 

In [11]:
# Using Random forest in dataset
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
rfc = RandomForestClassifier(n_estimators=100)
rfc.fit(x_train, y_train)
y_pred_r= rfc.predict(x_test)
print(classification_report(y_test,y_pred_r))
y_pred1= (y_pred_r=="yes")
y_test1=(y_test=="yes")
print("ROC score: ",roc_auc_score(y_test1, y_pred1, average="weighted"))

              precision    recall  f1-score   support

          no       0.94      0.97      0.95      9161
         yes       0.65      0.48      0.55      1136

    accuracy                           0.91     10297
   macro avg       0.79      0.72      0.75     10297
weighted avg       0.91      0.91      0.91     10297

ROC score:  0.7231720197838049


In [12]:
from memory_profiler import memory_usage
import time

def space():
    rfc = RandomForestClassifier(n_estimators=100)
    rfc.fit(x_train, y_train)
print("Space taken to train: ",memory_usage(space)[0], "bytes")

start = time.time()
rfc = RandomForestClassifier(n_estimators=100)
rfc.fit(x_train, y_train)   # Fit the model
end =  time.time() - start


print("Time taken to train: ",end)

Space taken to train:  226.25390625 bytes
Time taken to train:  2.802419662475586


In [13]:
tree = DecisionTreeClassifier()
bagging = BaggingClassifier(tree, n_estimators=500, max_samples=0.7, max_features=0.7, n_jobs=-1)
bagging.fit(x_train, y_train)
print("Bagging Accuracy:", bagging.score(x_test, y_test))

Bagging Accuracy: 0.9123045547246771


In [14]:
# Report
y_pred_b= bagging.predict(x_test)
y_pred_b
print(classification_report(y_test,y_pred_b))
y_pred2= (y_pred_b=="yes")
y_test2=(y_test=="yes")
print("ROC score: ",roc_auc_score(y_test2, y_pred2, average="weighted"))

              precision    recall  f1-score   support

          no       0.93      0.98      0.95      9161
         yes       0.69      0.38      0.49      1136

    accuracy                           0.91     10297
   macro avg       0.81      0.68      0.72     10297
weighted avg       0.90      0.91      0.90     10297

ROC score:  0.6777373387799782


In [15]:
def space():
    tree = DecisionTreeClassifier()
    bagging = BaggingClassifier(tree, n_estimators=500, max_samples=0.7, max_features=0.7, n_jobs=-1)
    bagging.fit(x_train, y_train)
print("Space taken to train: ",memory_usage(space)[0], "bytes")

start = time.time()
tree = DecisionTreeClassifier()
bagging = BaggingClassifier(tree, n_estimators=500, max_samples=0.7, max_features=0.7, n_jobs=-1)
bagging.fit(x_train, y_train)  # Fit the model
end =  time.time() - start

print("Time taken to train: ",end)

Space taken to train:  352.55078125 bytes
Time taken to train:  11.244510889053345


In [16]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
# Create an AdaBoost classifier with decision stumps as base estimator
clf = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1), n_estimators=50, random_state=42)

# Fit the classifier to the training data
clf.fit(x_train, y_train)

# Make predictions on the testing data
y_pred_adb = clf.predict(x_test)

In [17]:
# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred_adb)
print('Accuracy:', accuracy)
print(classification_report(y_test,y_pred_adb))
y_pred_adb= (y_pred_adb=="yes")
y_test3 =(y_test=="yes")
print("ROC score: ",roc_auc_score(y_test3, y_pred_adb, average="weighted"))

Accuracy: 0.9094882004467321
              precision    recall  f1-score   support

          no       0.93      0.98      0.95      9161
         yes       0.66      0.38      0.48      1136

    accuracy                           0.91     10297
   macro avg       0.79      0.68      0.71     10297
weighted avg       0.90      0.91      0.90     10297

ROC score:  0.6753834188407379


In [18]:
def space():
    clf = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1), n_estimators=50, random_state=42)
    clf.fit(x_train, y_train)
print("Space taken to train: ",memory_usage(space)[0], "bytes")

start = time.time()
clf = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1), n_estimators=50, random_state=42)

# Fit the classifier to the training data
clf.fit(x_train, y_train)# Fit the model
end =  time.time() - start


print("Time taken to train: ",end)

Space taken to train:  478.52734375 bytes
Time taken to train:  2.7455310821533203
