# Bank - Marketing Problem

## Importing the package and data

In [None]:
import pandas as pd
import numpy as np


In [None]:
# Load the raw campaign data from the workbook that sits next to this notebook
# and preview the first rows.
DATA_PATH = 'Bank Marketing.xlsx'
bank = pd.read_excel(DATA_PATH)
bank.head()

Unnamed: 0,Age,Job,Marital Status,Account Balance,Own House,Personal Loan,No of campaigns,Subscription
0,59,unemployed,married,0,0,0,0,0
1,36,Others,married,3057,0,0,0,1
2,47,blue-collar,divorced,126,1,0,0,1
3,43,management,divorced,388,1,0,0,1
4,34,self-employed,single,462,0,0,0,1


## EDA

In [None]:
# Summarise column dtypes and non-null counts to confirm the schema before modelling.
bank.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4521 entries, 0 to 4520
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Age              4521 non-null   int64 
 1   Job              4521 non-null   object
 2   Marital Status   4521 non-null   object
 3   Account Balance  4521 non-null   int64 
 4   Own House        4521 non-null   int64 
 5   Personal Loan    4521 non-null   int64 
 6   No of campaigns  4521 non-null   int64 
 7   Subscription     4521 non-null   int64 
dtypes: int64(6), object(2)
memory usage: 282.7+ KB


In [None]:
# Count missing entries per column (isna is the canonical spelling of isnull).
bank.isna().sum()

Age                0
Job                0
Marital Status     0
Account Balance    0
Own House          0
Personal Loan      0
No of campaigns    0
Subscription       0
dtype: int64

In [None]:
# No missing values were found above, so move on to summary statistics
# (count, mean, std, min, quartiles, max) for the numeric columns.
bank.describe()

Unnamed: 0,Age,Account Balance,Own House,Personal Loan,No of campaigns,Subscription
count,4521.0,4521.0,4521.0,4521.0,4521.0,4521.0
mean,41.170095,1422.657819,0.566025,0.152842,0.542579,0.11524
std,10.576211,3009.638142,0.495676,0.359875,1.693562,0.319347
min,19.0,-3313.0,0.0,0.0,0.0,0.0
25%,33.0,69.0,0.0,0.0,0.0,0.0
50%,39.0,444.0,1.0,0.0,0.0,0.0
75%,49.0,1480.0,1.0,0.0,0.0,0.0
max,87.0,71188.0,1.0,1.0,25.0,1.0


## Create dummy variables

In [None]:
# (rows, columns) of the raw frame, as a baseline before the categorical columns are encoded.
bank.shape

(4521, 8)

In [None]:
# One-hot encode the categorical predictors. drop_first=True omits the first
# level of each variable, which then acts as the baseline category.
cat_col = ['Job','Marital Status']
# dtype=int keeps the indicator columns as 0/1 on every pandas version
# (pandas >= 2.0 would otherwise produce booleans by default).
bank_dummy = pd.get_dummies(bank, columns=cat_col, drop_first=True, dtype=int)

# A bare expression in the middle of a cell is never rendered, so print the
# shape explicitly and leave a short preview as the cell's displayed value.
print(bank_dummy.shape)
bank_dummy.head()

Unnamed: 0,Age,Account Balance,Own House,Personal Loan,No of campaigns,Subscription,Job_blue-collar,Job_management,Job_self-employed,Job_unemployed,Marital Status_married,Marital Status_single
0,59,0,0,0,0,0,0,0,0,1,1,0
1,36,3057,0,0,0,1,0,0,0,0,1,0
2,47,126,1,0,0,1,1,0,0,0,0,0
3,43,388,1,0,0,1,0,1,0,0,0,0
4,34,462,0,0,0,1,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
4516,58,0,1,0,0,0,1,0,0,0,1,0
4517,38,0,1,0,0,0,0,0,0,0,1,0
4518,53,2627,1,1,0,0,0,0,1,0,1,0
4519,49,371,1,0,0,0,1,0,0,0,1,0


In [None]:
# Split the encoded frame into predictors and target.
# X: every column except Subscription.
X = bank_dummy.drop('Subscription', axis=1)
# Y: the Subscription column, kept as a one-column DataFrame (double brackets).
Y = bank_dummy[['Subscription']]

In [None]:
# Preview the feature matrix: every column of bank_dummy except the target.
X

Unnamed: 0,Age,Account Balance,Own House,Personal Loan,No of campaigns,Job_blue-collar,Job_management,Job_self-employed,Job_unemployed,Marital Status_married,Marital Status_single
0,59,0,0,0,0,0,0,0,1,1,0
1,36,3057,0,0,0,0,0,0,0,1,0
2,47,126,1,0,0,1,0,0,0,0,0
3,43,388,1,0,0,0,1,0,0,0,0
4,34,462,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
4516,58,0,1,0,0,1,0,0,0,1,0
4517,38,0,1,0,0,0,0,0,0,1,0
4518,53,2627,1,1,0,0,0,1,0,1,0
4519,49,371,1,0,0,1,0,0,0,1,0


In [None]:
# Preview the target: a one-column DataFrame holding Subscription.
Y

Unnamed: 0,Subscription
0,0
1,1
2,1
3,1
4,1
...,...
4516,0
4517,0
4518,0
4519,0


## Splitting into train and test

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=42,
    test_size=0.25,
)

In [None]:
# Row counts of the four pieces, in the order: x_train, x_test, y_train, y_test.
tuple(map(len, (x_train, x_test, y_train, y_test)))

(3390, 1131, 3390, 1131)

In [None]:
# Training features; rows keep their original index labels from X.
x_train

Unnamed: 0,Age,Account Balance,Own House,Personal Loan,No of campaigns,Job_blue-collar,Job_management,Job_self-employed,Job_unemployed,Marital Status_married,Marital Status_single
1412,55,0,0,0,0,0,0,0,0,0,0
2117,48,2537,0,0,0,0,1,0,0,1,0
3443,42,2613,1,0,0,0,0,0,1,0,1
1488,49,322,0,0,0,1,0,0,0,1,0
2620,26,-402,0,1,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
4426,52,1623,1,0,0,0,0,1,0,0,1
466,70,4531,0,0,0,0,0,0,0,0,0
3092,48,3186,0,1,0,0,0,0,0,0,0
3772,46,271,1,0,0,0,0,0,0,1,0


In [None]:
# Held-out features; rows keep their original index labels from X.
x_test

Unnamed: 0,Age,Account Balance,Own House,Personal Loan,No of campaigns,Job_blue-collar,Job_management,Job_self-employed,Job_unemployed,Marital Status_married,Marital Status_single
2398,43,50,0,0,0,0,0,0,0,1,0
800,55,383,0,0,0,0,0,0,1,1,0
2288,39,585,1,0,3,0,0,1,0,1,0
2344,26,0,1,0,0,1,0,0,0,1,0
3615,38,0,1,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
3188,32,76,1,0,0,0,0,0,0,1,0
3218,44,-101,1,0,0,0,0,0,0,0,1
2767,25,0,1,0,0,0,0,0,0,1,0
498,47,286,0,0,0,0,1,0,0,1,0


In [None]:
# Training labels, index-aligned with x_train.
y_train

Unnamed: 0,Subscription
1412,0
2117,0
3443,0
1488,0
2620,0
...,...
4426,0
466,0
3092,0
3772,0


In [None]:
# Held-out labels, index-aligned with x_test.
y_test

Unnamed: 0,Subscription
2398,0
800,0
2288,0
2344,0
3615,0
...,...
3188,0
3218,0
2767,0
498,0


## Building the model

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Create an unfitted Gaussian Naive Bayes classifier with its default settings.
naive = GaussianNB()


In [None]:
# Fit on a 1-D target. y_train is a one-column DataFrame, and passing it as-is
# makes scikit-learn emit a DataConversionWarning before flattening it, so the
# Subscription column is selected explicitly.
naive_model = naive.fit(x_train, y_train['Subscription'])
print('The model has ran successfully!')

The model has ran successfully!


## Predicting on test data

In [None]:
# True labels for the held-out rows, shown before a prediction column is added.
y_test

Unnamed: 0,Subscription
2398,0
800,0
2288,0
2344,0
3615,0
...,...
3188,0
3218,0
2767,0
498,0


In [None]:
# Put the predictions next to the true labels. assign() builds a new frame
# instead of writing into the frame returned by train_test_split, which avoids
# pandas' SettingWithCopyWarning and keeps this cell safe to re-run.
y_test = y_test.assign(Predicted=naive_model.predict(x_test))

In [None]:
# True label and model prediction side by side for each held-out row.
y_test

Unnamed: 0,Subscription,Predicted
2398,0,0
800,0,0
2288,0,0
2344,0,0
3615,0,0
...,...,...
3188,0,0
3218,0,0
2767,0,0
498,0,0


## Evaluating the model

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Confusion matrix on the held-out rows: each row is an actual class,
# each column a predicted class.
print(
    confusion_matrix(
        y_true=y_test['Subscription'],
        y_pred=y_test['Predicted'],
    )
)

[[962  41]
 [108  20]]


In [None]:
# Share of held-out rows whose prediction equals the true label,
# rounded to two decimals for reporting.
accuracy = np.round(
    accuracy_score(y_test['Subscription'], y_test['Predicted']),
    2,
)

In [None]:
# print() puts its default space separator after the label, which already ends
# in a space, so the value is shown after two spaces.
print('The accuracy of the model is: ',accuracy)

The accuracy of the model is:  0.87


In [None]:
# Per-class precision, recall and F1. Read this alongside the accuracy figure:
# in the recorded run, recall for class 1 (presumably customers who subscribed)
# is far lower than for class 0.
print(classification_report(y_test['Subscription'],y_test['Predicted']))

              precision    recall  f1-score   support

           0       0.90      0.96      0.93      1003
           1       0.33      0.16      0.21       128

    accuracy                           0.87      1131
   macro avg       0.61      0.56      0.57      1131
weighted avg       0.83      0.87      0.85      1131



## Predicting on new data

In [None]:
# Take the row at position 15 of the feature matrix as the example to score.
# NOTE(review): this row comes from X itself, so it is not genuinely new data.
d1 = X.iloc[15, :]
d1

Age                        37
Account Balance           228
Own House                   1
Personal Loan               0
No of campaigns             0
Job_blue-collar             0
Job_management              0
Job_self-employed           0
Job_unemployed              0
Marital Status_married      0
Marital Status_single       1
Name: 15, dtype: int64

In [None]:
# A Series becomes a single-column frame; transposing it gives the
# one-row, one-column-per-feature layout the model was trained on.
val_data = d1.to_frame()
val_data.T

Unnamed: 0,Age,Account Balance,Own House,Personal Loan,No of campaigns,Job_blue-collar,Job_management,Job_self-employed,Job_unemployed,Marital Status_married,Marital Status_single
15,37,228,1,0,0,0,0,0,0,0,1


In [None]:
# Score the single example: transpose so the features run along the columns,
# then display the predicted class.
val_data1 = naive_model.predict(val_data.T)
val_data1

array([0])