Copyright @ 2020 **ABCOM Information Systems Pvt. Ltd.** All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

See the License for the specific language governing permissions and limitations under the License.

# Importing Python Packages


In [None]:
# import statements
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split 

# Load data

In [None]:
# Loading the data from the CSV file.
# The file uses ';' as its field separator, and its first row (header = 0)
# supplies the column names.
df = pd.read_csv("bank-full.csv", sep = ";", header = 0)

In [None]:
# Previewing the first rows of the loaded data
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [None]:
# Removing rows that contain missing (NaN) values.
# Note: the literal string 'unknown' seen in several columns is an ordinary
# category value, not NaN, so those rows are kept.
df = df.dropna()

In [None]:
# Printing the (rows, columns) shape of the DataFrame
print(df.shape)

(45211, 17)


In [None]:
# Showing every column name as a plain Python list
column_names = df.columns.tolist()
print(column_names)

['age', 'job', 'marital', 'education', 'default', 'balance', 'housing', 'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays', 'previous', 'poutcome', 'y']


In [None]:
# Checking which column name sits at positional index 9
df.columns[9]

'day'

In [None]:
# Drop the columns which are not needed as model inputs.
# The columns are listed by name rather than by position so the selection is
# self-documenting and cannot silently remove the wrong columns if the column
# order of the CSV ever changes. What remains is: job, marital, default,
# housing, loan, poutcome and the target y.
unused_columns = [
    'age', 'education', 'balance', 'contact', 'day',
    'month', 'duration', 'campaign', 'pdays', 'previous',
]
df.drop(columns=unused_columns, inplace=True)

In [None]:
# Previewing the first rows of the columns that remain
df.head()

Unnamed: 0,job,marital,default,housing,loan,poutcome,y
0,management,married,no,yes,no,unknown,no
1,technician,single,no,yes,no,unknown,no
2,entrepreneur,married,no,yes,yes,unknown,no
3,blue-collar,married,no,yes,no,unknown,no
4,unknown,single,no,no,no,unknown,no


# Preparing Data

In [None]:
# One-hot encode every categorical feature column; the target 'y' is not in
# the list, so it is carried over unchanged.
categorical_columns = ['job', 'marital', 'default', 'housing', 'loan', 'poutcome']
data = pd.get_dummies(df, columns=categorical_columns)

In [None]:
# Previewing the first rows of the one-hot encoded data
data.head()

Unnamed: 0,y,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_divorced,marital_married,marital_single,default_no,default_yes,housing_no,housing_yes,loan_no,loan_yes,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
0,no,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1
1,no,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,1
2,no,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1
3,no,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1
4,no,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1


In [None]:
# Listing the column names produced by the one-hot encoding
data.columns

Index(['y', 'job_admin.', 'job_blue-collar', 'job_entrepreneur',
       'job_housemaid', 'job_management', 'job_retired', 'job_self-employed',
       'job_services', 'job_student', 'job_technician', 'job_unemployed',
       'job_unknown', 'marital_divorced', 'marital_married', 'marital_single',
       'default_no', 'default_yes', 'housing_no', 'housing_yes', 'loan_no',
       'loan_yes', 'poutcome_failure', 'poutcome_other', 'poutcome_success',
       'poutcome_unknown'],
      dtype='object')

# Understanding Data Mapping


In [None]:
# Displaying the encoded DataFrame to see how each category maps to a 0/1 column
data

Unnamed: 0,y,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_divorced,marital_married,marital_single,default_no,default_yes,housing_no,housing_yes,loan_no,loan_yes,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
0,no,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1
1,no,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,1
2,no,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1
3,no,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1
4,no,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,yes,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1
45207,yes,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1
45208,yes,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,0
45209,no,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1


In [None]:
# Checking which column name sits at positional index 12
data.columns[12]

'job_unknown'

In [None]:
# Drop the dummy columns that only encode an 'unknown' category value.
# They are named explicitly instead of using positions 12 and 25, so the
# intent is visible and the drop stays correct if the set of categories
# (and therefore the column positions) changes.
unknown_columns = ['job_unknown', 'poutcome_unknown']
data.drop(columns=unknown_columns, inplace=True)

In [None]:
# Viewing the final columns which we'll use for prediction:
# 'y' is the target and the remaining 0/1 columns are the features.
data.columns

Index(['y', 'job_admin.', 'job_blue-collar', 'job_entrepreneur',
       'job_housemaid', 'job_management', 'job_retired', 'job_self-employed',
       'job_services', 'job_student', 'job_technician', 'job_unemployed',
       'marital_divorced', 'marital_married', 'marital_single', 'default_no',
       'default_yes', 'housing_no', 'housing_yes', 'loan_no', 'loan_yes',
       'poutcome_failure', 'poutcome_other', 'poutcome_success'],
      dtype='object')

# Splitting Data


In [None]:
# Separating X from data.
# Every column except the target 'y' is a feature. Excluding 'y' by name
# (rather than slicing off the first column by position) keeps this correct
# regardless of where 'y' sits in the DataFrame.
X = data.drop(columns='y')

In [None]:
# Previewing the first rows of the feature matrix X
X.head()

Unnamed: 0,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,marital_divorced,marital_married,marital_single,default_no,default_yes,housing_no,housing_yes,loan_no,loan_yes,poutcome_failure,poutcome_other,poutcome_success
0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0
3,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0


In [None]:
# Separating the target column Y from data
Y = data["y"]

In [None]:
# Previewing the first values of the target Y
Y.head()

0    no
1    no
2    no
3    no
4    no
Name: y, dtype: object

In [None]:
# Splitting the features and target into training and test sets.
# random_state=0 makes the shuffle reproducible; no test_size is passed, so
# scikit-learn's documented default split is used.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

# Building Classifier


In [None]:
# Creating the logistic-regression classifier from scikit-learn, using the
# L-BFGS solver and a fixed random_state for reproducibility
classifier = LogisticRegression(solver='lbfgs',random_state=0)

In [None]:
# Fitting the classifier onto the training data.
# Y_train holds the string labels taken from the 'y' column, which are used
# directly as the classes to learn.
classifier.fit(X_train, Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

# Predicting Test Data


In [None]:
# Using the classifier to predict the test data and storing the result in the
# predicted_y array
predicted_y = classifier.predict(X_test)

In [None]:
# Displaying the predicted labels for the test rows
predicted_y

array(['no', 'no', 'no', ..., 'no', 'no', 'no'], dtype=object)

In [None]:
# Print the position (within the test set) of every row predicted as "yes",
# tab-separated on a single line.
for position, label in enumerate(predicted_y):
    if label == "yes":
        print(position, end="\t")

26	84	115	160	210	259	302	304	318	339	364	371	381	393	447	544	594	631	673	709	825	837	862	868	888	941	988	1060	1074	1179	1223	1278	1311	1377	1379	1405	1414	1441	1494	1540	1567	1578	1592	1599	1614	1671	1678	1689	1770	1772	1783	1784	1863	1872	1889	1908	1928	1935	1939	1956	1957	1970	1990	1994	2017	2030	2109	2115	2122	2123	2148	2245	2280	2337	2428	2431	2433	2492	2493	2513	2520	2531	2582	2620	2692	2720	2742	2781	2784	2796	2851	2895	2897	2964	2994	3000	3065	3076	3104	3116	3123	3144	3159	3169	3214	3228	3270	3281	3354	3369	3392	3451	3488	3537	3539	3614	3681	3690	3711	3752	3761	3863	3917	3930	3934	3941	3945	3958	3974	3995	4057	4092	4111	4178	4208	4219	4231	4232	4270	4285	4290	4352	4355	4369	4380	4430	4459	4478	4491	4516	4538	4552	4566	4567	4607	4610	4628	4646	4732	4748	4760	4892	4946	5010	5013	5029	5037	5108	5129	5169	5250	5266	5287	5324	5380	5382	5403	5416	5495	5519	5549	5573	5604	5686	5713	5733	5776	5791	5800	5808	5811	5843	5844	6049	6099	6100	6101	6128	6137	6145	6212	6241	6295	6380	6410	6412

In [None]:
# Scoring the classifier on the held-out test set (fraction of test rows whose
# predicted label matches the actual label) and reporting it to two decimals
test_accuracy = classifier.score(X_test, Y_test)
print('Accuracy: {:.2f}'.format(test_accuracy))

Accuracy: 0.89
