# Setting up the project

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [2]:
#loading data from csv file
df = pd.read_csv('bank.csv', header=0)


In [3]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no


In [4]:
#dopping Nan
df=df.dropna()

In [5]:
print (df.shape)

(4521, 17)


In [6]:
print(list(df.columns))

['age', 'job', 'marital', 'education', 'default', 'balance', 'housing', 'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays', 'previous', 'poutcome', 'y']


In [7]:
#drop unwanted columns
df.drop(df.columns[[0,3,5,8,9,10,11,12,13,14]], 
   axis = 1, inplace = True)

In [8]:
df.columns[6]

'y'

In [9]:
df.head()

Unnamed: 0,job,marital,default,housing,loan,poutcome,y
0,unemployed,married,no,no,no,unknown,no
1,services,married,no,yes,yes,failure,no
2,management,single,no,yes,no,failure,no
3,management,married,no,yes,yes,unknown,no
4,blue-collar,married,no,yes,no,unknown,no


# Encoding data

In [10]:
# creating one hot encoding of the categorical columns.
data = pd.get_dummies(df, columns =['job', 'marital', 'default', 'housing', 'loan', 'poutcome'])

In [11]:
data.head()

Unnamed: 0,y,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,...,default_no,default_yes,housing_no,housing_yes,loan_no,loan_yes,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
0,no,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,0,0,1
1,no,0,0,0,0,0,0,0,1,0,...,1,0,0,1,0,1,1,0,0,0
2,no,0,0,0,0,1,0,0,0,0,...,1,0,0,1,1,0,1,0,0,0
3,no,0,0,0,0,1,0,0,0,0,...,1,0,0,1,0,1,0,0,0,1
4,no,0,1,0,0,0,0,0,0,0,...,1,0,0,1,1,0,0,0,0,1


In [12]:
data.columns

Index(['y', 'job_admin.', 'job_blue-collar', 'job_entrepreneur',
       'job_housemaid', 'job_management', 'job_retired', 'job_self-employed',
       'job_services', 'job_student', 'job_technician', 'job_unemployed',
       'job_unknown', 'marital_divorced', 'marital_married', 'marital_single',
       'default_no', 'default_yes', 'housing_no', 'housing_yes', 'loan_no',
       'loan_yes', 'poutcome_failure', 'poutcome_other', 'poutcome_success',
       'poutcome_unknown'],
      dtype='object')

## Understanding Data Mapping

In [13]:
data

Unnamed: 0,y,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,...,default_no,default_yes,housing_no,housing_yes,loan_no,loan_yes,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
0,no,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,0,0,1
1,no,0,0,0,0,0,0,0,1,0,...,1,0,0,1,0,1,1,0,0,0
2,no,0,0,0,0,1,0,0,0,0,...,1,0,0,1,1,0,1,0,0,0
3,no,0,0,0,0,1,0,0,0,0,...,1,0,0,1,0,1,0,0,0,1
4,no,0,1,0,0,0,0,0,0,0,...,1,0,0,1,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4516,no,0,0,0,0,0,0,0,1,0,...,1,0,0,1,1,0,0,0,0,1
4517,no,0,0,0,0,0,0,1,0,0,...,0,1,0,1,0,1,0,0,0,1
4518,no,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,0,0,1
4519,no,0,1,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,1,0,0


# Dropping the “unknown”

In [14]:
data.columns[12]


'job_unknown'

In [15]:
data.drop(data.columns[[12, 16, 18, 21, 24]], axis=1, inplace=True)

In [16]:
data.columns

Index(['y', 'job_admin.', 'job_blue-collar', 'job_entrepreneur',
       'job_housemaid', 'job_management', 'job_retired', 'job_self-employed',
       'job_services', 'job_student', 'job_technician', 'job_unemployed',
       'marital_divorced', 'marital_married', 'marital_single', 'default_yes',
       'housing_yes', 'loan_no', 'poutcome_failure', 'poutcome_other',
       'poutcome_unknown'],
      dtype='object')

# #Splitting Data


In [17]:
 #create feature array
X = data.iloc[:,1:]

In [18]:
X.head()

Unnamed: 0,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,marital_divorced,marital_married,marital_single,default_yes,housing_yes,loan_no,poutcome_failure,poutcome_other,poutcome_unknown
0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1
1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0
2,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0
3,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1
4,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1


In [19]:
#creating output array
Y = data.iloc[:,0]

In [20]:
Y.head()

0    no
1    no
2    no
3    no
4    no
Name: y, dtype: object

In [21]:
#This will create the four arrays called X_train, Y_train, X_test, and Y_test.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

# Building Classifier

# The sklearn Classifier

In [22]:
classifier = LogisticRegression(solver='lbfgs',random_state=0)

In [23]:
classifier.fit(X_train, Y_train)

LogisticRegression(random_state=0)

In [24]:
LogisticRegression(C = 1.0, class_weight = None, dual = False, 
   fit_intercept=True, intercept_scaling=1, max_iter=100, 
   multi_class='warn', n_jobs=None, penalty='l2', random_state=0, 
   solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)

LogisticRegression(multi_class='warn', random_state=0)

# Testing

# Predicting Test Data

In [25]:
predicted_y = classifier.predict(X_test)

In [26]:
predicted_y

array(['no', 'no', 'no', ..., 'no', 'no', 'no'], dtype=object)

In [30]:
for x in range(len(predicted_y)):
    if(predicted_y[x] == 1):
        print(x,end="\t")

In [33]:
print(Y)

0       no
1       no
2       no
3       no
4       no
        ..
4516    no
4517    no
4518    no
4519    no
4520    no
Name: y, Length: 4521, dtype: object


# Verifying Accuracy

In [29]:
print('Accuracy: {:.2f}'.format(classifier.score(X_test, Y_test)))

Accuracy: 0.89


It shows that the accuracy of our model is 90% which is considered very 
good in most of the applications. Thus, no further tuning is required. 
Now, our customer is ready to run the next campaign, get the list of potential 
customers and chase them for opening the TD with a probable high rate of success.