## Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import pickle
from sklearn.metrics import accuracy_score

## Preprocessing

In [2]:
df = pd.read_csv('bank_survey.csv')

In [3]:
df.head()

Unnamed: 0,RowNumber,UID,Customer_name,CreditBonus,City,Gender,Age,Tenure,Balance,AvailedProducts,CreditCardPresent,ActiveOnline,Current Salary,Status
0,1,15634602,Oster,619,Pilani,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Chizoba,608,Hyderabad,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Bianchi,502,Pilani,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Peyser,699,Pilani,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Cocci,850,Hyderabad,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
df = df.sample(frac=1).reset_index(drop=True)

In [5]:
df.head()

Unnamed: 0,RowNumber,UID,Customer_name,CreditBonus,City,Gender,Age,Tenure,Balance,AvailedProducts,CreditCardPresent,ActiveOnline,Current Salary,Status
0,6847,15809309,Longo,689,Hyderabad,Female,40,5,154251.67,1,0,1,118319.5,0
1,1861,15778190,Onyekaozulu,639,Hyderabad,Female,28,8,97840.72,1,1,1,178222.77,0
2,9758,15696047,Chimezie,501,Pilani,Male,35,6,99760.84,1,1,1,13591.52,0
3,8084,15684011,Miller,576,Goa,Male,29,7,130575.26,1,0,1,173629.78,0
4,3020,15741049,Colebatch,577,Pilani,Male,29,7,0.0,2,1,1,55473.15,0


In [6]:
df = pd.get_dummies(df, columns = ['Gender', 'City'], drop_first = True)

In [7]:
df.head()

Unnamed: 0,RowNumber,UID,Customer_name,CreditBonus,Age,Tenure,Balance,AvailedProducts,CreditCardPresent,ActiveOnline,Current Salary,Status,Gender_Male,City_Hyderabad,City_Pilani
0,6847,15809309,Longo,689,40,5,154251.67,1,0,1,118319.5,0,0,1,0
1,1861,15778190,Onyekaozulu,639,28,8,97840.72,1,1,1,178222.77,0,0,1,0
2,9758,15696047,Chimezie,501,35,6,99760.84,1,1,1,13591.52,0,1,0,1
3,8084,15684011,Miller,576,29,7,130575.26,1,0,1,173629.78,0,1,0,0
4,3020,15741049,Colebatch,577,29,7,0.0,2,1,1,55473.15,0,1,0,1


In [8]:
df['Tenure'] /= 10

In [9]:
df.head()

Unnamed: 0,RowNumber,UID,Customer_name,CreditBonus,Age,Tenure,Balance,AvailedProducts,CreditCardPresent,ActiveOnline,Current Salary,Status,Gender_Male,City_Hyderabad,City_Pilani
0,6847,15809309,Longo,689,40,0.5,154251.67,1,0,1,118319.5,0,0,1,0
1,1861,15778190,Onyekaozulu,639,28,0.8,97840.72,1,1,1,178222.77,0,0,1,0
2,9758,15696047,Chimezie,501,35,0.6,99760.84,1,1,1,13591.52,0,1,0,1
3,8084,15684011,Miller,576,29,0.7,130575.26,1,0,1,173629.78,0,1,0,0
4,3020,15741049,Colebatch,577,29,0.7,0.0,2,1,1,55473.15,0,1,0,1


## Normalisation of Data

We tested both the zscore normalisation as well as the minmax normalisation
#### Note : Use either one of the methods, not both

In [10]:
zscore = lambda x : (x - x.mean()) / x.std()
minmax = lambda x : (x - x.min()) / (x.max() - x.min())

In [11]:
df['Balance'] = minmax(df['Balance'])
df['Current Salary'] = minmax(df['Current Salary'])
df['CreditBonus'] = minmax(df['CreditBonus'])
df['AvailedProducts'] = minmax(df['AvailedProducts'])
df['Age'] = minmax(df['Age'])

In [12]:
df['Balance'] = zscore(df['Balance'])
df['Current Salary'] = zscore(df['Current Salary'])
df['CreditBonus'] = zscore(df['CreditBonus'])
df['AvailedProducts'] = zscore(df['AvailedProducts'])
df['Age'] = zscore(df['Age'])

In [13]:
df.head()

Unnamed: 0,RowNumber,UID,Customer_name,CreditBonus,Age,Tenure,Balance,AvailedProducts,CreditCardPresent,ActiveOnline,Current Salary,Status,Gender_Male,City_Hyderabad,City_Pilani
0,6847,15809309,Longo,0.398033,0.102805,0.5,1.246298,-0.911538,0,1,0.316973,0,0,1,0
1,1861,15778190,Onyekaozulu,-0.11928,-1.041381,0.8,0.342239,-0.911538,1,1,1.358579,0,0,1,0
2,9758,15696047,Chimezie,-1.547064,-0.373939,0.6,0.373012,-0.911538,1,1,-1.504051,0,1,0,1
3,8084,15684011,Miller,-0.771094,-0.946032,0.7,0.866853,-0.911538,0,1,1.278715,0,1,0,0
4,3020,15741049,Colebatch,-0.760748,-0.946032,0.7,-1.225786,0.807696,1,1,-0.775808,0,1,0,1


In [14]:
df.drop(labels = ['RowNumber', 'UID', 'Customer_name'], axis = 1, inplace = True)

In [15]:
labels = np.array([df['Status'] == 0, df['Status'] == 1]).transpose().astype(int)
labels

array([[1, 0],
       [1, 0],
       [1, 0],
       ...,
       [0, 1],
       [1, 0],
       [1, 0]])

In [16]:
df.drop(labels = ['Status'], axis = 1, inplace = True)

In [17]:
data = df.as_matrix()

## Neural Network using TensorFlow

In [18]:
def multilayer_perceptron(x, weights, bias):
    
    # First hidden layer with RELU Activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), bias['b1'])
    layer_1 = tf.nn.relu(layer_1)
    
    # Second hidden layer
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), bias['b2'])
    layer_2 = tf.nn.relu(layer_2)
    
    # Output Layer
    
    out_layer = tf.matmul(layer_2, weights['out']) + bias['out']
    
    return out_layer    

def beautyCM(cm, ind=['True pos', 'True neg'], cols=['Pred pos', 'Pred neg']):
    return pd.DataFrame(cm, index=ind, columns=cols)

def precision(cm):
     # prec = TP / (TP + FP)
    try:
        return round(float(cm.loc['True pos', 'Pred pos']) / \
                     (cm.loc['True pos', 'Pred pos'] + cm.loc['True neg', 'Pred pos']), 4)
    except ZeroDivisionError:
        return 1.0

def recall(cm):
    # prec = TP / (TP + FN)
    try:
        return round(float(cm.loc['True pos', 'Pred pos']) / \
                 (cm.loc['True pos', 'Pred pos'] + cm.loc['True pos', 'Pred neg']), 4)
    except ZeroDivisionError:
        return 1.0

def run(Xtrain, Ytrain, Xtest, Ytest):
    learning_rate = 0.001
    epoch = 100
    batch_size = 100
    n_classes = 2
    n_count = 80000
    n_input = 11
    n_hidden_1 = 32
    n_hidden_2 = 32
    
    weights = {
    'h1':tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2':tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out':tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
    }
    bias = {
        'b1':tf.Variable(tf.random_normal([n_hidden_1])),
        'b2':tf.Variable(tf.random_normal([n_hidden_2])),
        'out':tf.Variable(tf.random_normal([n_classes]))
    }
    
    x = tf.placeholder('float', [None, n_input])
    y = tf.placeholder('float', [None, n_classes])
    pred = multilayer_perceptron(x, weights, bias)
    
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = pred, labels = y))
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
    
    ss = tf.InteractiveSession()
    init = tf.initialize_all_variables()
    ss.run(init)
    
    for i in range(epoch):
        avg_cost = 0.0
        batch_count = n_count//batch_size
        
        for j in range(batch_count):
            batchX = Xtrain[j * batch_size : (j + 1) * batch_size]
            batchY = Ytrain[j * batch_size : (j + 1) * batch_size]
            
            ss.run(optimizer,feed_dict={x:batchX,y:batchY})
            
            
    correct_predictions = tf.cast(tf.equal(tf.argmax(pred,1),tf.argmax(y,1)), 'float')
    accuracy = tf.reduce_mean(correct_predictions)

    CM = tf.confusion_matrix(tf.argmax(pred, 1), tf.argmax(y, 1))
    CM = CM.eval(feed_dict = {x:Xtest,y:Ytest})
    
    accuracy = accuracy.eval({x:Xtest,y:Ytest})
    
    return (accuracy,CM)

In [19]:
def x_fold(bins):
    binsize = 10000//bins
    overall_accuracy = 0
    overall_precision = 0
    overall_recall = 0
    for i in range(bins):
        Xtrain = np.concatenate((data[ : i * binsize], data[(i + 1) * binsize : ]))
        Ytrain = np.concatenate((labels[ : i * binsize], labels[(i + 1) * binsize : ]))
        Xtest = data[i * binsize : (i + 1) * binsize]
        Ytest = labels[i * binsize : (i + 1) * binsize]
        accuracy,CM = run(Xtrain,Ytrain,Xtest,Ytest)
        accuracy *= 100
        overall_accuracy += accuracy
        print("Batch {} accuracy = {:.4f}".format(i + 1,accuracy))
        print("Confusion Matrix : ")
        CM = beautyCM(CM)
        print(CM)
        overall_precision += precision(CM)
        overall_recall += recall(CM)
        print("Batch {} precision = {:.4f}".format(i + 1,precision(CM)))
        print("Batch {} recall = {:.4f}".format(i + 1,recall(CM)))
        F1 = 2 * (precision(CM) * recall(CM)) / (precision(CM) + recall(CM))
        print("Batch {} Fscore = {:.4f}".format(i + 1,F1))
        print("")
    overall_accuracy /= bins
    overall_precision /= bins
    overall_recall /= bins
    F = 2 * (overall_precision * overall_recall) / (overall_precision + overall_recall)
    print("Overall accuracy = {:.4f}".format(overall_accuracy))
    print("Overall precision = {:.4f}".format(overall_precision))
    print("Overall recall = {:.4f}".format(overall_recall))    
    print("Overall Fscore = {:.4f}".format(F))

### Min-Max Normalisation

In [20]:
x_fold(5)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Batch 1 accuracy = 84.1000
Confusion Matrix : 
          Pred pos  Pred neg
True pos      1478       196
True neg       122       204
Batch 1 precision = 0.9238
Batch 1 recall = 0.8829
Batch 1 Fscore = 0.9029

Batch 2 accuracy = 86.0500
Confusion Matrix : 
          Pred pos  Pred neg
True pos      1566       231
True neg        48       155
Batch 2 precision = 0.9703
Batch 2 recall = 0.8715
Batch 2 Fscore = 0.9183

Batch 3 accuracy = 82.8500
Confusion Matrix : 
          Pred pos  Pred neg
True pos      1538       306
True neg        37       119
Batch 3 precision = 0.9765
Batch 3 recall = 0.8341
Batch 3 Fscore = 0.8997

Batch 4 accuracy = 84.7500
Confusion Matrix : 
          Pred pos  Pred neg
True pos      1512       221
Tr

In [21]:
x_fold(10)

Batch 1 accuracy = 84.5000
Confusion Matrix : 
          Pred pos  Pred neg
True pos       785       139
True neg        16        60
Batch 1 precision = 0.9800
Batch 1 recall = 0.8496
Batch 1 Fscore = 0.9102

Batch 2 accuracy = 83.8000
Confusion Matrix : 
          Pred pos  Pred neg
True pos       785       148
True neg        14        53
Batch 2 precision = 0.9825
Batch 2 recall = 0.8414
Batch 2 Fscore = 0.9065

Batch 3 accuracy = 80.4000
Confusion Matrix : 
          Pred pos  Pred neg
True pos       675        64
True neg       132       129
Batch 3 precision = 0.8364
Batch 3 recall = 0.9134
Batch 3 Fscore = 0.8732

Batch 4 accuracy = 85.9000
Confusion Matrix : 
          Pred pos  Pred neg
True pos       799       133
True neg         8        60
Batch 4 precision = 0.9901
Batch 4 recall = 0.8573
Batch 4 Fscore = 0.9189

Batch 5 accuracy = 82.3000
Confusion Matrix : 
          Pred pos  Pred neg
True pos       761       154
True neg        23        62
Batch 5 precision = 0.9707

## Zscore normalisation

In [None]:
x_fold(5)

Batch 1 accuracy = 84.0500
Confusion Matrix : 
          Pred pos  Pred neg
True pos      1565       284
True neg        35       116
Batch 1 precision = 0.9781
Batch 1 recall = 0.8464
Batch 1 Fscore = 0.9075

Batch 2 accuracy = 85.2500
Confusion Matrix : 
          Pred pos  Pred neg
True pos      1531       212
True neg        83       174
Batch 2 precision = 0.9486
Batch 2 recall = 0.8784
Batch 2 Fscore = 0.9122

Batch 3 accuracy = 82.8500
Confusion Matrix : 
          Pred pos  Pred neg
True pos      1522       290
True neg        53       135
Batch 3 precision = 0.9663
Batch 3 recall = 0.8400
Batch 3 Fscore = 0.8987

Batch 4 accuracy = 83.5000
Confusion Matrix : 
          Pred pos  Pred neg
True pos      1455       189
True neg       141       215
Batch 4 precision = 0.9117
Batch 4 recall = 0.8850
Batch 4 Fscore = 0.8982

Batch 5 accuracy = 83.3500
Confusion Matrix : 
          Pred pos  Pred neg
True pos      1549       304
True neg        29       118
Batch 5 precision = 0.9816

In [None]:
x_fold(10)

Batch 1 accuracy = 84.1000
Confusion Matrix : 
          Pred pos  Pred neg
True pos       783       141
True neg        18        58
Batch 1 precision = 0.9775
Batch 1 recall = 0.8474
Batch 1 Fscore = 0.9078

Batch 2 accuracy = 83.7000
Confusion Matrix : 
          Pred pos  Pred neg
True pos       777       141
True neg        22        60
Batch 2 precision = 0.9725
Batch 2 recall = 0.8464
Batch 2 Fscore = 0.9051

Batch 3 accuracy = 85.4000
Confusion Matrix : 
          Pred pos  Pred neg
True pos       794       133
True neg        13        60
Batch 3 precision = 0.9839
Batch 3 recall = 0.8565
Batch 3 Fscore = 0.9158

Batch 4 accuracy = 85.2000
Confusion Matrix : 
          Pred pos  Pred neg
True pos       779       120
True neg        28        73
Batch 4 precision = 0.9653
Batch 4 recall = 0.8665
Batch 4 Fscore = 0.9132

Batch 5 accuracy = 82.0000
Confusion Matrix : 
          Pred pos  Pred neg
True pos       773       169
True neg        11        47
Batch 5 precision = 0.9860