In [82]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
#Stop warnings
import warnings
def ignore_warn(*args, **kwargs):
    """Accept any positional/keyword arguments and do nothing."""
    return None
# Swap the module-level warn function for the no-op above, so every later
# warnings.warn(...) call made through the module attribute is silenced.
warnings.warn = ignore_warn

In [83]:
# Read the training CSV, detach the 'Withdraw' column as the regression target,
# and hold out 33% of the rows for testing (fixed seed for a repeatable split).
data = pd.read_csv('ATM_Training.csv')
y = data.pop('Withdraw')
X_train, X_test, y_train, y_test = train_test_split(
    data,
    y,
    test_size=0.33,
    random_state=42,
)

In [84]:
def baseline_model(input_dim=6):
    """Build and compile the deep baseline regression network.

    Architecture: a 60-unit input layer, hidden layers of 60, 60, 30, 30, 15,
    6 and 6 units, and a single output unit. Every layer uses ReLU and the
    'normal' kernel initializer. The model is compiled with mean squared error
    loss and the Adam optimizer.

    Parameters
    ----------
    input_dim : int, optional
        Number of predictor columns fed to the first layer. Defaults to 6,
        the value originally hard-coded for this dataset.

    Returns
    -------
    keras.models.Sequential
        The compiled (untrained) model.
    """
    model = Sequential()
    # First layer: `input_dim` MUST equal the number of predictors. The first
    # positional argument is the number of units (neurons) in THIS layer; the
    # Keras 1 `output_dim=` keyword is no longer accepted by current Keras.
    model.add(Dense(60, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
    # Hidden layers, narrowing towards the output; same initializer/activation.
    for units in (60, 60, 30, 30, 15, 6, 6):
        model.add(Dense(units, kernel_initializer='normal', activation='relu'))
    # NOTE(review): ReLU on the output unit restricts predictions to >= 0 and
    # the unit can get stuck at 0; a linear output is the usual choice for
    # regression. Kept as-is to preserve the reported results - confirm intent.
    model.add(Dense(1, kernel_initializer='normal', activation='relu'))
    # MSE loss for regression, optimized with Adam.
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [85]:
# Convert the pandas DataFrames to plain NumPy arrays for Keras. Many of the
# predictors are 0/1 dummy variables, so (per the author's reasoning) a small
# constant is added so that zero-valued inputs still contribute to updates.
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated and later
# removed from pandas. The test split now gets the same array conversion as
# the training split (it was previously left as a DataFrame).
xarray = X_train.values
xarray = xarray + 0.0001
xarrayTest = X_test.values + 0.0001


In [86]:
# Train the deep baseline network and score it on the held-out split.
model = baseline_model()
# `epochs` is the Keras 2 name of the old `nb_epoch` keyword.
model.fit(xarray, y_train, epochs=25, batch_size=50)
# Predict on the offset test features so the test inputs get the same +0.0001
# shift as the training inputs (the raw X_test was used before, leaving
# xarrayTest unused). np.asarray accepts a DataFrame or an ndarray.
predictions = model.predict(np.asarray(xarrayTest))
test = list(predictions)
print('Final MSE is ',mean_squared_error(y_test,predictions))

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Final MSE is  6.25379051441


This neural network was the best one I found so far just by playing around with the parameters. However, it is far from being an optimal model.

# Wide Model

In [87]:

# Reload the data from disk and redo the same 67/33 split with the same seed.
data = pd.read_csv('ATM_Training.csv')
y = data.pop('Withdraw')
X_train, X_test, y_train, y_test = train_test_split(
    data,
    y,
    test_size=0.33,
    random_state=42,
)
# Shift every feature in both splits by the same small constant.
# NOTE(review): the scaler below centers each column, so a constant shift
# should not affect the standardized values - confirm whether it is needed.
X_train = X_train + 0.01
X_test = X_test + 0.01
# Learn the per-column scaling statistics from the training split only.
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [88]:
# Apply the scaler fitted on the training split to both splits. Note that the
# scaled arrays use a capital "T" (X_Train / X_Test) to distinguish them from
# the unscaled X_train / X_test frames.
X_Train, X_Test = (scaler.transform(split) for split in (X_train, X_test))

In [89]:
def baseline_model2(input_dim=6):
    """Build and compile the wide regression network.

    Architecture: a 60-unit input layer, hidden layers of 1000 and 250 units,
    and a single output unit. Every layer uses ReLU and the 'normal' kernel
    initializer. The model is compiled with mean squared error loss and the
    Adam optimizer.

    Parameters
    ----------
    input_dim : int, optional
        Number of predictor columns fed to the first layer. Defaults to 6,
        the value originally hard-coded for this dataset.

    Returns
    -------
    keras.models.Sequential
        The compiled (untrained) model.
    """
    model = Sequential()
    # Units are passed positionally; the Keras 1 `output_dim=` keyword is no
    # longer accepted by current Keras releases.
    model.add(Dense(60, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
    # Two wide hidden layers.
    for units in (1000, 250):
        model.add(Dense(units, kernel_initializer='normal', activation='relu'))
    # NOTE(review): ReLU on the output unit restricts predictions to >= 0; a
    # linear output is the usual choice for regression. Kept as-is to preserve
    # the reported results - confirm intent.
    model.add(Dense(1, kernel_initializer='normal', activation='relu'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [90]:
# Build the wide network and train it on the standardized training features.
model = baseline_model2()
# `epochs` is the Keras 2 name of the old `nb_epoch` keyword.
model.fit(X_Train, y_train, epochs=25, batch_size=50)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x1306be3c8>

In [91]:
# Score the trained model on the standardized test split. The model is compiled
# with loss='mean_squared_error' and no extra metrics, so the value returned by
# evaluate() is the test-set MSE.
MSE_nn = model.evaluate(x=X_Test, y=y_test)



In [92]:
# Report the test-set MSE computed in the previous cell.
mse_message = 'Final MSE is {mse}'.format(mse=MSE_nn)
print(mse_message)

Final MSE is 0.27606850176146536
