In [2]:
import numpy as np
from numpy.random import seed 
seed(1) 
from tensorflow import set_random_seed 
set_random_seed(2)
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import optimizers
from keras.layers.normalization import BatchNormalization
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.cross_validation import StratifiedKFold
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.layers.noise import GaussianNoise
from keras.layers import Dropout

# Silence all warnings by swapping ``warnings.warn`` for a no-op.
import warnings


def ignore_warn(*args, **kwargs):
    """Accept any arguments and do nothing (stand-in for ``warnings.warn``)."""
    return None


warnings.warn = ignore_warn

Using TensorFlow backend.


The neural network model I got was the best one I've found so far from just playing around with the parameters. However, it is far from being an optimal model.

In [3]:
def process_data(path='ATM_training.csv', test_size=0.33, random_state=42):
    '''Load the ATM data, split it, and standardise the first two feature columns.

    Steps:
      1. Read the CSV at ``path`` and separate the ``'Withdraw'`` target.
      2. Split into train/test with ``train_test_split``.
      3. Fit a ``StandardScaler`` on the first two columns of the training
         features only, then apply it to both train and test (so no test
         statistics leak into the scaling).
      4. Drop the raw ``'Shops'`` and ``'ATMs'`` columns, add a small constant
         offset to the remaining columns, and append the scaled values as the
         last two columns.

    Parameters
    ----------
    path : str
        CSV file to read. Defaults to the original hard-coded file name.
    test_size : float
        Fraction of rows held out for testing.
    random_state : int
        Seed passed to ``train_test_split``.

    Returns
    -------
    X_train, X_test, y_train, y_test
        Feature frames (pandas DataFrames) and target series.
    '''
    data = pd.read_csv(path)
    y = data.pop('Withdraw')
    X_train, X_test, y_train, y_test = train_test_split(
        data, y, test_size=test_size, random_state=random_state)

    # Build new frames instead of using ``+=`` on the split outputs: in-place
    # arithmetic on those slices may trigger pandas' SettingWithCopyWarning.
    X_train = X_train + 0.001
    X_test = X_test + 0.001

    # NOTE(review): the positional slice assumes the first two columns are
    # 'Shops' and 'ATMs' (the columns dropped below) -- confirm against the CSV.
    scaler = StandardScaler().fit(X_train.iloc[:, 0:2])
    scaled_train = scaler.transform(X_train.iloc[:, 0:2])
    scaled_test = scaler.transform(X_test.iloc[:, 0:2])

    # Remove the unscaled columns, then apply the second offset to the rest
    # (kept as a separate addition so results match the original arithmetic).
    X_train = X_train.drop(['Shops', 'ATMs'], axis=1) + 0.01
    X_test = X_test.drop(['Shops', 'ATMs'], axis=1) + 0.01

    # Append the standardised columns at the end of each frame.
    # NOTE(review): the second column is re-added as 'ATM' (not 'ATMs'); the
    # name is preserved here in case other code relies on it.
    X_train['Shops'] = scaled_train[:, 0]
    X_train['ATM'] = scaled_train[:, 1]
    X_test['Shops'] = scaled_test[:, 0]
    X_test['ATM'] = scaled_test[:, 1]
    return X_train, X_test, y_train, y_test

In [4]:
# Build the train/test split once; eval_model and train_model below read
# these names as module-level globals.
X_train, X_test, y_train, y_test = process_data()

In [5]:
def eval_model(model, X=None, y=None):
    """Evaluate a fitted model, print the score, and return it.

    Parameters
    ----------
    model : object
        Anything exposing ``evaluate(features, targets, verbose=...)``.
    X : DataFrame or array-like, optional
        Features to evaluate on. Defaults to the global ``X_test``.
    y : array-like, optional
        Targets to evaluate against. Defaults to the global ``y_test``.

    Returns
    -------
    The value returned by ``model.evaluate`` (printed as the final MSE).
    """
    features = X_test if X is None else X
    targets = y_test if y is None else y
    # ``DataFrame.as_matrix()`` is deprecated and absent from current pandas;
    # ``np.asarray`` gives the same array and also accepts plain ndarrays.
    MSE_nn = model.evaluate(np.asarray(features), targets, verbose=0)
    print('Final MSE is {:.4f}'.format(MSE_nn))
    return MSE_nn

In [6]:
def train_model(method, epoch, batch):
    """Fit a compiled model on the global training split and score it.

    Parameters
    ----------
    method : compiled model
        Model instance exposing ``fit`` (e.g. the result of ``baseline_model()``).
    epoch : int
        Number of training epochs.
    batch : int
        Mini-batch size.

    Returns
    -------
    The score returned by ``eval_model`` for the fitted model.
    """
    model = method
    # Keras 2 names this argument ``epochs`` (``nb_epoch`` is the Keras 1
    # spelling); ``.values`` replaces the removed ``DataFrame.as_matrix()``.
    model.fit(X_train.values, y_train, epochs=epoch, batch_size=batch, verbose=0)
    return eval_model(model)

# Wide Model

In [7]:
def baseline_model(input_dim=6):
    """Build and compile a small fully connected regression network.

    Layers: ``input_dim`` inputs -> 24 -> 24 -> 12 -> 1, all with ReLU
    activation (including the output layer) and 'normal' kernel
    initialisation. Compiled with mean-squared-error loss and the Adam
    optimiser.

    Parameters
    ----------
    input_dim : int
        Number of input features (defaults to the original value of 6).

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    model = Sequential()
    np.random.seed(1337) # for reproducibility
    # The layer width is passed as ``units`` (first positional argument)
    # instead of the Keras 1 ``output_dim`` keyword.
    model.add(Dense(24, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
    model.add(Dense(24, kernel_initializer='normal', activation='relu'))
    model.add(Dense(12, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='relu'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [44]:
def baseline_model2(input_dim=6):
    """Build and compile a fully connected network with PReLU hidden activations.

    Layers: ``input_dim`` inputs -> 100 (ReLU) -> 100 -> 50 -> 25 (each
    followed by a PReLU layer) -> 1 (ReLU). All kernels use 'normal'
    initialisation. Compiled with mean-squared-error loss and Adam.

    Parameters
    ----------
    input_dim : int
        Number of input features (defaults to the original value of 6).

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    model = Sequential()
    np.random.seed(1337) # for reproducibility
    # ``units`` is given positionally instead of the Keras 1 ``output_dim`` keyword.
    model.add(Dense(100, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
    # Three linear Dense layers, each followed by its own PReLU activation.
    for units in (100, 50, 25):
        model.add(Dense(units, kernel_initializer='normal'))
        model.add(PReLU(alpha_initializer='zeros', alpha_regularizer=None,
                        alpha_constraint=None, shared_axes=None))
    model.add(Dense(1, kernel_initializer='normal', activation='relu'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [7]:
def baseline_model3(input_dim=6):
    """Build and compile a deeper network with batch normalisation.

    Layers: six ReLU Dense layers of 30 units each, with a
    ``BatchNormalization`` layer after the third and after the fifth, followed
    by a single linear output unit. All kernels use 'normal' initialisation.
    Compiled with mean-squared-error loss and Adam.

    Parameters
    ----------
    input_dim : int
        Number of input features (defaults to the original value of 6).

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    model = Sequential()
    np.random.seed(1337) # for reproducibility
    # ``units`` is given positionally instead of the Keras 1 ``output_dim`` keyword.
    model.add(Dense(30, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


In [None]:
def baseline_model4(input_dim=6):
    """Build and compile a wide network with LeakyReLU activations.

    Layers: ``input_dim`` inputs -> 250 -> 1000 -> 250, each Dense layer
    followed by ``LeakyReLU(alpha=0.1)``, then a single linear output unit.
    All kernels use 'normal' initialisation. Compiled with
    mean-squared-error loss and Adam.

    Parameters
    ----------
    input_dim : int
        Number of input features (defaults to the original value of 6).

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    model = Sequential()
    np.random.seed(1337) # for reproducibility
    # ``units`` is given positionally instead of the Keras 1 ``output_dim`` keyword.
    model.add(Dense(250, input_dim=input_dim, kernel_initializer='normal'))
    model.add(LeakyReLU(alpha=0.1))
    for units in (1000, 250):
        model.add(Dense(units, kernel_initializer='normal'))
        model.add(LeakyReLU(alpha=0.1))
    model.add(Dense(1, kernel_initializer='normal', activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [None]:
# Exhaustive search over epoch counts and batch sizes for baseline_model,
# keeping the combination with the lowest score returned by train_model.
# NOTE(review): train_model scores on X_test, so this selects hyperparameters
# on the same data used to report the final MSE.
epo = [50, 75, 100, 150, 200]
bat = [30, 50, 60, 80, 100]
best = float('inf')  # any real score beats this, so the first run always registers
ep_best, bat_best = 0, 0
for ep in epo:
    for ba in bat:
        # A fresh model is built for every combination so runs do not share weights.
        model_score = train_model(baseline_model(), ep, ba)
        print('This was for {} epochs and {} batch'.format(ep, ba))
        if model_score < best:
            best = model_score
            ep_best = ep
            # Record the current batch size (previously self-assigned, so it stayed 0).
            bat_best = ba
print('Best model is {:.2f}, with epoch {:.2f} and batch {:.2f}'.format(best, ep_best, bat_best))

Final MSE is 0.2651
This was for 50 epochs and 30 batch
Final MSE is 0.2557
This was for 50 epochs and 50 batch
Final MSE is 0.2530
This was for 50 epochs and 60 batch
Final MSE is 0.2791
This was for 50 epochs and 80 batch
Final MSE is 0.2560
This was for 50 epochs and 100 batch
Final MSE is 0.2586
This was for 75 epochs and 30 batch
Final MSE is 0.2732
This was for 75 epochs and 50 batch
Final MSE is 0.2766
This was for 75 epochs and 60 batch
Final MSE is 0.2968
This was for 75 epochs and 80 batch
Final MSE is 0.2669
This was for 75 epochs and 100 batch
Final MSE is 0.3539
This was for 100 epochs and 30 batch
Final MSE is 0.2547
This was for 100 epochs and 50 batch
Final MSE is 0.2621
This was for 100 epochs and 60 batch
Final MSE is 0.2581
This was for 100 epochs and 80 batch
Final MSE is 0.2514
This was for 100 epochs and 100 batch
Final MSE is 0.2936
This was for 150 epochs and 30 batch
Final MSE is 0.2658
This was for 150 epochs and 50 batch
Final MSE is 0.2577
This was for 150 e

In [45]:
# Train the PReLU network for 40 epochs with batch size 60; train_model
# prints and returns the resulting score.
train_model(baseline_model2(), 40, 60)

Final MSE is 0.2677


0.26774966872756473

In [None]:
#1. Dense (init = Gaussian) -> Gaussian Noise(0.05->0.1) -> 
#PRELU or Leaky-RELU(0.1->0.3) -> BatchNormalisation -> Gaussian Dropout (0.1->0.3) -> 2. Dense and so on.

In [None]:
#= NAdam (lr = 0.005, momentum = 0.995)

In [None]:
#NAdam optimiser (nesterov Adam)
#Batch Norm
#PRELU
#Gaussian Noise/Dropout

# Best Model

In [8]:
def baseline_model(input_dim=6):
    """Build and compile the best-performing network found so far.

    This redefines ``baseline_model`` with the same architecture as the
    definition earlier in the notebook: ``input_dim`` inputs -> 24 -> 24 ->
    12 -> 1, all ReLU (including the output layer), 'normal' kernel
    initialisation, mean-squared-error loss and the Adam optimiser.

    Parameters
    ----------
    input_dim : int
        Number of input features (defaults to the original value of 6).

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    model = Sequential()
    np.random.seed(1337) # for reproducibility
    # The layer width is passed as ``units`` (first positional argument)
    # instead of the Keras 1 ``output_dim`` keyword.
    model.add(Dense(24, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
    model.add(Dense(24, kernel_initializer='normal', activation='relu'))
    model.add(Dense(12, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='relu'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
    
# Fit the model for 40 epochs with batch size 60; the value shown is the
# score returned by train_model.
train_model(baseline_model(), 40, 60)

Final MSE is 0.2523


0.25232200653756615