In [1]:
import pandas as pd
import numpy as np
import model_preparation
import keras as kr

from model_preparation import prepare_data, get_features, get_bounds, get_interval_accuracy_score
from sklearn.metrics import mean_squared_error, r2_score
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import RMSprop
from keras.optimizers import Adam

# Load in pre-split data
X_train, X_test, y_train, y_test = prepare_data()
features = get_features()
train_bounds_5 = get_bounds(y_train, 5)
train_bounds_10 = get_bounds(y_train, 10)
test_bounds_5 = get_bounds(y_test, 5)
test_bounds_10 = get_bounds(y_test, 10)

Using TensorFlow backend.


In [2]:
# Flatten target variable to use in NN models
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

In [3]:
# Define functions for NN models, running models, etc
def base_model():
    model = Sequential()
    model.add(Dense(10, input_dim = 52, kernel_initializer = 'normal', activation = 'relu'))
    model.add(Dense(1, kernel_initializer = 'normal', activation = 'linear'))
    adam = Adam()
    # Compile
    model.compile(loss = 'mean_squared_error', optimizer=adam, metrics =['mean_squared_error'])
    return model

def run_network(xtrain, xtest, ytrain, ytest, model, epochs, batch):
    model.fit(xtrain, ytrain, nb_epoch = epochs, batch_size = batch,
              validation_data = (xtest, ytest),verbose = 2)
    return model

# Change the number of layers but hold the number of hidden neurons constant
def change_layers_neurons(numlayers, numneurons):
    model = Sequential()
    model.add(Dense(numneurons, input_dim = 52, kernel_initializer = 'normal', activation = 'relu'))
    for x in range(1- numlayers):
        model.add(Dense(numneurons, kernel_initializer = 'normal', activation = 'relu'))
    model.add(Dense(1, kernel_initializer = 'normal', activation = 'linear'))
    adam = Adam()
    model.compile(loss = 'mean_squared_error', optimizer=adam, metrics =['mean_squared_error'])
    return model

basemodel = base_model()
basemodel.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 10)                530       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 11        
Total params: 541
Trainable params: 541
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Run Network for Base Model
run_network(X_train, X_test, y_train, y_test, basemodel, 10,20)

# Create more networks hold number of neurons/layer constant
 
# 5 layers
model1 = change_layers_neurons(5, 10)
run_network(X_train, X_test, y_train, y_test, model1, 10,20)
run_network(X_train, X_test, y_train, y_test, model1, 10,3)

# 10 layers
model2 = change_layers_neurons(10, 10)
run_network(X_train, X_test, y_train, y_test, model2, 10,20)

# 15 layers
model3 = change_layers_neurons(15, 10)
run_network(X_train, X_test, y_train, y_test, model3, 10,20)

# 25 layers
model4 = change_layers_neurons(25, 10)
run_network(X_train, X_test, y_train, y_test, model4, 10,20)


# Create layers with more neurons, hold number of layers constant

# 15 neurons
model5 = change_layers_neurons(3, 15)
run_network(X_train, X_test, y_train, y_test, model5, 10,20)

# 20 neurons
model6 = change_layers_neurons(3, 20)
run_network(X_train, X_test, y_train, y_test, model6, 10,20)

# 30 neurons
model7 = change_layers_neurons(3, 30)
run_network(X_train, X_test, y_train, y_test, model7, 10,20)

## Model 7 performs the best with lowest MSE on testing and will be used for model predictions and evaluation below

  del sys.path[0]


Train on 80000 samples, validate on 20000 samples
Epoch 1/10
 - 4s - loss: 610.6786 - mean_squared_error: 610.6790 - val_loss: 108.4281 - val_mean_squared_error: 108.4281
Epoch 2/10
 - 4s - loss: 106.3870 - mean_squared_error: 106.3871 - val_loss: 103.6248 - val_mean_squared_error: 103.6248
Epoch 3/10
 - 4s - loss: 103.4697 - mean_squared_error: 103.4697 - val_loss: 101.3156 - val_mean_squared_error: 101.3156
Epoch 4/10
 - 4s - loss: 100.5074 - mean_squared_error: 100.5075 - val_loss: 98.2846 - val_mean_squared_error: 98.2845
Epoch 5/10
 - 4s - loss: 97.6705 - mean_squared_error: 97.6705 - val_loss: 96.3182 - val_mean_squared_error: 96.3181
Epoch 6/10
 - 4s - loss: 95.4899 - mean_squared_error: 95.4898 - val_loss: 94.6744 - val_mean_squared_error: 94.6744
Epoch 7/10
 - 4s - loss: 93.9441 - mean_squared_error: 93.9442 - val_loss: 93.8857 - val_mean_squared_error: 93.8857
Epoch 8/10
 - 4s - loss: 93.0375 - mean_squared_error: 93.0374 - val_loss: 93.4500 - val_mean_squared_error: 93.4500


Epoch 7/10
 - 11s - loss: 88.2518 - mean_squared_error: 88.2518 - val_loss: 88.0503 - val_mean_squared_error: 88.0502
Epoch 8/10
 - 11s - loss: 86.4969 - mean_squared_error: 86.4969 - val_loss: 86.4009 - val_mean_squared_error: 86.4008
Epoch 9/10
 - 9s - loss: 85.5985 - mean_squared_error: 85.5985 - val_loss: 87.3551 - val_mean_squared_error: 87.3551
Epoch 10/10
 - 8s - loss: 84.9916 - mean_squared_error: 84.9916 - val_loss: 85.7345 - val_mean_squared_error: 85.7345
Train on 80000 samples, validate on 20000 samples
Epoch 1/10
 - 10s - loss: 465.8546 - mean_squared_error: 465.8544 - val_loss: 105.1633 - val_mean_squared_error: 105.1633
Epoch 2/10
 - 9s - loss: 101.5680 - mean_squared_error: 101.5680 - val_loss: 96.8866 - val_mean_squared_error: 96.8865
Epoch 3/10
 - 11s - loss: 93.9801 - mean_squared_error: 93.9803 - val_loss: 91.1072 - val_mean_squared_error: 91.1072
Epoch 4/10
 - 10s - loss: 89.2637 - mean_squared_error: 89.2636 - val_loss: 88.4595 - val_mean_squared_error: 88.4595
Ep

<keras.engine.sequential.Sequential at 0x1e4a42a2908>

In [5]:
# Compute predictions using best model training on neural network (model 7)
y_pred_train_nn = model7.predict(X_train)
y_pred_test_nn = model7.predict(X_test)

# Compute R2 and MSE on test data
print(r2_score(y_test, y_pred_test_nn))
print(mean_squared_error(y_test, y_pred_test_nn))

0.8819338723093375
77.36669890027484


In [6]:
# Compute accuracy scores for 5% and 10% intervals on test data
print("5% +/- limit:")
print(np.mean(get_interval_accuracy_score(test_bounds_5, y_pred_test_nn)))
print("10% +/- limit:")
print(np.mean(get_interval_accuracy_score(test_bounds_10, y_pred_test_nn)))

5% +/- limit:
0.3686
10% +/- limit:
0.6065
