In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd

### Loading the Data

First we load our inputs and targets from the corresponding csv files into numpy arrays:

In [8]:
raw_inputs = np.genfromtxt('basf_inputs_rand_normalized.csv', delimiter=',')
print(raw_inputs.shape)
print(raw_inputs)

(1029, 12)
[[3.         4.         1.         ... 0.57142857 0.40765391 1.        ]
 [7.         4.         1.         ... 1.         0.66139767 0.5       ]
 [2.         1.         0.         ... 0.57142857 0.71214642 0.33333333]
 ...
 [7.         4.         1.         ... 0.71428571 0.36439268 1.        ]
 [6.         4.         1.         ... 0.57142857 0.42845258 0.33333333]
 [1.         3.         0.         ... 0.57142857 0.68053245 0.5       ]]


In [9]:
raw_targets = np.genfromtxt('basf_targets_rand_normalized.csv', delimiter=',')
raw_targets = np.expand_dims(raw_targets, axis=1) # because tensorflow wants each column in an array even if only 1 value
print(raw_targets.shape)
print(raw_targets)

(1029, 1)
[[0.33097762]
 [0.26266196]
 [0.10600707]
 ...
 [0.34393404]
 [0.02120141]
 [0.29799764]]


### One-Hot Encoding

Here the first four columns of feature data are converted to one-hot values, done prior to splitting into training, validation and testing sets for the sake of reducing redundancy and for efficiency. The first four columns have 8, 4, 2, and 5 different possible values respectively, so that will be the dimension of each one-hot vector.

In [10]:
sess = tf.Session()
num_col_values = (8, 4, 2, 5)
new_cols = []
for col in range(4):
    col_values = raw_inputs[:, col]
    print(col_values)
    col_values = tf.cast(col_values, tf.int32)
    col_one_hot = tf.one_hot(col_values, num_col_values[col])
    print(sess.run(col_one_hot))
    new_cols.append(col_one_hot)
new_cols.append(raw_inputs[:, 4:])

[3. 7. 2. ... 7. 6. 1.]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 1. 0. ... 0. 0. 0.]]
[4. 4. 1. ... 4. 4. 3.]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 1. 0. 0.]
 ...
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 1.]]
[1. 1. 0. ... 1. 1. 0.]
[[0. 1.]
 [0. 1.]
 [1. 0.]
 ...
 [0. 1.]
 [0. 1.]
 [1. 0.]]
[1. 3. 0. ... 3. 3. 2.]
[[0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0.]]


In [11]:
inputs = tf.concat(new_cols, axis=1)
targets = raw_targets
print(sess.run(inputs))

[[0.         0.         0.         ... 0.5714286  0.4076539  1.        ]
 [0.         0.         0.         ... 1.         0.6613977  0.5       ]
 [0.         0.         1.         ... 0.5714286  0.7121464  0.33333334]
 ...
 [0.         0.         0.         ... 0.71428573 0.36439267 1.        ]
 [0.         0.         0.         ... 0.5714286  0.42845258 0.33333334]
 [0.         1.         0.         ... 0.5714286  0.68053246 0.5       ]]


### Splitting the Dataset

We'll reserve 80% of the data for training and 10% for validation and testing, respectively:

In [39]:
training_bound = int(int(inputs.shape[0])*.8)
validation_bound = int(int(inputs.shape[0])*.9)
training_inputs, training_targets = inputs[:training_bound], targets[:training_bound]
validation_inputs, validation_targets = inputs[training_bound:validation_bound], targets[training_bound:validation_bound]
testing_inputs, testing_targets = inputs[validation_bound:], targets[validation_bound:]
print(int(training_inputs.shape[0])+ int(validation_inputs.shape[0]) + int(testing_inputs.shape[0])) # verify number examples preserved
print(training_inputs)
print(validation_inputs)
print(testing_inputs)

1029
Tensor("strided_slice_7:0", shape=(823, 27), dtype=float32)
Tensor("strided_slice_8:0", shape=(103, 27), dtype=float32)
Tensor("strided_slice_9:0", shape=(103, 27), dtype=float32)


In [40]:
# get the shape of the input to give to the model
print(training_inputs[0].shape)

(27,)


### Defining the Network

In [57]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping

In [58]:
model = Sequential()
model.add(Dense(16, input_dim=int(training_inputs[0].shape[0]), activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='linear'))

In [59]:
# use stochastic gradient descent as the optimizer
optimizer = SGD(lr=.1)

In [60]:
# early stopping callback to stop when validation loss stops improving
early_stopping = EarlyStopping(monitor='val_loss')

In [61]:
model.compile(loss='mean_squared_error', optimizer=optimizer)

In [62]:
# TODO: mess with these
steps_per_epoch = 10
validation_steps = 10
testing_steps = 10

In [63]:
print(training_targets.shape)
model.fit(
    training_inputs,
    training_targets,
    validation_data = (validation_inputs, validation_targets),
    steps_per_epoch = steps_per_epoch,
    validation_steps = validation_steps,
    callbacks=[early_stopping],
    epochs=1000)

(823, 1)
Train on 823 samples, validate on 103 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Ep

Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000


<keras.callbacks.History at 0x1a77bc9c5f8>

In [64]:
testing_loss = model.evaluate(
    testing_inputs,
    testing_targets,
    steps = testing_steps,
    verbose = 0)
print("Post-training loss on test data: ", testing_loss)

Post-training loss on test data:  0.007156532257795334


In [66]:
modelB = Sequential()
modelB.add(Dense(16, input_dim=int(training_inputs[0].shape[0]), activation='relu'))
modelB.add(Dense(16, activation='relu'))
modelB.add(Dense(1, activation='relu'))
modelB.compile(loss='mean_squared_error', optimizer=optimizer)
modelB.fit(
    training_inputs,
    training_targets,
    validation_data = (validation_inputs, validation_targets),
    steps_per_epoch = steps_per_epoch,
    validation_steps = validation_steps,
    callbacks=[early_stopping],
    epochs=1000)
testing_loss = modelB.evaluate(
    testing_inputs,
    testing_targets,
    steps = testing_steps,
    verbose = 0)
print("Post-training loss on test data: ", testing_loss)

Train on 823 samples, validate on 103 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/10

Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Post-training loss on test data:  0.004730306565761566


In [68]:
modelC = Sequential()
modelC.add(Dense(16, input_dim=int(training_inputs[0].shape[0]), activation='relu'))
modelC.add(Dense(1, activation='relu'))
modelC.compile(loss='mean_squared_error', optimizer=optimizer)
modelC.fit(
    training_inputs,
    training_targets,
    validation_data = (validation_inputs, validation_targets),
    steps_per_epoch = steps_per_epoch,
    validation_steps = validation_steps,
    callbacks=[early_stopping],
    epochs=1000)
testing_loss = modelC.evaluate(
    testing_inputs,
    testing_targets,
    steps = testing_steps,
    verbose = 0)
print("Post-training loss on test data: ", testing_loss)

Train on 823 samples, validate on 103 samples
Epoch 1/1000
Epoch 2/1000
Post-training loss on test data:  0.1007809191942215


In [73]:
modelD = Sequential()
modelD.add(Dense(8, input_dim=int(training_inputs[0].shape[0]), activation='relu'))
modelD.add(Dense(8, activation='relu'))
modelD.add(Dense(1, activation='relu'))
modelD.compile(loss='mean_squared_error', optimizer=optimizer)
modelD.fit(
    training_inputs,
    training_targets,
    validation_data = (validation_inputs, validation_targets),
    steps_per_epoch = steps_per_epoch,
    validation_steps = validation_steps,
    callbacks=[early_stopping],
    epochs=1000)
testing_loss = modelD.evaluate(
    testing_inputs,
    testing_targets,
    steps = testing_steps,
    verbose = 0)
print("Post-training loss on test data: ", testing_loss)

Train on 823 samples, validate on 103 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/10

Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epoch 145/1000
Epoch 146/1000
Epoch 147/1000
Epoch 148/1000
Epoch 149/100

Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Post-training loss on test data:  0.003521531354635954


In [75]:
modelE = Sequential()
modelE.add(Dense(6, input_dim=int(training_inputs[0].shape[0]), activation='relu'))
modelE.add(Dense(6, activation='relu'))
modelE.add(Dense(1, activation='relu'))
modelE.compile(loss='mean_squared_error', optimizer=optimizer)
modelE.fit(
    training_inputs,
    training_targets,
    validation_data = (validation_inputs, validation_targets),
    steps_per_epoch = steps_per_epoch,
    validation_steps = validation_steps,
    callbacks=[early_stopping],
    epochs=1000)
testing_loss = modelE.evaluate(
    testing_inputs,
    testing_targets,
    steps = testing_steps,
    verbose = 0)
print("Post-training loss on test data: ", testing_loss)

Train on 823 samples, validate on 103 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/10

Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epoch 145/1000
Epoch 146/1000
Epoch 147/1000
Epoch 148/1000
Epoch 149/100