## Predicting Stock Price Movement with a Feed Forward Neural Network, using Tensorflow

In [None]:
import pandas as pd
import tensorflow as tf
import numpy as np

To validate our model, we split the the 186k events of 1/6/17 such that we train on the first 80% of the day, and validate using the final 20%. After tuning parameters, we use the entire 1/6/17 data set to train, and then test using the following day's data, which is from 1/9/17.

In [2]:
# remove the first 1000 and last 1000 events due to weirdness from the beginning and end of the trading day.
train = pd.read_csv('SOXX_01_06_processed.csv')
train_data = train[1000:-1000].reset_index(drop=True)

test = pd.read_csv('SOXX_01_09_processed.csv')
test_data = test[1000:-1000].reset_index(drop=True)

### Preprocessing: Standardizing Features and Encoding Labels
Use sklearn prepocessing module to both encode labels, and standard scale the columns for the train, validation, and test sets.

In [3]:
from sklearn import preprocessing

scaler = preprocessing.StandardScaler()

bid_columns = []
ask_columns = []

for i in range(10, 0, -1):
    bid_columns.append('bid' + str(i))

for i in range(1,11):
    ask_columns.append('ask' + str(i))

train_depths = train_data[bid_columns + ask_columns]
test_depths = test_data[bid_columns + ask_columns]
depth_names = train_depths.columns

scaled_depths = scaler.fit_transform(train_depths)
train_x = pd.DataFrame(scaled_depths, columns=depth_names)
train_x['normalized_mid_price'] = train_data.norm_mid_price
train_x['mid_price_change'] = train_data.norm_mid_price_change
train_x['normalized_relative_depth'] = train_data.norm_rel_depth

scaled_test_depths = scaler.fit_transform(test_depths)
test_x = pd.DataFrame(scaled_test_depths, columns=depth_names)
test_x['normalized_mid_price'] = test_data.norm_mid_price
test_x['mid_price_change'] = test_data.norm_mid_price_change
test_x['normalized_relative_depth'] = test_data.norm_rel_depth

train_set_x = train_x[:150000]
validation_x = train_x[150000:]

Check that the input columns look good and standardized

In [4]:
train_set_x[10000:10010]

Unnamed: 0,bid10,bid9,bid8,bid7,bid6,bid5,bid4,bid3,bid2,bid1,ask1,ask2,ask3,ask4,ask5,ask6,ask7,ask8,ask9,ask10,normalized_mid_price,mid_price_change,normalized_relative_depth
10000,-0.916484,0.469729,-0.763767,0.172598,-0.578536,-0.742413,-1.439773,-0.087217,0.251144,-0.754155,-0.023116,-1.234478,-1.649811,-1.813906,-1.779646,-1.305669,-0.946409,-0.96268,-0.949827,-0.346373,-1.427243,1.140265,2.69412
10001,-0.916484,0.469729,-0.763767,0.172598,-0.578536,-0.742413,-1.439773,-0.087217,0.251144,-0.754155,-0.023116,-1.234478,-1.649811,-1.813906,-1.779646,-1.3057,-0.946599,-0.617941,-0.94987,-0.924091,-1.427243,1.140265,2.694462
10002,-0.916484,0.469729,-0.763767,0.172598,-0.578536,-0.742413,-1.439773,-0.087217,0.251144,-0.754155,-0.023116,-1.234478,-1.649811,-1.813906,-1.779646,-1.3057,-0.946599,-0.617941,-0.94987,-0.924091,-1.427243,1.140265,2.694462
10003,-0.916484,0.469729,-0.763767,0.172598,-0.578536,-0.742413,-1.439773,-0.087217,0.251144,-0.754155,-0.023116,-1.81631,-1.649811,-1.813906,-1.779646,-1.3057,-0.946599,-0.617941,-0.94987,-0.924091,-1.427243,1.140265,3.329511
10004,-0.916484,0.469729,-0.763767,0.172598,-0.578536,-0.742413,-1.439773,-0.087217,0.251144,-0.754155,-0.023116,-1.81631,-1.649811,-1.243165,-1.779646,-1.3057,-0.946599,-0.617941,-0.94987,-0.924091,-1.427243,1.140265,2.694369
10005,-0.916484,0.469729,-0.763767,0.172598,-0.578536,-0.742413,-1.439773,-0.087217,0.251144,-0.754155,-0.023116,-1.81631,-1.649811,-0.862671,-1.779646,-1.3057,-0.946599,-0.617941,-0.94987,-0.924091,-1.427243,1.140265,2.339232
10006,-0.916484,0.469729,-0.763767,0.172598,-0.578536,-0.742413,-1.439773,-0.087217,0.251144,-0.754155,-0.023116,-1.81631,-1.649811,-0.862671,-1.779646,-1.3057,-0.946599,-0.617941,-0.94987,-0.924091,-1.427243,1.306359,2.339232
10007,0.987799,-0.475337,0.863938,-0.076316,-0.350305,-1.189503,0.04008,-0.087016,-1.386611,-0.754155,-0.023116,-1.81631,-1.649811,-0.862671,-1.779646,-1.3057,-0.946599,-0.617941,-0.94987,-0.924091,-1.427243,1.306359,2.516934
10008,0.987799,-0.475337,0.863938,-0.076316,-0.350305,-1.189503,-0.38271,-0.087016,-1.386611,-0.754155,-0.023116,-1.81631,-1.649811,-0.862671,-1.779646,-1.3057,-0.946599,-0.617941,-0.94987,-0.924091,-1.427243,1.472468,2.339333
10009,0.987799,-0.475337,0.863938,-0.076316,-0.350305,-1.189503,-0.38271,-0.087016,-1.386611,-0.754155,-0.023116,-1.81631,-1.649811,-0.862671,-1.779646,-1.3057,-0.946599,-0.617941,-0.94987,-0.924091,-1.427243,1.472468,2.339333


### Label Encoding

In [5]:
le = preprocessing.LabelEncoder()

train_y = train_data['change_label']
le.fit(train_y)
encoded_train_y = le.transform(train_y)
encoded_train_set_y = encoded_train_y[:150000]
encoded_validation_set_y = encoded_train_y[150000:]

test_y = test_data['change_label']
encoded_test_y = le.transform(test_y)

## Building the Neural Net

#### Defining Layer Hyperparameters

In [6]:
n_inputs = len(train_x.columns)
n_hidden1 = 13
n_hidden2 = 8
n_outputs = 3
num_examples = len(train_x)

#### Setting up placeholders and  2 hidden Layers

In [7]:
x_place = tf.placeholder(tf.float32, shape=(None, n_inputs), name="x_place")
y_place = tf.placeholder(tf.int32, shape=(None), name="y_place")

layer1 = tf.layers.dense(x_place, n_hidden1, activation = tf.nn.relu, name = 'hidden1')
layer2 = tf.layers.dense(layer1, n_hidden2, activation = tf.nn.relu, name = 'hidden2')
logits = tf.layers.dense(layer2, n_outputs, name = 'outputs')

predictions = tf.nn.softmax(logits, name = 'predictions') 

Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Colocations handled automatically by placer.


#### Defining a loss function: softmax cross entropy

In [8]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_place, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)

#### Define an optimization method: AdamOptimizer with .0001 learn rate 

In [9]:
learning_rate = 0.0001
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

#### Define an accuracy metric: did the neural network predict the correct label?

In [10]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y_place, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

#### Neural Net Iteration
* Define epoch and batch_size parameters
* Instantiate variables and a saver to save the model

* We want to train sequentially, thus, for each iteration, we keep track of the current index,  increment by the batch size.
* We train on the batch, then change the start index such that we train on the next sequential batch
* Save and print results every 50 epochs to see the average accuracy and loss for that epoch.
* Finally save the model after all epochs have completed.

In [11]:
%%time
import datetime

n_epochs = 2000
batch_size = 500

num_examples = len(train_x)
init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    init.run()
    writer = tf.summary.FileWriter('./graphs/2000', sess.graph)
    for epoch in range(n_epochs):
        epoch_acc = 0
        epoch_loss = 0
        iterations = 0
        current_index = 0
        for iteration in range(num_examples // batch_size):
            next_index = current_index + batch_size
            x_batch = train_x[current_index:next_index]
            y_batch = encoded_train_y[current_index:next_index]
            sess.run(training_op, feed_dict = {x_place:x_batch, y_place:y_batch})
            current_index = next_index
            acc_batch = accuracy.eval(feed_dict={x_place: x_batch, y_place: y_batch})
            loss_batch = loss.eval(feed_dict={x_place: x_batch, y_place: y_batch})
            
            if (epoch+1) % 50 == 0:
                acc_batch = accuracy.eval(feed_dict={x_place: x_batch, y_place: y_batch})
                loss_batch = loss.eval(feed_dict={x_place: x_batch, y_place: y_batch})
                epoch_acc = epoch_acc + acc_batch
                epoch_loss = epoch_loss + loss_batch
                iterations += 1
            
        if (epoch+1) % 50 == 0:
            acc_train = accuracy.eval(feed_dict = {x_place: x_batch, y_place:y_batch})
            summary1, summary2 = sess.run([loss_summary, accuracy_summary], feed_dict = {x_place: train_x, y_place:encoded_train_y})
            
            writer.add_summary(summary1, epoch+1)
            writer.add_summary(summary2, epoch+1)
            print(datetime.datetime.now())
            print(epoch+1)
            print("Epoch acc: "+ str(epoch_acc/iterations))
            print("Epoch loss: "+ str(epoch_loss/iterations))
            
    save_path = saver.save(sess, "./my_model_final.ckpt")

writer.close()

KeyboardInterrupt: 

In [12]:
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final20.ckpt")
    Z = logits.eval(feed_dict = {x_place: test_x})
    y_pred= np.argmax(Z, axis = 1)

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./my_model_final20.ckpt


InvalidArgumentError: Restoring from checkpoint failed. This is most likely due to a mismatch between the current graph and the graph from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Assign requires shapes of both tensors to match. lhs shape= [23,13] rhs shape= [22,13]
	 [[node save/Assign_5 (defined at <timed exec>:8) ]]

Caused by op 'save/Assign_5', defined at:
  File "/Users/Beni/miniconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/Beni/miniconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/Users/Beni/miniconda3/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/Users/Beni/miniconda3/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
    handle._run()
  File "/Users/Beni/miniconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tornado/gen.py", line 781, in inner
    self.run()
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2848, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2874, in _run_cell
    return runner(coro)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3049, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3220, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-f861dedc321a>", line 1, in <module>
    get_ipython().run_cell_magic('time', '', 'import datetime\n\nn_epochs = 2000\nbatch_size = 500\n\nnum_examples = len(train_x)\ninit = tf.global_variables_initializer()\nsaver = tf.train.Saver()\n\nwith tf.Session() as sess:\n    init.run()\n    writer = tf.summary.FileWriter(\'./graphs/2000\', sess.graph)\n    for epoch in range(n_epochs):\n        epoch_acc = 0\n        epoch_loss = 0\n        iterations = 0\n        current_index = 0\n        for iteration in range(num_examples // batch_size):\n            next_index = current_index + batch_size\n            x_batch = train_x[current_index:next_index]\n            y_batch = encoded_train_y[current_index:next_index]\n            sess.run(training_op, feed_dict = {x_place:x_batch, y_place:y_batch})\n            current_index = next_index\n            acc_batch = accuracy.eval(feed_dict={x_place: x_batch, y_place: y_batch})\n            loss_batch = loss.eval(feed_dict={x_place: x_batch, y_place: y_batch})\n            \n            if (epoch+1) % 50 == 0:\n                acc_batch = accuracy.eval(feed_dict={x_place: x_batch, y_place: y_batch})\n                loss_batch = loss.eval(feed_dict={x_place: x_batch, y_place: y_batch})\n                epoch_acc = epoch_acc + acc_batch\n                epoch_loss = epoch_loss + loss_batch\n                iterations += 1\n            \n        if (epoch+1) % 50 == 0:\n            acc_train = accuracy.eval(feed_dict = {x_place: x_batch, y_place:y_batch})\n            summary1, summary2 = sess.run([loss_summary, accuracy_summary], feed_dict = {x_place: train_x, y_place:encoded_train_y})\n            \n            writer.add_summary(summary1, epoch+1)\n            writer.add_summary(summary2, epoch+1)\n            print(datetime.datetime.now())\n            print(epoch+1)\n            print("Epoch acc: "+ str(epoch_acc/iterations))\n            print("Epoch loss: "+ str(epoch_loss/iterations))\n            \n    save_path = saver.save(sess, "./my_model_final.ckpt")\n\nwriter.close()\n')
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2352, in run_cell_magic
    result = fn(*args, **kwargs)
  File "</Users/Beni/miniconda3/lib/python3.6/site-packages/decorator.py:decorator-gen-61>", line 2, in time
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/IPython/core/magic.py", line 187, in <lambda>
    call = lambda f, *a, **k: f(*a, **k)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/IPython/core/magics/execution.py", line 1291, in time
    exec(code, glob, local_ns)
  File "<timed exec>", line 8, in <module>
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 832, in __init__
    self.build()
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 844, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 881, in _build
    build_save=build_save, build_restore=build_restore)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 513, in _build_internal
    restore_sequentially, reshape)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 354, in _AddRestoreOps
    assign_ops.append(saveable.restore(saveable_tensors, shapes))
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saving/saveable_object_util.py", line 73, in restore
    self.op.get_shape().is_fully_defined())
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 223, in assign
    validate_shape=validate_shape)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 64, in assign
    use_locking=use_locking, name=name)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
    op_def=op_def)
  File "/Users/Beni/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a mismatch between the current graph and the graph from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Assign requires shapes of both tensors to match. lhs shape= [23,13] rhs shape= [22,13]
	 [[node save/Assign_5 (defined at <timed exec>:8) ]]


In [None]:
predictions = le.inverse_transform(y_pred)

In [None]:
round(sum(test_y == predictions)/len(test_y),3)

In [None]:
pd.DataFrame({'label': y_batch, 'pred': y_pred[-200]})

In [None]:
combined = pd.DataFrame(test_x).join(pd.DataFrame({'label': test_y, 'pred': predictions}))

In [None]:
pd.Series(y_pred).value_counts()

In [None]:
pd.set_option('display.max_rows', 10000)
combined[10000:15000]

In [None]:
import matplotlib.pyplot as plt

train_data_reduced = train_data[train_data.index % 200 == 0]
plt.scatter(train_data_reduced.index, train_data_reduced.mid_price)
plt.show()

In [None]:
test_data_reduced = test_data[test_data.index % 200 == 0]
plt.scatter(test_data_reduced.index, test_data_reduced.mid_price)
plt.show()

In [None]:
print(bad_miss)

In [None]:
print(small_miss)

In [None]:
print(correct)

In [None]:
pd.Series(test_y - predictions).value_counts()

In [None]:
print(sum(np.where((test_y == -1) & (predictions == -1) ,1,0)))
print(sum(np.where((test_y == 0) & (predictions == 0) ,1,0)))
print(sum(np.where((test_y == 1) & (predictions == 1) ,1,0)))

In [None]:
print(sum(np.where((test_y == -1) & (predictions == 0) ,1,0)))
print(sum(np.where((test_y == -1) & (predictions == 1) ,1,0)))

In [None]:
print(sum(np.where((test_y == 0) & (predictions == -1) ,1,0)))
print(sum(np.where((test_y == 0) & (predictions == 1) ,1,0)))

In [None]:
print(sum(np.where((test_y == 1) & (predictions == -1) ,1,0)))
print(sum(np.where((test_y == 1) & (predictions == 0) ,1,0)))

In [None]:
pd.Series(train_y).value_counts()/len(train_y)

In [None]:
len(test_data)

In [None]:
train[1000:2000]