In [2]:
# import the packages
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
import pandas as pd
import os
import datetime
import random

In [3]:
# Training Parameters
learning_rate = 5e-3    # step size handed to the optimizer
training_steps = 2000   # optimisation steps run for every data set
batch_size = 500        # rows sampled per optimisation step
display_step = 100      # report loss/accuracy every this many steps

# Network Parameters
num_input = 3      # values per timestep: the stock price, the sector, and the volume
timesteps = 240    # length of every input sequence
num_hidden = 50    # number of units in the LSTM hidden layer
num_classes = 1    # one output unit: above or below the median
dropout = 0.1      # drop probability; the network keeps 1.0 - dropout

# Cut-off applied to |label - predicted probability| when scoring accuracy.
threshold = tf.constant(0.5, dtype=tf.float32)

In [4]:
# Graph inputs: a batch of sequences and the matching targets.
# The leading None leaves the batch dimension free, so the same graph can be
# fed mini-batches as well as whole data sets.
X = tf.placeholder(tf.float32, shape=[None, timesteps, num_input])
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# Output layer parameters (randomly initialised). They project the
# num_hidden-wide LSTM output onto num_classes output units.
# NOTE: weights are created before biases on purpose -- the variables are
# unnamed, so their auto-generated names depend on creation order.
weights = dict(out=tf.Variable(tf.random_normal([num_hidden, num_classes])))
biases = dict(out=tf.Variable(tf.random_normal([num_classes])))

In [5]:
def RNN(x, weights, biases, keep_prob=None):
    """Build a single-layer LSTM over `x` and return the raw output logits.

    Args:
        x: input tensor of shape (batch_size, timesteps, num_input).
        weights: dict whose 'out' entry is multiplied with the last LSTM
            output.
        biases: dict whose 'out' entry is added after that multiplication.
        keep_prob: optional float or scalar tensor used as the keep
            probability for the LSTM output and state dropout. When omitted
            it falls back to ``1.0 - dropout``, which is the value that used
            to be hard-coded. Passing a placeholder makes it possible to feed
            1.0 when evaluating or predicting.

    Returns:
        ``matmul(outputs[-1], weights['out']) + biases['out']``, i.e. a linear
        read-out of the LSTM output at the final timestep (no activation).
    """
    if keep_prob is None:
        keep_prob = 1.0 - dropout

    # Current data input shape: (batch_size, timesteps, num_input)
    # static_rnn needs a list of 'timesteps' tensors of shape
    # (batch_size, num_input), so split along axis 1.
    x = tf.unstack(x, timesteps, 1)

    # Define a lstm cell with tensorflow
    lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=0.8)

    # Apply dropout to the cell output and recurrent state; inputs are kept.
    # NOTE(review): with a constant keep probability dropout is applied on
    # every run of the graph, including evaluation and prediction.
    lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=1.0,
                                              output_keep_prob=keep_prob,
                                              state_keep_prob=keep_prob)

    # Get lstm cell output
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

logits = RNN(X, weights, biases)
prediction = tf.nn.sigmoid(logits) # for prediction, [0, 1]

In [7]:
# Define loss and optimizer
# Sigmoid cross-entropy is computed on the raw logits, averaged over the batch.
x_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y)
loss_op = tf.reduce_mean(x_entropy)
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model: a sample counts as correct when the predicted probability
# lies strictly within `threshold` of its label.
# NOTE(review): `prediction` is derived from the same `logits` that are used
# for training, so any dropout applied inside the network is also active
# whenever `accuracy` or `prediction` is evaluated.
delta = tf.abs((Y - prediction))
correct_pred = tf.cast(tf.less(delta, threshold), tf.int32)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# the tool to save the results
# max_to_keep=None: the default Saver only keeps the 5 most recent checkpoints
# and deletes older ones, which would silently remove the models of earlier
# periods when one checkpoint per period is written.
saver = tf.train.Saver(max_to_keep=None)

# Column names for the data files: one integer label per flattened
# (timestep, feature) value, followed by the four metadata columns.
label = list(range(timesteps * num_input)) + ['target'] + ['ticker'] + ['target_date'] + ['sector']

In [8]:
def _ensure_dir(path):
    """Create directory `path` (including parents) if missing and return it."""
    if not os.path.isdir(path):
        os.makedirs(path)
    return path


def _to_model_arrays(frame):
    """Split a labelled data frame into arrays shaped for the X and Y inputs.

    The first ``timesteps * num_input`` columns are the features and the next
    column is the target. The features are reshaped in Fortran order, so flat
    column ``t + timesteps * n`` ends up at position ``[t, n]`` of a row (all
    timesteps of the first feature come first, then the second feature, ...).
    Every row is mapped independently, so indexing rows of the result gives
    the same values as reshaping a subset of rows.

    Returns:
        (features, targets) with shapes (rows, timesteps, num_input) and
        (rows, num_classes).
    """
    n_rows = len(frame)
    n_features = timesteps * num_input
    features = np.array(frame.iloc[:, :n_features])
    features = features.reshape((n_rows, timesteps, num_input), order='F')
    targets = np.array(frame.iloc[:, n_features]).reshape((n_rows, num_classes))
    return features, targets


num_periods = 13  # data sets Set_0 ... Set_12

# Build paths from components instead of embedding '\\' so they also work on
# non-Windows systems, and create the output folders up front so that a
# missing folder is not discovered only after a full training run.
data_dir = os.path.join(os.getcwd(), 'data')
log_dir = _ensure_dir(os.path.join(os.getcwd(), 'Logs'))
pred_dir = _ensure_dir(os.path.join(os.getcwd(), 'Pred'))

for i in range(num_periods):
    # read the data
    training_name = os.path.join(data_dir, 'Set_' + str(i) + '_Train.csv')
    testing_name = os.path.join(data_dir, 'Set_' + str(i) + '_Test.csv')
    train_data = pd.read_csv(training_name, index_col=0)
    test_data = pd.read_csv(testing_name, index_col=0)

    train_data.columns = label
    test_data.columns = label

    n_train = len(train_data)
    if n_train == 0:
        raise ValueError("Training set " + str(i) + " is empty: " + training_name)
    # random.sample raises if asked for more rows than exist, so cap the
    # batch size for small data sets.
    effective_batch_size = min(batch_size, n_train)

    # Convert once per data set; the training loop then only indexes rows
    # instead of slicing and reshaping a data frame on every step.
    training_data, training_label = _to_model_arrays(train_data)
    testing_data, testing_label = _to_model_arrays(test_data)

    # Start training
    with tf.Session() as sess:
        # print the training info
        print("-------------------------------------------------------------------------------------------------------")
        print("Training the model for Training Set " + str(i) + " from " +
              datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "...")
        print("-------------------------------------------------------------------------------------------------------")

        # Run the initializer
        sess.run(init)

        # Warm-start from the model saved for the previous period. The path
        # is derived from the period index rather than from a variable left
        # over from the previous loop iteration.
        if i != 0:
            previous_path = os.path.join(log_dir, 'model_for_period_' + str(i - 1))
            saver.restore(sess, previous_path)
            print("Model restored from file: %s" % previous_path)

        for step in range(training_steps):
            # draw a mini-batch of distinct rows
            batch_ind = random.sample(range(n_train), effective_batch_size)
            batch_x = training_data[batch_ind]
            batch_y = training_label[batch_ind]

            # Run optimization op (backprop)
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            if step % display_step == 0:
                # Calculate batch loss and accuracy
                loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x, Y: batch_y})
                print("Step " + str(step) + ", Minibatch Loss = " +
                      "{:.4f}".format(loss) + ", Training Accuracy = " +
                      "{:.3f}".format(acc))

        # NOTE(review): the graph is built with constant dropout keep
        # probabilities, so the accuracies and predictions below are computed
        # with dropout still active (and therefore vary between runs).
        print("Overall Training Accuracy:", sess.run(accuracy, feed_dict={X: training_data, Y: training_label}))
        print("Testing Accuracy:", sess.run(accuracy, feed_dict={X: testing_data, Y: testing_label}))

        log_path = os.path.join(log_dir, 'model_for_period_' + str(i))
        save_path = saver.save(sess, log_path)
        print("Model saved in file: %s" % save_path)

        # predicted probabilities for the test set, flattened to a plain list
        pred = sess.run(prediction, feed_dict={X: testing_data, Y: testing_label})
        pred = pred.ravel().tolist()
        output_data = pd.DataFrame({'y_prob': pred, 'y_true': test_data['target'], 'Ticker': test_data['ticker'],
                                    'Date': test_data['target_date'], 'Sector': test_data['sector'], })
        output_path = os.path.join(pred_dir, 'prediction_period_' + str(i) + '.csv')
        output_data.to_csv(output_path)
        print('Prediction for period ' + str(i) + ' successfully saved.')

-------------------------------------------------------------------------------------------------------
Training the model for Training Set 0 from 2018-12-30 23:14:23...
-------------------------------------------------------------------------------------------------------
Step 0,bibatch Loss = 0.7968, Training Accuracy = 0.490
Step 100,bibatch Loss = 0.6956, Training Accuracy = 0.486
Step 200,bibatch Loss = 0.6922, Training Accuracy = 0.520
Step 300,bibatch Loss = 0.6893, Training Accuracy = 0.504
Step 400,bibatch Loss = 0.6892, Training Accuracy = 0.536
Step 500,bibatch Loss = 0.6923, Training Accuracy = 0.508
Step 600,bibatch Loss = 0.6885, Training Accuracy = 0.540
Step 700,bibatch Loss = 0.6921, Training Accuracy = 0.508
Step 800,bibatch Loss = 0.6914, Training Accuracy = 0.514
Step 900,bibatch Loss = 0.6862, Training Accuracy = 0.526
Step 1000,bibatch Loss = 0.6948, Training Accuracy = 0.470
Step 1100,bibatch Loss = 0.6920, Training Accuracy = 0.480
Step 1200,bibatch Loss = 0.690

Step 300,bibatch Loss = 0.6862, Training Accuracy = 0.522
Step 400,bibatch Loss = 0.6760, Training Accuracy = 0.558
Step 500,bibatch Loss = 0.6784, Training Accuracy = 0.528
Step 600,bibatch Loss = 0.6721, Training Accuracy = 0.536
Step 700,bibatch Loss = 0.6786, Training Accuracy = 0.538
Step 800,bibatch Loss = 0.6735, Training Accuracy = 0.570
Step 900,bibatch Loss = 0.6799, Training Accuracy = 0.556
Step 1000,bibatch Loss = 0.6688, Training Accuracy = 0.582
Step 1100,bibatch Loss = 0.6712, Training Accuracy = 0.564
Step 1200,bibatch Loss = 0.6783, Training Accuracy = 0.536
Step 1300,bibatch Loss = 0.6499, Training Accuracy = 0.588
Step 1400,bibatch Loss = 0.6701, Training Accuracy = 0.542
Step 1500,bibatch Loss = 0.6645, Training Accuracy = 0.596
Step 1600,bibatch Loss = 0.6758, Training Accuracy = 0.538
Step 1700,bibatch Loss = 0.6768, Training Accuracy = 0.556
Step 1800,bibatch Loss = 0.6502, Training Accuracy = 0.568
Step 1900,bibatch Loss = 0.6754, Training Accuracy = 0.566
Over

Step 1100,bibatch Loss = 0.6865, Training Accuracy = 0.564
Step 1200,bibatch Loss = 0.6911, Training Accuracy = 0.496
Step 1300,bibatch Loss = 0.6858, Training Accuracy = 0.500
Step 1400,bibatch Loss = 0.6769, Training Accuracy = 0.560
Step 1500,bibatch Loss = 0.6806, Training Accuracy = 0.524
Step 1600,bibatch Loss = 0.6819, Training Accuracy = 0.580
Step 1700,bibatch Loss = 0.6724, Training Accuracy = 0.596
Step 1800,bibatch Loss = 0.6695, Training Accuracy = 0.578
Step 1900,bibatch Loss = 0.6718, Training Accuracy = 0.564
Overall Training Accuracy: 0.5395774
Testing Accuracy: 0.50348103
Model saved in file: C:\Users\Dian\OneDrive\Studying\Quantitative Finance\Deep Learning\Project\Logs\model_for_period_8
Prediction for period 8 successfully shaved.
-------------------------------------------------------------------------------------------------------
Training the model for Training Set 9 from 2018-12-31 04:23:49...
--------------------------------------------------------------------

Step 1900,bibatch Loss = 0.6805, Training Accuracy = 0.566
Overall Training Accuracy: 0.5525863
Testing Accuracy: 0.49951407
Model saved in file: C:\Users\Dian\OneDrive\Studying\Quantitative Finance\Deep Learning\Project\Logs\model_for_period_12
Prediction for period 12 successfully shaved.
