In [11]:
import argparse
import copy
import numpy as np
import os
import random
import tensorflow as tf

from time import time
try:
    # Prefer the implementation that ships with TensorFlow when it exists.
    from tensorflow.python.ops.nn_ops import leaky_relu
except ImportError:
    # Fallback for TensorFlow builds where nn_ops does not expose leaky_relu.
    from tensorflow.python.framework import ops
    from tensorflow.python.ops import math_ops

    def leaky_relu(features, alpha=0.2, name=None):
        """Element-wise leaky ReLU computed as ``maximum(alpha * features, features)``.

        Args:
            features: value convertible to a tensor (the pre-activations).
            alpha: multiplier applied to ``features`` before taking the maximum.
            name: optional name for the enclosing name scope
                (the scope defaults to "LeakyRelu").

        Returns:
            The tensor produced by ``math_ops.maximum``.
        """
        with ops.name_scope(name, "LeakyRelu", [features, alpha]):
            feature_tensor = ops.convert_to_tensor(features, name="features")
            slope = ops.convert_to_tensor(alpha, name="alpha")
            scaled = slope * feature_tensor
            return math_ops.maximum(scaled, feature_tensor)

from load_data import load_EOD_data
from evaluator import evaluate


In [51]:
class RankLSTM:
    """LSTM regressor over per-ticker end-of-day (EOD) feature windows.

    The constructor loads the ticker list and the EOD arrays; ``get_batch``
    slices one time window out of those arrays; ``train`` builds the
    TensorFlow (v1 graph-mode) model on top of placeholders.
    """

    def __init__(self, data_path, market_name, tickers_fname, parameters,
                 steps=1, epochs=50, batch_size=None, gpu=False):
        """Load the data and remember the run configuration.

        Args:
            data_path: forwarded unchanged to ``load_EOD_data``.
            market_name: forwarded unchanged to ``load_EOD_data``.
            tickers_fname: path of a tab-delimited text file with the tickers.
            parameters: dict of hyper-parameters; this class reads the keys
                ``'seq'`` (window length) and ``'unit'`` (LSTM units).
                A shallow copy is stored.
            steps: look-ahead offset used when picking the ground-truth
                column in ``get_batch``; also forwarded to ``load_EOD_data``.
            epochs: stored on the instance (not used by the code in this class).
            batch_size: first dimension of every placeholder built in
                ``train``; defaults to the number of tickers when ``None``.
            gpu: when truthy, ``train`` builds the graph under ``/gpu:0``,
                otherwise under ``/cpu:0``.
        """
        self.data_path = data_path
        self.market_name = market_name
        self.tickers_fname = tickers_fname
        # load data
        self.tickers = np.genfromtxt(tickers_fname,
                                     dtype=str, delimiter='\t', skip_header=0)
        ### DEBUG
        # self.tickers = self.tickers[0: 10]
        print('#tickers selected:', len(self.tickers))
        self.eod_data, self.mask_data, self.gt_data, self.price_data = \
            load_EOD_data(data_path, market_name, self.tickers, steps)

        # Shallow copy so rebinding keys on the caller's dict does not leak in.
        self.parameters = copy.copy(parameters)
        self.steps = steps
        self.epochs = epochs
        if batch_size is None:
            self.batch_size = len(self.tickers)
        else:
            self.batch_size = batch_size

        # NOTE(review): presumably the time indices where the validation and
        # test periods start (756 = 3 * 252, 1008 = 4 * 252) -- confirm.
        self.valid_index = 756
        self.test_index = 1008
        self.trade_dates = self.mask_data.shape[1]
        # Size of the last axis of the `feature` placeholder built in train().
        self.fea_dim = 5

        self.gpu = gpu

    def get_batch(self, offset=None):
        """Slice one window of length ``parameters['seq']`` starting at ``offset``.

        Args:
            offset: start index along axis 1 of the data arrays. When ``None``
                it is drawn uniformly from ``[0, valid_index)``.

        Returns:
            A 4-tuple:
              * ``eod_data[:, offset:offset + seq, :]``
              * per-row minimum of ``mask_data`` over
                ``[offset, offset + seq + steps)``, with a trailing axis added
              * ``price_data[:, offset + seq - 1]`` with a trailing axis added
              * ``gt_data[:, offset + seq + steps - 1]`` with a trailing axis
                added
        """
        if offset is None:
            # NOTE(review): a random offset close to valid_index yields a
            # window that extends past valid_index -- confirm this is intended.
            offset = random.randrange(0, self.valid_index)
        seq_len = self.parameters['seq']
        window_end = offset + seq_len

        # The minimum over the window (plus the look-ahead steps) collapses
        # the mask to one value per row.
        mask_window = self.mask_data[:, offset: window_end + self.steps]
        mask_batch = np.min(mask_window, axis=1)

        return self.eod_data[:, offset:window_end, :], \
               np.expand_dims(mask_batch, axis=1), \
               np.expand_dims(self.price_data[:, window_end - 1], axis=1), \
               np.expand_dims(
                   self.gt_data[:, window_end + self.steps - 1], axis=1
               )

    def train(self):
        """Build the LSTM regression graph and print the static tensor shapes.

        The method creates the placeholders, a ``BasicLSTMCell`` unrolled with
        ``dynamic_rnn``, a single dense output unit and a masked mean-squared
        error. No session is run and nothing is returned (the result is
        ``None``); ``reg_loss`` is defined but not used further.
        """
        device_name = '/gpu:0' if self.gpu else '/cpu:0'
        print('device name:', device_name)

        # The graph must be reset BEFORE the device scope is opened. In graph
        # mode the scope is attached to the graph that is the default when the
        # `with` is entered; resetting inside the block swaps in a new default
        # graph without that scope, so the requested device would be ignored.
        tf.compat.v1.reset_default_graph()

        with tf.device(device_name):
            ground_truth = tf.compat.v1.placeholder(tf.float32, [self.batch_size, 1])
            mask = tf.compat.v1.placeholder(tf.float32, [self.batch_size, 1])
            feature = tf.compat.v1.placeholder(tf.float32,
                [self.batch_size, self.parameters['seq'], self.fea_dim])
            base_price = tf.compat.v1.placeholder(tf.float32, [self.batch_size, 1])
            all_one = tf.ones([self.batch_size, 1], dtype=tf.float32)

            lstm_cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(
                self.parameters['unit']
            )

            initial_state = lstm_cell.zero_state(self.batch_size,
                                                 dtype=tf.float32)

            outputs, _ = tf.compat.v1.nn.dynamic_rnn(
                lstm_cell, feature, dtype=tf.float32,
                initial_state=initial_state
            )

            # Keep only the output of the last time step as the embedding.
            seq_emb = outputs[:, -1, :]
            # One dense unit on top of the embedding.
            prediction = tf.compat.v1.layers.dense(
                seq_emb, units=1, activation=leaky_relu, name='reg_fc',
                kernel_initializer=tf.compat.v1.glorot_uniform_initializer()
            )

            # (prediction - base_price) / base_price
            return_ratio = tf.compat.v1.div(tf.compat.v1.subtract(prediction, base_price), base_price)
            reg_loss = tf.compat.v1.losses.mean_squared_error(
                ground_truth, return_ratio, weights=mask
            )

            print('ground truth = ', ground_truth.shape)
            print('mask = ', mask.shape)
            print('feature = ', feature.shape)
            print('base price = ', base_price.shape)
            print('all one = ', all_one.shape)
            print('outputs = ', outputs.shape)
            print('seq emb = ', seq_emb.shape)
            print('prediction = ', prediction.shape)
            print('return ration = ', return_ratio.shape)
            
            
            

In [52]:
if __name__ == '__main__':
    # Hyper-parameters handed to RankLSTM: 'seq' is the window length and
    # 'unit' the number of LSTM units. 'lr' and 'alpha' are presumably the
    # learning rate and a loss weight -- confirm where they are consumed.
    parameters = dict(seq=16, unit=64, lr=0.001, alpha=1.0)

    # NASDAQ run on the CPU; batch_size=None makes the batch span all tickers.
    rank_LSTM = RankLSTM(
        '2013-01-01',
        'NASDAQ',
        'NASDAQ_tickers_qualify_dr-0.98_min-5_smooth.csv',
        parameters,
        steps=1, epochs=50, batch_size=None, gpu=False
    )

#tickers selected: 1026
single EOD data shape: (1245, 6)


In [53]:
# NOTE(review): RankLSTM.train() has no return statement, so pred_all is None
# after this call; the call only builds the graph and prints tensor shapes.
pred_all = rank_LSTM.train()

device name: /cpu:0
ground truth =  (1026, 1)
mask =  (1026, 1)
feature =  (1026, 4, 5)
base price =  (1026, 1)
all one =  (1026, 1)
outputs =  (1026, 4, 64)
seq emb =  (1026, 64)
prediction =  (1026, 1)
return ration =  (1026, 1)


  lstm_cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(
  prediction = tf.compat.v1.layers.dense(
