# Double DQN Trader


In [4]:
import tensorflow as tf
import numpy as np
from helpers import get_stock_code_and_action


# Initial

### Constants

In [14]:
s_space = 3
a_space = 3
learning_rate = 0.003
buffer_size = 10000

buffer = np.zeros((buffer_size, s_space + 1 + 1 + s_space))
buffer_length = 0
gamma = 0.9
codes = ['600036']
mode = 'train'

### Input Parameters

In [6]:
s = tf.placeholder(tf.float32, [None, s_space])
s_next = tf.placeholder(tf.float32, [None, s_space])
q_next = tf.placeholder(tf.float32, [None, a_space])

### NN

Two NN with same structure, 3 densor layer

In [7]:
def _build_critic_nn(s, scope):
    w_init, b_init = tf.random_normal_initializer(.0, .3), tf.constant_initializer(.1)
    with tf.variable_scope(scope):
        s_first_dense = tf.layers.dense(s,
                                       32,
                                       activation=tf.nn.relu,
                                       kernel_initializer=w_init,
                                       bias_initializer=b_init)
        s_second_dense = tf.layers.dense(s_first_dense,
                                        32,
                                        activation=tf.nn.relu,
                                        kernel_initializer=w_init,
                                        bias_initializer=b_init)
        q = tf.layers.dense(s_second_dense,
                            a_space,
                            kernel_initializer=w_init,
                            bias_initializer=b_init)
        return q
    

#### Generate NN

Generate NN with two layer `eval` and `target`

In [8]:
q_eval = _build_critic_nn(s, 'q_eval')
q_target = _build_critic_nn(s_next, 'q_next')

### Output Parameter

In [9]:
with tf.variable_scope('loss'):
    loss = tf.reduce_mean(tf.squared_difference(q_next, q_eval))

with tf.variable_scope('train'):
    train_op = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
    
e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_eval')
t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_target')
update_q_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]

# Train

Initial tf config

In [10]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True

Initial tf session

In [11]:
session = tf.Session(config=config)

In [12]:
def get_transition_batch():
    indices = np.random.choice(buffer_size, size.batch_size)
    batch = buffer[indices, :]
    s = batch[:, :s_space]
    a = batch[:, s_space: self.s_space + 1]
    r = batch[:, -s_space - 1: -s_space]
    s_next = batch[:, -s_space:]
    return s, a, r, s_next

In [13]:
def train():
    # 1. If buffer length is less than buffer size, return
    if buffer_length < self.buffer_size:
        return

    # 2. Update Q-Target if need
    if total_step % update_q_target_step == 0:
        session.run(self.update_q_target_op)
    
    # 3. Get transition bath
    s, a, r, s_next = get_transition_batch()

    # 4. Calculate q_eval_next.
    q_eval_next = self.session.run(self.q_eval, {self.s: s_next})

    # 5. Get action indices and make batch indices.
    a_indices = np.argmax(q_eval_next, axis=1)
    b_indices = np.arange(self.batch_size, dtype=np.int)

    # 6. Calculate q_target_next selected by actions.
    q_target_next = self.session.run(q_target, {s_next: s_next})
    q_target_next_with_a = q_target_next[b_indices, a_indices]

    # 7. Calculate labels.
    q_eval = session.run(q_eval, {s: s})
    q_next = q_eval.copy()
    q_next[b_indices, a.astype(np.int)] = r + gamma * q_target_next_with_a

    # 8. Calculate loss.
    _, critic_loss = session.run([train_op, loss], {s: s, q_next: q_next})

    # 9. Increase total step
    total_step += 1

# Predict

In [15]:
def predict(s):
    q = session.run(q_eval, {s: s})
    a = np.argmax(q)
    return get_stock_code_and_action(codes, a, use_greedy=True, use_prob=True if mode == 'train' else False))

SyntaxError: invalid syntax (<ipython-input-15-a455c4504df9>, line 4)

# RQAlpha Strategy


In [23]:
import rqalpha

from rqalpha.api import *
from sklearn.preprocessing import MinMaxScaler

Backtest config

In [24]:
config = {
  "base": {
    "start_date": "2017-01-01",
    "end_date": "2018-01-02",
    "benchmark": "000300.XSHG",
    "accounts": {
        "stock": 100000
    }
  },
  "extra": {
    "log_level": "warning",
  },
  "mod": {
    "sys_analyser": {
      "enabled": True,
      "plot": True
    }
  }
}

### Strategy

In [18]:
def init(context):
    context.s1 = codes[0]

def before_trading(context):
    pass

def handle_bar(context, bar_dict):
    s = process_data(context, bar_dict)

    if s is None:
        return

    c, a, _ = predict(s)
    train()
    
    if a == ActionCode.Buy:
        order(context.s1, 0.10)
        buy_open(context.s1, 100)

    elif a == ActionCode.Sell:
        buy_open(context.s1, 0)

def stoploss():
    pass

def after_trading(context):
    pass


### Run strategy

In [None]:
rqalpha.run_func(init=strategy.init,
             before_trading=before_trading,
             handle_bar=handle_bar,
             after_trading=after_trading,
             config=config)