In [1]:
from __future__ import print_function

import pickle
import seaborn as sns
import numpy as np
import pandas as pd
import json
import os
import copy
import time
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

import sys
sys.path.append("/home/tomoaki/work")
import my_utils

In [2]:
# Ticker symbols to request, in the order they are handed to the data loader.
# NOTE(review): looks like an S&P 100 constituent list -- confirm the source.
input_list = """
    MMM T ABBV ABT ACN AGN ALL GOOGL
    GOOG MO AMZN AXP AIG AMGN AAPL BAC BIIB
    BLK BA BMY CVS COF CAT CELG CVX CSCO C
    KO CL CMCSA COP COST DHR DOW DUK DD EMC
    EMR EXC XOM FB FDX F GD GE GM GILD GS HAL
    HD HON INTC IBM JPM JNJ KMI LLY LMT LOW MA
    MCD MDT MRK MET MSFT MDLZ MON MS NKE OXY
    ORCL PEP PFE PM PG QCOM RTN SLB SPG SO SBUX
    TGT TXN BK PCLN TWX FOXA FOX USB UNP UPS UTX
    UNH VZ V WMT WBA DIS WFC
""".split()

In [3]:
# Announce the start of the download; the recorded run took roughly 296 seconds.
print ("Started!!")

# Wall-clock timer around the data fetch.
st = time.time()
# Date range passed to the loader.
start_date="2014-04-01"
end_date="2016-04-01"
# NOTE: `input_list` is rebound to the list returned by my_utils, so re-running
# this cell feeds the returned list back in instead of the original tickers.
# NOTE(review): presumably the returned list excludes tickers that failed to
# load (the helper prints a `fail_name_list`) -- confirm in my_utils.
input_data, input_list = my_utils.get_fixed_data(input_list, start_date=start_date, end_date=end_date) 
elapsed = time.time() - st
print ("time for getting data:", elapsed)

Started!!
('fail_name_list: ', [])
time for getting data: 295.864296913


In [6]:
# Shape of the underlying value matrix; the recorded run shows (505, 99),
# i.e. 505 index entries by 99 columns (presumably one column per ticker).
print (input_data.values.shape)

(505, 99)


In [7]:
# Inspect the row index; in the recorded run it is a DatetimeIndex of 505 dates.
print(input_data.index)

DatetimeIndex(['2014-04-01', '2014-04-02', '2014-04-03', '2014-04-04',
               '2014-04-07', '2014-04-08', '2014-04-09', '2014-04-10',
               '2014-04-11', '2014-04-14',
               ...
               '2016-03-18', '2016-03-21', '2016-03-22', '2016-03-23',
               '2016-03-24', '2016-03-28', '2016-03-29', '2016-03-30',
               '2016-03-31', '2016-04-01'],
              dtype='datetime64[ns]', length=505, freq=None)


# DDPG for trading


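Given a history of stock prices, a DDPG (deep deterministic policy gradient) actor-critic agent learns how to manage a portfolio: at every time step it decides how much of each stock to trade.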

In [8]:
# memory for replay
from collections import deque, namedtuple

# One sampled batch of transitions: (state0, action, reward, state1).
Experience = namedtuple('Experience', 'state0, action, reward, state1')
# Backward-compatible alias: existing code refers to the original, misspelled name.
Experiecne = Experience

class RingBuffer(object):
    """Fixed-capacity FIFO buffer that overwrites its oldest item once full.

    Logical index 0 is always the oldest stored item and ``len(buf) - 1`` the
    newest, regardless of where the items physically live in ``self.data``.

    Attributes:
        maxlen: maximum number of items kept.
        start: physical position in ``data`` of the oldest item.
        length: number of items currently stored (also returned by ``len``).
        data: backing list; it never grows beyond ``maxlen`` entries.
    """

    def __init__(self, maxlen):
        """Create an empty buffer holding at most ``maxlen`` items.

        Raises:
            ValueError: if ``maxlen`` is smaller than 1.
        """
        if maxlen < 1:
            raise ValueError("maxlen must be at least 1, got %r" % (maxlen,))
        self.maxlen = maxlen
        self.start = 0
        self.length = 0
        self.data = []

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        """Return the ``idx``-th oldest item.

        Raises:
            KeyError: if ``idx`` is negative or not smaller than ``len(self)``.
        """
        if idx < 0 or idx >= self.length:
            raise KeyError(idx)
        return self.data[(self.start + idx) % self.maxlen]

    def append(self, v):
        """Store ``v`` as the newest item, evicting the oldest one when full."""
        if self.length < self.maxlen:
            # Still filling up: `start` is 0, so physical order equals logical order.
            self.data.append(v)
            self.length += 1
        elif self.length == self.maxlen:
            # Full: overwrite the oldest slot in place and advance the start
            # marker, so the backing list keeps exactly `maxlen` entries.
            self.data[self.start] = v
            self.start = (self.start + 1) % self.maxlen
        else:
            # This should never happen
            raise RuntimeError()
        
class SequentialMemory(object):
    """Replay memory keeping observations, actions and rewards in parallel ring buffers.

    Entry ``i`` of ``actions`` / ``rewards`` is treated as the action taken
    (and the reward received) after observation ``i``; observation ``i + 1``
    is the observation that followed it.
    """

    def __init__(self, limit=1000):
        """Create three empty ring buffers, each holding at most ``limit`` entries."""
        self.limit = limit

        self.actions = RingBuffer(limit)
        self.rewards = RingBuffer(limit)
        self.observations = RingBuffer(limit)

    def sample(self, batch_size, window_length):
        """Draw ``batch_size`` random transitions with ``window_length`` steps of context.

        Args:
            batch_size: number of transitions to draw (with replacement).
            window_length: number of consecutive observations forming one state.

        Returns:
            A namedtuple with fields ``state0``, ``action``, ``reward`` and
            ``state1``; each field is a numpy array whose first axis has
            length ``batch_size``.  ``state1`` is ``state0`` shifted forward
            by one observation.

        Raises:
            ValueError: if fewer than ``window_length + 1`` observations are stored.
        """
        if self.nb_entries <= window_length:
            raise ValueError(
                "need more than %d stored observations to sample, have %d"
                % (window_length, self.nb_entries))
        # Indices window_length .. nb_entries - 1 (inclusive).  randint excludes
        # its upper bound, which makes it the drop-in replacement for the
        # deprecated np.random.random_integers(low, high).
        batch_idx = np.random.randint(window_length, self.nb_entries, size=batch_size)
        assert len(batch_idx) == batch_size

        # create experiences
        state0 = np.array([[self.observations[i] for i in range(idx - window_length, idx)] for idx in batch_idx])
        action = np.array([self.actions[idx - 1] for idx in batch_idx])
        reward = np.array([self.rewards[idx - 1] for idx in batch_idx])
        state1 = np.array([[self.observations[i] for i in range(idx - window_length + 1, idx + 1)] for idx in batch_idx])
        return Experiecne(state0, action, reward, state1)

    def append(self, observation, action, reward):
        """Store one (observation, action, reward) triple at the same position in all buffers."""
        self.observations.append(observation)
        self.actions.append(action)
        self.rewards.append(reward)

    @property
    def nb_entries(self):
        """Number of observations currently stored."""
        return len(self.observations)
    

In [27]:
import tensorflow as tf
from keras.layers.convolutional import Convolution2D
from keras.layers.pooling import MaxPooling2D 
from keras.layers.core import Flatten, Lambda
from keras.models import Sequential
from keras.layers import BatchNormalization
from keras.layers import Activation
from keras.layers.core import Dense
from keras.engine.topology import Merge
from keras.layers.advanced_activations import PReLU
from keras import backend as K
import time
# Compatibility shim: make `tf.python.control_flow_ops` resolve to the `tf` module itself.
# NOTE(review): presumably works around a Keras/TensorFlow version mismatch --
# confirm it is still required for the installed versions.
tf.python.control_flow_ops = tf
import warnings
# Suppress every warning for the rest of the session (deprecation notices included).
warnings.filterwarnings("ignore")

class DDPG(object):
    def __init__(self, config):
        """initialized approximate value function
        config should have the following attributes
        Args:
            trade_stock_idx(int): trading stock index
            gam (float): discount factor
            n_history(int): nubmer of history that will be used as input
        """
        self.activation = config.activation
        self.gamma = config.gamma
        self.history_length = config.history_length
        self.n_stock = config.n_stock
        self.n_feature = config.n_feature
        self.n_smooth = config.n_smooth
        self.n_down = config.n_down
        self.k_w = config.k_w
        self.n_hidden = config.n_hidden
        self.n_batch = config.n_batch
        self.n_epochs = config.n_epochs
        self.update_rate = config.update_rate
        self.lr = config.learning_rate
        # the actual dimention of input
        self.n_input = (1 + self.n_smooth + self.n_down) * self.n_stock
        # the length of the data as input
        self.n_history = max(self.n_smooth + self.history_length - 1, self.n_down * self.history_length)
        print ("building model....")
        K.clear_session()
        self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
        K.set_session(self.sess)
        with self.sess.as_default():
            self.build_model()
        print('finished!')
    
    def training(self, input_data):
        """Train the actor and critic online over one pass through the price history.

        The first ``self.n_history`` steps only fill the replay memory (zero
        action, zero reward).  After that, at every step ``t`` the actor output
        for the latest ``n_history`` observations is rounded to obtain the
        action, the reward is ``sum((price[t + 1] - price[t]) * action)``, and
        ``self.n_epochs`` mini-batch updates of critic and actor are run, each
        followed by a soft update of both target networks.

        Args:
            input_data: table-like object exposing ``.values`` (row ``t`` holds
                the prices used at step ``t``) and ``.index`` (labels printed
                in the progress log).  Presumably a pandas DataFrame of shape
                (time, n_stock) -- confirm against the caller.

        Returns:
            numpy.ndarray: cumulative sum of the per-step rewards, one entry
            per step after the warm-up.
        """
        stock_data = input_data.values
        date = input_data.index
        T = len(stock_data)
        # Report progress about ten times per run (every step for short series).
        print_freq = max(1, T // 10)
        print ("training....")
        st = time.time()
        result_history = []
        portfolio = np.zeros(self.n_stock)
        value = 0
        K.set_session(self.sess)
        # Initialise all variables, including the optimizer state created in build_model.
        self.sess.run(tf.initialize_all_variables())
        # Running the initializers again can overwrite the weight copy made in
        # build_model, so synchronise the target networks here to guarantee
        # they start from the online networks' weights.
        self.critic_target.set_weights(self.critic.get_weights())
        self.actor_target.set_weights(self.actor.get_weights())
        memory = SequentialMemory()
        for t in range(T - 1):
            price = stock_data[t]
            if t < self.n_history:
                # Warm-up: not enough history yet, so only record the
                # observation with a zero action and zero reward.
                memory.append(price, np.zeros(self.n_stock), 0)
                continue
            future_price = stock_data[t + 1]
            # Store the observation first so that it is part of the state window.
            memory.observations.append(price)
            n_obs = len(memory.observations)
            feature = [memory.observations[idx] for idx in range(n_obs - self.n_history, n_obs)]
            # Actions are rounded to whole numbers per stock.
            action = np.round(self.actor_output.eval(session=self.sess,
                                      feed_dict={self.state: [feature], K.learning_phase(): 1})[0])

            # One-step profit of holding `action` units from t to t + 1.
            reward = np.sum((future_price - price) * action)
            memory.rewards.append(reward)
            memory.actions.append(action)
            value += reward
            portfolio += action
            result_history.append(reward)
            for _ in range(self.n_epochs):
                # Sample a mini-batch of transitions from the replay memory.
                experiences = memory.sample(self.n_batch, self.n_history)
                self.sess.run(self.critic_optim,
                              feed_dict={self.state: experiences.state0,
                                         self.state_target: experiences.state1,
                                         self.reward: experiences.reward,
                                         self.action: experiences.action,
                                         self.learning_rate: self.lr,
                                         K.learning_phase(): 1})
                self.sess.run(self.actor_optim,
                              feed_dict={self.state: experiences.state0,
                                         self.learning_rate: self.lr,
                                         K.learning_phase(): 1})
                # Let both target networks track the freshly updated online networks.
                self._soft_update(self.critic, self.critic_target)
                self._soft_update(self.actor, self.actor_target)

            if t % print_freq == 0:
                print ("time:",  date[t + 1])
                print("value:", value)
                print("portfolio:", portfolio)
                print ("elapsed time", time.time() - st)

        print ("finished")

        return np.cumsum(np.array(result_history))

    def _soft_update(self, online, target):
        """Blend weights into ``target``: ``rate * online + (1 - rate) * target``."""
        rate = self.update_rate
        blended = [rate * new_w + (1 - rate) * old_w
                   for new_w, old_w in zip(online.get_weights(), target.get_weights())]
        target.set_weights(blended)
    
    def build_model(self):
        """Create the four Keras networks and wire the TensorFlow training graph.

        Sets the networks ``self.critic`` / ``self.critic_target`` /
        ``self.actor`` / ``self.actor_target``, the placeholders ``self.state``,
        ``self.state_target``, ``self.action``, ``self.reward`` and
        ``self.learning_rate``, the tensors ``self.Q``, ``self.loss``,
        ``self.actor_output``, ``self.actor_target_output`` and
        ``self.Q_actor``, and the train ops ``self.critic_optim`` and
        ``self.actor_optim``.  Finally copies the online weights into the
        target networks.
        """
        # For convenience during training, the online and target networks get
        # separate placeholders.
        # Critic input is [raw_data, smoothed..., downsampled..., action].
        self.critic = self.build_critic()
        self.critic_target = self.build_critic()
        # Actor input is [raw_data, smoothed..., downsampled...].
        self.actor = self.build_actor()
        self.actor_target = self.build_actor()
        # State placeholders: (batch, n_history, n_stock).
        self.state =  tf.placeholder(tf.float32, [None, self.n_history, self.n_stock], name='state')
        self.state_target = tf.placeholder(tf.float32, [None, self.n_history, self.n_stock], name='state_target')
        # Add a trailing channel axis so the states can be fed to the convolutions.
        state_ = tf.reshape(self.state, [-1, self.n_history, self.n_stock, 1])
        state_target_ = tf.reshape(self.state_target, [-1, self.n_history, self.n_stock, 1])
        # Derive the raw, smoothed and down-sampled views of each state.
        raw, smoothed, down = self.transform_input(state_)
        raw_target, smoothed_target, down_target = self.transform_input(state_target_)
        
        # Graph for critic training: Q(state, action) for the sampled action.
        self.action = tf.placeholder(tf.float32, [None, self.n_stock])
        input_q = [raw,] +  smoothed + down + [self.action,]
        self.Q = tf.squeeze(self.critic(input_q))
        # Target value: the target critic evaluated at the target actor's
        # action for the next state.
        self.actor_target_output = self.actor_target([raw_target,] +  smoothed_target + down_target)
        input_q_target = [raw_target,] +  smoothed_target + down_target + [self.actor_target_output,]
        Q_target = tf.squeeze(self.critic_target(input_q_target))
        self.reward = tf.placeholder(tf.float32, [None], name='reward')
        # One-step bootstrapped target: reward + gamma * Q_target.
        target = self.reward  + self.gamma * Q_target
        # Critic optimisation: mean squared error, updating only the online
        # critic's trainable weights (the learning rate is fed at run time).
        self.learning_rate = tf.placeholder(tf.float32, shape=[], name="learning_rate")
        self.loss = tf.reduce_mean(tf.square(target - self.Q), name='loss')
        self.critic_optim = tf.train.AdamOptimizer(self.learning_rate) \
            .minimize(self.loss, var_list=self.critic.trainable_weights)
        
        # Graph for actor training: the online critic evaluated at the online
        # actor's own action.
        self.actor_output = self.actor([raw,] +  smoothed + down)
        input_q_actor = [raw,] +  smoothed + down + [self.actor_output,]
        self.Q_actor = tf.squeeze(self.critic(input_q_actor))
        # Actor optimisation: maximise Q by minimising -Q, updating only the
        # online actor's trainable weights.
        # NOTE(review): -self.Q_actor is a per-sample tensor rather than a
        # scalar; consider wrapping it in tf.reduce_mean -- confirm intent.
        self.actor_optim = tf.train.AdamOptimizer(self.learning_rate) \
            .minimize(-self.Q_actor, var_list=self.actor.trainable_weights)
        
        # Start both target networks from the online networks' weights.
        # tf.initialize_all_variables().run(session=self.sess)
        weights = self.critic.get_weights()
        self.critic_target.set_weights(weights)
        weights = self.actor.get_weights()
        self.actor_target.set_weights(weights)
        
    def build_critic(self):
        """Assemble the critic network as a Keras ``Sequential`` model.

        Inputs, in order: the raw view, ``n_smooth - 1`` smoothed views and
        ``n_down - 1`` down-sampled views, each shaped
        ``(history_length, n_stock, 1)``, followed by the action vector of
        length ``n_stock``.  The output is a single value per sample.
        """
        view_shape = (self.history_length, self.n_stock, 1)

        def conv_tower():
            # Convolution -> batch normalisation -> PReLU over a single view.
            tower = Sequential()
            tower.add(Convolution2D(nb_filter=self.n_feature, nb_row=self.k_w, nb_col=1,
                                    border_mode='same', input_shape=view_shape))
            tower.add(BatchNormalization(mode=2, axis=-1))
            tower.add(PReLU())
            return tower

        # layer1: one tower per view (smoothed, then down-sampled, then raw)
        smoothed_towers = [conv_tower() for _ in range(self.n_smooth - 1)]
        downsampled_towers = [conv_tower() for _ in range(self.n_down - 1)]
        raw_tower = conv_tower()
        joined_views = Merge([raw_tower] + smoothed_towers + downsampled_towers,
                             mode='concat', concat_axis=-1)

        # layer2: convolve the concatenated feature maps, then flatten
        state_features = Sequential()
        state_features.add(joined_views)
        state_features.add(Convolution2D(nb_filter=2 * self.n_feature, nb_row=self.k_w,
                                         nb_col=1, border_mode='same'))
        state_features.add(BatchNormalization(mode=2, axis=-1))
        state_features.add(PReLU())
        state_features.add(Flatten())

        # layer3: the action enters through an identity layer plus batch normalisation
        action_branch = Sequential()
        action_branch.add(Lambda(lambda x: x, input_shape=(self.n_stock,)))
        action_branch.add(BatchNormalization(mode=1, axis=-1))
        state_and_action = Merge([state_features, action_branch], mode='concat')

        critic = Sequential()
        critic.add(state_and_action)
        critic.add(Dense(self.n_hidden))
        critic.add(BatchNormalization(mode=1, axis=-1))
        critic.add(PReLU())
        # layer4: narrower hidden layer of width int(sqrt(n_hidden))
        critic.add(Dense(int(np.sqrt(self.n_hidden))))
        critic.add(PReLU())
        # output: one value per sample
        critic.add(Dense(1))
        return critic
    
    def build_actor(self):
        """Assemble the actor network as a Keras ``Sequential`` model.

        Inputs, in order: the raw view, ``n_smooth - 1`` smoothed views and
        ``n_down - 1`` down-sampled views, each shaped
        ``(history_length, n_stock, 1)``.  The output has ``n_stock`` values
        per sample; no activation argument is given to the final layer.
        """
        view_shape = (self.history_length, self.n_stock, 1)

        def conv_tower():
            # Convolution -> batch normalisation -> PReLU over a single view.
            tower = Sequential()
            tower.add(Convolution2D(nb_filter=self.n_feature, nb_row=self.k_w, nb_col=1,
                                    border_mode='same', input_shape=view_shape))
            tower.add(BatchNormalization(mode=2, axis=-1))
            tower.add(PReLU())
            return tower

        # layer1: one tower per view (smoothed, then down-sampled, then raw)
        smoothed_towers = [conv_tower() for _ in range(self.n_smooth - 1)]
        downsampled_towers = [conv_tower() for _ in range(self.n_down - 1)]
        raw_tower = conv_tower()
        raw_tower.build()
        joined_views = Merge([raw_tower] + smoothed_towers + downsampled_towers, mode='concat')

        # layer2: convolve the concatenated feature maps, then flatten
        actor = Sequential()
        actor.add(joined_views)
        actor.add(Convolution2D(nb_filter=2 * self.n_feature, nb_row=self.k_w,
                                nb_col=1, border_mode='same'))
        actor.add(BatchNormalization(mode=2, axis=-1))
        actor.add(PReLU())
        actor.add(Flatten())
        # layer3
        actor.add(Dense(self.n_hidden))
        actor.add(BatchNormalization(mode=1, axis=-1))
        actor.add(PReLU())
        # layer4: narrower hidden layer of width int(sqrt(n_hidden))
        actor.add(Dense(int(np.sqrt(self.n_hidden))))
        actor.add(PReLU())
        # output: one value per stock
        actor.add(Dense(self.n_stock))
        return actor
    
    def transform_input(self, input):
        # the last data is the newest information
        raw = input[:, self.n_history - self.history_length:, :, :]
        # smooth data
        smoothed = []
        for n_sm in range(2, self.n_smooth + 1):
            smoothed.append(
                tf.reduce_mean(tf.pack([input[:, self.n_history - st - self.history_length:self.n_history - st, :, :] for st in range(n_sm)]),0)
            )
        # downsample data
        down = []
        for n_dw in range(2, self.n_down + 1):
            sampled_ = tf.pack([input[:, idx, :, :] for idx in range(self.n_history-n_dw*self.history_length, self.n_history, n_dw)])
            down.append(tf.transpose(sampled_, [1, 0, 2, 3]))
        return raw, smoothed, down

In [28]:
# import numpy as np 
# input_data = np.zeros((505, 99))
# Number of entries in the first row of the price matrix (one per column).
n_stock = len(input_data.values[0])

class MultiDDPGConfig(object):
    """Hyper-parameters read by ``DDPG.__init__``."""
    activation = 'relu'  # stored by DDPG; the network builders shown in this notebook use PReLU instead
    gamma = 0.95  # discount factor in the critic target: reward + gamma * Q_target
    history_length = 10  # time steps per network input view
    n_stock = n_stock  # taken from the module-level value computed above
    n_smooth = 3  # widest moving-average window; yields n_smooth - 1 smoothed views
    n_down = 3  # largest down-sampling stride; yields n_down - 1 down-sampled views
    k_w = 3  # convolution kernel size along the time axis (nb_row)
    n_hidden = 100  # width of the first dense layer; the next one has int(sqrt(n_hidden)) units
    n_batch = 32  # mini-batch size drawn from the replay memory
    n_epochs = 10  # gradient updates performed per time step
    n_feature = 5  # filters in the first convolution; the second uses twice as many
    update_rate = 0.5  # soft-update mix: target = rate * online + (1 - rate) * target
    learning_rate = 1e-3  # fed to both Adam optimizers

In [None]:
import datetime
# Start from an empty default TensorFlow graph before the networks are built.
tf.reset_default_graph()
config = MultiDDPGConfig()

# NOTE: despite the variable name, this is the DDPG agent, not a DQN.
dqn = DDPG(config)
print ("start!")
# `training` returns the cumulative sum of per-step rewards, so `exit_idx`
# holds a reward curve rather than indices.
exit_idx = dqn.training(input_data)
print ("finished!")

building model....


In [26]:
# Scratch check of np.round, which DDPG.training applies to the actor output
# to obtain whole-number actions.
np.round([1.1, 2.452])

array([ 1.,  2.])