In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Add the current working directory to the module search path.
# NOTE(review): presumably so the project-local modules imported further down
# (TraderEnv, util) resolve when the notebook is started from the repo root — confirm.
sys.path.append(".")

In [3]:
import os

import numpy as np
from keras_tqdm import TQDMNotebookCall

# import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, CuDNNLSTM,LSTM
from keras.optimizers import Adam

# keras-rl agent
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory

# trader environment
from TraderEnv import OhlcvEnv
# custom normalizer
from util import NormalizerProcessor

Using TensorFlow backend.


In [4]:
from tempfile import mkstemp
from shutil import move, copymode, copy
from os import fdopen, remove

def replace(file_path, pattern, subst):
    """Rewrite a text file in place, replacing every ``pattern`` with ``subst``.

    The substitution is a literal ``str.replace`` (no regex) applied line by
    line, so a pattern that spans a line break is never matched.

    The new content is written to a temporary file created in the same
    directory as ``file_path`` and then swapped in with ``os.replace``. The
    original file is therefore never removed before its replacement exists,
    and the swap never has to cross a filesystem boundary.

    Args:
        file_path: Path of the text file to modify.
        pattern: Literal substring to search for.
        subst: Replacement text.

    Raises:
        OSError: If the file cannot be read or the directory cannot be
            written. The original file is left untouched and the temporary
            file is removed.
    """
    target_dir = os.path.dirname(os.path.abspath(file_path))
    # Create temp file next to the target so the final swap is a same-filesystem rename
    fh, abs_path = mkstemp(dir=target_dir)
    try:
        # newline='' turns off newline translation on both sides, so the
        # file's existing line endings (LF or CRLF) are written back unchanged.
        with fdopen(fh, 'w', newline='') as new_file:
            with open(file_path, newline='') as old_file:
                for line in old_file:
                    new_file.write(line.replace(pattern, subst))
        # Copy the file permissions from the old file to the new file
        copymode(file_path, abs_path)
        # Swap the new file in over the original in a single step
        os.replace(abs_path, file_path)
    except BaseException:
        # Do not leave a stray temp file behind when anything above fails
        if os.path.exists(abs_path):
            remove(abs_path)
        raise

def replace_all(file_path, new_file_path):
    """Overwrite ``file_path`` with the file found at ``new_file_path``.

    Uses ``shutil.copy``, which copies the file's data and its permission
    bits. Note the argument order: the file being overwritten comes first,
    the file providing the new content comes second.

    Args:
        file_path: Destination path whose content is replaced.
        new_file_path: Source path providing the new content.
    """
    source, destination = new_file_path, file_path
    copy(source, destination)

In [5]:
# replace_all("/opt/miniconda3/lib/python3.6/site-packages/rl/agents/dqn.py", "rl/agents/dqn.py")

In [6]:
# Overwrite the installed keras-rl `rl/core.py` with the project's local copy at ./rl/core.py.
# NOTE(review): this modifies site-packages in place, and the destination path is hard-coded
# to one specific miniconda / Python 3.6 install — adjust it before running elsewhere.
replace_all("/opt/miniconda3/lib/python3.6/site-packages/rl/core.py", "rl/core.py")

In [7]:
def create_model(shape, nb_actions):
    """Build the recurrent network that is handed to the DQN agent.

    Architecture, in order: LSTM(64, returning the full sequence) ->
    LSTM(64) -> Dense(32) -> ReLU -> Dense(nb_actions) with a linear
    activation.

    Args:
        shape: Input shape passed to the first LSTM layer as ``input_shape``.
        nb_actions: Number of units in the output layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    layer_stack = [
        LSTM(64, input_shape=shape, return_sequences=True),
        LSTM(64),
        Dense(32),
        Activation('relu'),
        Dense(nb_actions, activation='linear'),
    ]
    return Sequential(layer_stack)

In [None]:
# OPTIONS
ENV_NAME = 'OHLCV-v0'
TIME_STEP = 30

# Get the environment and extract the number of actions.
PATH_TRAIN = "./data/train/"
PATH_TEST = "./data/test/"
env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN, show_trade=False)
env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)

# random seed
np.random.seed(456)
env.seed(562)

nb_actions = env.action_space.n
model = create_model(shape=env.shape, nb_actions=nb_actions)
# summary() prints the table itself and returns None; wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
# policy = BoltzmannQPolicy()
policy = EpsGreedyQPolicy()
# enable the dueling network
# you can specify the dueling_type to one of {'avg','max','naive'}
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=200,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, policy=policy,
               processor=NormalizerProcessor())
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Create the output directories up front. Without this, a missing directory
# would only surface when the first results are written, i.e. after a full
# training round has already been spent.
os.makedirs('./info', exist_ok=True)
os.makedirs('./model', exist_ok=True)

# Train / validate / checkpoint forever; stop by interrupting the kernel.
while True:
    # train
    # Uncomment to resume from a previously saved checkpoint:
#     dqn.load_weights('./model/duel__LSTM_dqn_OHLCV-v0_weights_1044LS_0_19_0.04339342066576202.h5f')
    dqn.fit(env, nb_steps=800000, nb_max_episode_steps=None, visualize=False, verbose=1)
    # validate
    # NOTE(review): indexing `info` as a dict relies on dqn.test() returning the
    # environment's info dict, presumably via the patched rl/core.py — confirm.
    info = dqn.test(env_test, nb_episodes=1, visualize=True)
    n_long, n_short, total_reward, portfolio = info['n_trades']['long'], info['n_trades']['short'], info[
        'total_reward'], int(info['portfolio'])
    # Persist the validation info and the weights; both file names encode the
    # validation result (portfolio, long/short trade counts, total reward).
    np.array([info]).dump(
        './info/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.info'.format(ENV_NAME, portfolio, n_long, n_short,
                                                                total_reward))
    dqn.save_weights(
        './model/duel__LSTM_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.h5f'.format(ENV_NAME, portfolio, n_long, n_short, total_reward),
        overwrite=True)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 30, 64)            36864     
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
activation_1 (Activation)    (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 99        
Total params: 72,067
Trainable params: 72,067
Non-trainable params: 0
_________________________________________________________________
None
Training for 800000 steps ...
