In [None]:
%pylab inline 


In [None]:
# Standard library
import os
import pickle as pkl
import shutil
import sys
import time
from collections import Counter

# Extend the module search path before the project-local imports further down.
sys.path.append('/home/peter/code/projects')

# Third-party
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import progressbar as pb
from tqdm import tqdm
from empyrical import sortino_ratio, calmar_ratio, omega_ratio
from stable_baselines.common.policies import MlpPolicy, MlpLstmPolicy, ActorCriticPolicy, FeedForwardPolicy
from stable_baselines.common.vec_env import SubprocVecEnv, VecEnv, VecEnvWrapper
from stable_baselines import A2C, PPO2, DQN, ACKTR, ACER
from stable_baselines.common.vec_env import DummyVecEnv
import tensorflow as tf

# Project-local (presumably resolved through the path appended above -- confirm)
import aidevutil.denoise as denoise
from trading_env import TradingEnv

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
from skopt import gp_minimize, gbrt_minimize, Optimizer
from skopt.benchmarks import branin as branin
from skopt.benchmarks import hart6 as hart6_
from functools import partial
from skopt.plots import plot_evaluations, plot_convergence
from skopt.plots import plot_objective
from skopt import gp_minimize, gbrt_minimize, forest_minimize, dummy_minimize
from skopt import callbacks
#from skopt.callbacks import CheckpointSaver
from skopt import load

In [None]:
# Re-seed NumPy's global RNG. Passing None (the default) asks NumPy to pull
# fresh entropy from the OS, so successive runs are NOT reproducible.
np.random.seed(None)

In [None]:
# Load the market data.
# Hand np.load the path instead of an open() handle: NumPy then opens and
# closes the file itself, so no file descriptor is left dangling.
input_source = np.load('data_eurusd2.npy')
to_predict = np.load('data_eurusd2_targets.npy')

# Keep only row 3 of the targets array and flatten it to 1-D.
to_predict = to_predict[3, :].reshape(-1)

# Transpose the inputs; later cells split and index them along the first axis
# (presumably one row per bar -- TODO confirm against the data file).
input_source = input_source.T

In [None]:
# Sanity check: display the shapes of the input array and the target vector.
input_source.shape, to_predict.shape

In [None]:
# Chronological hold-out split: the first 80% of the rows are kept for
# training, the remaining rows become the test set.
# NOTE: this cell rebinds input_source / to_predict, so running it twice
# splits the already-truncated training data again.
is_orig = np.copy(input_source)      # copy of the inputs taken before the split
cp = int(0.8 * len(input_source))    # first row index of the test portion
test_input_source, test_to_predict = input_source[cp:, :], to_predict[cp:]
input_source, to_predict = input_source[:cp, :], to_predict[:cp]

In [None]:
# Sanity check: display the shapes again; after the split above these names
# refer to the training portion only.
input_source.shape, to_predict.shape

In [None]:
# Settings read as globals by test_rl() and forwarded to TradingEnv under the
# same keyword names. Exact semantics/units are defined by TradingEnv.
bars_per_episode = 1_000
winlen = 1
traded_amt = 100_000
initial_balance = 10_000_000   # only passed to the training environments
commission = 0
slippage = 0.0

In [None]:
def test_rl(args):
    """Train an ACKTR agent with one hyper-parameter set and score it on the test split.

    Reads the module-level arrays ``input_source`` / ``to_predict`` (training)
    and ``test_input_source`` / ``test_to_predict`` (evaluation), plus the
    environment settings ``winlen``, ``bars_per_episode``, ``traded_amt``,
    ``initial_balance``, ``commission`` and ``slippage``.

    Parameters
    ----------
    args : sequence of 12 values, in this exact order
        afun : int
            Index into ``[tf.nn.relu, tf.nn.tanh, tf.nn.sigmoid]``.
        l1, l2 : number
            Hidden-layer widths; cast to ``int`` for ``net_arch``.
        gamma, n_steps, ent_coef, vf_coef, vf_fisher_coef, learning_rate,
        max_grad_norm, kfac_clip
            Forwarded unchanged to the ``ACKTR`` constructor.
        lr_schedule : int
            Index into the list of schedule names defined below.

    Returns
    -------
    float
        ``100 - p`` where ``p`` is the percentage of the 100 evaluation
        episodes whose summed returns were strictly positive. Lower is
        better, which suits a minimising optimiser.
    """
    (afun, l1, l2, gamma, n_steps, ent_coef, vf_coef, vf_fisher_coef,
     learning_rate, max_grad_norm, kfac_clip, lr_schedule) = args
    afun = [tf.nn.relu, tf.nn.tanh, tf.nn.sigmoid][afun]
    lr_schedule = ['linear', 'constant', 'double_linear_con', 'middle_drop', 'double_middle_drop'][lr_schedule]

    # Keyword arguments shared by the training and the evaluation environments.
    env_kwargs = dict(winlen=winlen, bars_per_episode=bars_per_episode, traded_amt=traded_amt,
                      commission=commission, slippage=slippage,
                      reward_type='cur_balance',
                      min_ratio_trades=20,
                      max_position_time=30)

    n_cpu = 32
    train_env = SubprocVecEnv([
        lambda: TradingEnv(input_source, to_predict, initial_balance=initial_balance, **env_kwargs)
        for _ in range(n_cpu)
    ])

    try:
        model = ACKTR(MlpPolicy, train_env, verbose=0,
                      gamma=gamma,
                      nprocs=8,
                      n_steps=n_steps,
                      ent_coef=ent_coef,
                      vf_coef=vf_coef,
                      vf_fisher_coef=vf_fisher_coef,
                      learning_rate=learning_rate,
                      max_grad_norm=max_grad_norm,
                      kfac_clip=kfac_clip,
                      lr_schedule=lr_schedule,
                      policy_kwargs=dict(act_fun=afun, net_arch=[int(l1), int(l2)]),
                      tensorboard_log='/home/peter/tblog')
        model.learn(total_timesteps=3_000_000)
    finally:
        # The worker processes are only needed for training. Without this
        # close() every call would leave n_cpu subprocesses behind, and the
        # search loop calls this function many times.
        train_env.close()

    # Now test the model on the held-out data.
    # NOTE(review): unlike the training envs, no initial_balance is passed
    # here, so TradingEnv's own default applies -- confirm this is intended.
    eval_env = DummyVecEnv([lambda: TradingEnv(test_input_source, test_to_predict, **env_kwargs)])
    trading_env = eval_env.envs[0]

    # Estimate the likelihood that any given episode ends with a profit.
    n_episodes = 100
    episode_returns = []
    progress = pb.ProgressBar(max_value=n_episodes)
    for episode in range(n_episodes):
        progress.update(episode)
        observation = trading_env.reset()
        state = model.initial_state
        for _ in range(trading_env.bars_per_episode):
            action, state = model.predict([observation], state=state, deterministic=True)
            observation, _, done, _ = trading_env.step(action)
            if done:
                break
        # Explicit np.sum: the result must not depend on whether %pylab has
        # replaced the builtin sum in the notebook namespace.
        episode_returns.append(np.sum(trading_env.returns))
    progress.finish()

    pli = 100.0 * np.mean(np.array(episode_returns) > 0)
    print('Profit likelihood: %3.3f%%' % pli)
    # Return the complement because the caller is MINIMISING.
    return 100 - pli


In [None]:
# Search space: one (low, high) range per hyper-parameter, listed in exactly
# the order in which test_rl() unpacks its argument.
opt = Optimizer([(0, 2), # afun
                 (32, 256), # l1
                 (32, 256), # l2
                 (0.75, 0.9999), # gamma
                 (5, 100), # n_steps
                 (0.0, 0.25), # ent_coef
                 (0.1, 0.6), # vf_coef
                 (0.2, 1.0), # vf_fisher_coef
                 (0.02, 0.75), # learning_rate
                 (0.1, 0.8), # max_grad_norm
                 (0.0, 0.05), # kfac_clip
                 (0, 4)]) # lr_schedule

# Ask/tell loop: request a candidate, evaluate it (a full training run plus
# evaluation), report the score back, and persist the best candidate so far.
best_y_ever = 99999999
for i in tqdm(range(1000)):
    suggested = opt.ask()
    print('Trying:', suggested)
    y = test_rl(suggested)
    opt.tell(suggested, y)
    print('iteration:', i, suggested, y)
    if y < best_y_ever:
        best_y_ever = y
        # `pkl` is the pickle module (imported with the other dependencies at
        # the top of the notebook). The context manager guarantees the file
        # is flushed and closed even if dumping fails.
        with open('best_params.pkl', 'wb') as best_file:
            pkl.dump(suggested, best_file)
        print('Saved best parameters.')