In [1]:
import os
import backtrader as bt

from btgym import BTgymEnv, BTgymDataset
from btgym.strategy.observers import Reward, Position, NormPnL
from btgym.algorithms import Launcher, A3C

from btgym.research import DevStrat_4_6

from btgym.algorithms.policy import Aac1dPolicy

  return f(*args, **kwds)


In [None]:
# In the `BTgymDataset` settings below, uncomment exactly one data file:
# - the synthetic data file (simple sine wave): solved in less than 200K env. steps;
# - the 1-month real EURUSD data file: solved in about 14M env. steps (for vanilla A3C).
#
# Ignore the "Data_master reset()..." and "Dataset not ready..." warnings.
#
# To visualise training and results point tensorboard to: User_home/tmp/test_gym_a3c
#
# Refer to environment renderings to see actual trades, orders etc.
#
# Refer to `DevStrat_4_6` comments and Docs for state and reward shaping information.
#
# For UNREAL: enable the aux. `Value Replay` task, the `Pixel Change Control` task, or both,
# and note the more sample-efficient convergence.
# It is currently not recommended to turn on the `Reward Prediction` task, as it seems to hurt
# performance for BTgym; this may be a bug.
#
# Toy trading settings details for single episode:
# Trading pair: EUR/USD
# Initial cash: 2K USD, leverage 1:10, single stake size: 5K, can add position
# Commission is set to imitate spread
# Stop trading if lost 5% of initial amount (-100USD)
# Stop trading if reached 10% profit (+200USD)
# Profit is considered as `broker value` at trade episode end.
# Episode start times are sampled randomly from the dataset date/time range in a way that ensures
# continuous trading within a week, i.e. no episode starts on Fridays (see `start_weekdays` below); holidays are excluded.
# Trade for a maximum of 23 hours 55 minutes, starting at a random time of day (a single overnight is OK).
# Actions are market orders only: sell, buy, close or do_nothing
# Allowed to issue orders every 10th minute from beginning of episode tradetime

In [None]:
# Set backtesting engine parameters:

# One `bt.Cerebro` instance carries the strategy, broker account, sizer and
# observers; it is handed to the environment as `engine` in `env_config`.
MyCerebro = bt.Cerebro()

# Leveraged broker account.
MyCerebro.broker.setcash(2000)
MyCerebro.broker.setcommission(
    commission=0.0001,  # commission is set to imitate the spread
    leverage=10.0,
)
MyCerebro.addsizer(bt.sizers.SizerFix, stake=5000)

# Trading strategy together with its per-episode limits.
MyCerebro.addstrategy(
    DevStrat_4_6,
    drawdown_call=5,  # max % to lose, in percent of initial cash
    target_call=10,   # max % to win, in percent of initial cash
    skip_frame=10,
)

# Optional drawdown analyzer, disabled by default:
# MyCerebro.addanalyzer(bt.analyzers.DrawDown)

# Observers used to visualise reward, position and PnL dynamics.
MyCerebro.addobserver(Reward)
MyCerebro.addobserver(Position)
MyCerebro.addobserver(NormPnL)

# Episode data source. Keep exactly one `filename` line active: the synthetic
# sine wave (active) or one of the real EURUSD M1 files (commented out).
MyDataset = BTgymDataset(
    filename='./data/test_sine_1min_period256_delta0002.csv',
    # filename='./data/DAT_ASCII_EURUSD_M1_201703.csv',
    # filename='./data/DAT_ASCII_EURUSD_M1_201704.csv',
    start_weekdays={0, 1, 2, 3},
    episode_duration=dict(days=0, hours=23, minutes=55),
    start_00=False,
    time_gap=dict(hours=6),
)

# Environment specification: the environment class plus the keyword
# arguments it is constructed with (dataset, engine, rendering, ports).
env_config = {
    'class_ref': BTgymEnv,
    'kwargs': {
        'dataset': MyDataset,
        'engine': MyCerebro,
        # Rendering options.
        'render_modes': ['episode', 'human', 'external'],
        'render_state_as_image': True,
        'render_ylabel': 'OHL_diff.',
        'render_size_episode': (12, 8),
        'render_size_human': (9, 4),
        'render_size_state': (11, 3),
        'render_dpi': 75,
        # Network ports and connection timeout.
        'port': 5000,
        'data_port': 4999,
        'connect_timeout': 60,
        'verbose': 0,  # better be 0
    },
}

# Cluster layout for distributed training, plus the directory that receives
# logs (the tensorboard target mentioned in the notes above).
cluster_config = {
    'host': '127.0.0.1',
    'port': 12230,
    'num_workers': 6,  # set according to the number of CPUs available
    'num_ps': 1,
    'num_envs': 1,  # do not change yet
    'log_dir': os.path.expanduser('~/tmp/test_gym_a3c'),
}

# Policy specification: the policy class, with no extra constructor arguments.
policy_config = {
    'class_ref': Aac1dPolicy,
    'kwargs': dict(),
}

# Trainer specification: the algorithm class plus its hyperparameters.
trainer_config = {
    'class_ref': A3C,
    'kwargs': {
        # Optimizer schedule.
        # Learning rate is a fixed value here; it may also be given as a random
        # log-uniform range. Values > 2e-4 can ruin training.
        'opt_learn_rate': [1e-4, 1e-4],
        'opt_end_learn_rate': 1e-5,
        'opt_decay_steps': 100 * 10**6,
        # Model settings.
        'model_gae_lambda': 0.95,
        'model_beta': [0.05, 0.01],  # entropy regularisation, random log-uniform
        'rollout_length': 20,
        'time_flat': False,
        # Summary and rendering frequencies.
        'model_summary_freq': 100,
        'episode_summary_freq': 5,
        'env_render_freq': 20,
    },
}

In [None]:
# Assemble the launcher from the four configuration dictionaries defined above.
launcher = Launcher(
    # Component configurations.
    env_config=env_config,
    policy_config=policy_config,
    trainer_config=trainer_config,
    cluster_config=cluster_config,
    # Run settings.
    max_env_steps=100 * 10**6,
    root_random_seed=0,
    test_mode=False,
    purge_previous=1,  # ask before overriding a previously saved model and logs
    verbose=0,  # 0 or 1
)

# Start training.
launcher.run()