## Step 1: Import Packages
Modify the system path and load the corresponding packages and functions 

In [17]:
import os
import sys
from pathlib import Path
import warnings
# Suppress every warning category for the rest of the session so the tutorial
# output stays readable. NOTE(review): this also hides deprecation warnings.
warnings.filterwarnings("ignore")
# "__file__" is a plain string here, not the `__file__` variable (which is not
# defined inside a notebook). Path("__file__").resolve() therefore yields
# <current working directory>/__file__, and parents[1] is the PARENT of the
# current working directory.
# Assumes the kernel is started one level below the repository root — TODO confirm.
ROOT = str(Path("__file__").resolve().parents[1])
# Make ROOT importable; presumably this is what lets the `trademaster` imports
# below resolve without installing the package.
sys.path.append(ROOT)
import torch
import argparse
import os.path as osp
from mmcv import Config
from trademaster.utils import replace_cfg_vals
from trademaster.nets.builder import build_net
from trademaster.environments.builder import build_environment
from trademaster.datasets.builder import build_dataset
from trademaster.agents.builder import build_agent
from trademaster.optimizers.builder import build_optimizer
from trademaster.losses.builder import build_loss
from trademaster.trainers.builder import build_trainer
from trademaster.transition.builder import build_transition

## Step 2: Load Configs
Load the default config from the file `configs/algorithmic_trading/algorithmic_trading_BTC_dqn_dqn_adam_mse.py`

In [18]:
# Declare the tutorial's options with argparse so they mirror a command-line
# script. The description and help strings describe this notebook (they were
# previously copied from a dataset-download script).
parser = argparse.ArgumentParser(description='Train and evaluate a DQN agent for algorithmic trading on BTC')
parser.add_argument("--config", default=osp.join(ROOT, "configs", "algorithmic_trading", "algorithmic_trading_BTC_dqn_dqn_adam_mse.py"),
                    help="path to the experiment config file")
parser.add_argument("--task_name", type=str, default="train",
                    help="which task to run: train, test or style_test")
parser.add_argument("--test_style", type=str, default='-1',
                    help="market-style label stored in cfg.data['test_style']")
# An explicit empty argv makes argparse fall back to the defaults above instead
# of reading the Jupyter kernel's own command-line arguments.
args = parser.parse_args([])
cfg = Config.fromfile(args.config)
task_name = args.task_name

# Post-process the loaded config with the project helper.
# NOTE(review): presumably expands placeholder values — confirm in trademaster.utils.
cfg = replace_cfg_vals(cfg)
# update test style
cfg.data.update({'test_style': args.test_style})


In [19]:
# Show the fully resolved config as the cell output to confirm the values in effect.
cfg

Config (path: /data1/sunshuo/qml/TradeMaster/TradeMasterReBuild/configs/algorithmic_trading/algorithmic_trading_BTC_dqn_dqn_adam_mse.py): {'data': {'type': 'AlgorithmicTradingDataset', 'data_path': 'data/algorithmic_trading/BTC', 'train_path': 'data/algorithmic_trading/BTC/train.csv', 'valid_path': 'data/algorithmic_trading/BTC/valid.csv', 'test_path': 'data/algorithmic_trading/BTC/test.csv', 'test_style_path': 'data/algorithmic_trading/BTC/test_labeled_3_24_-0.15_0.15.csv', 'tech_indicator_list': ['high', 'low', 'open', 'close', 'adjcp', 'zopen', 'zhigh', 'zlow', 'zadjcp', 'zclose', 'zd_5', 'zd_10', 'zd_15', 'zd_20', 'zd_25', 'zd_30'], 'backward_num_day': 5, 'forward_num_day': 5, 'test_style': '-1'}, 'environment': {'type': 'AlgorithmicTradingEnvironment'}, 'agent': {'type': 'AlgorithmicTradingDQN', 'max_step': 12345, 'reward_scale': 1, 'repeat_times': 1, 'gamma': 0.9, 'batch_size': 64, 'clip_grad_norm': 3.0, 'soft_update_tau': 0, 'state_value_tau': 0.005}, 'trainer': {'type': 'Algori

## Step 3: Build Dataset
Build datasets from cfg defined above

In [20]:
# Construct the dataset object from the config; the environments built later
# receive this same instance.
dataset = build_dataset(cfg)

In [21]:
# Show the dataset's repr as the cell output to confirm which class was built.
dataset

<trademaster.datasets.algorithmic_trading.dataset.AlgorithmicTradingDataset at 0x7f7222541890>

## Step 4: Build Reinforcement Learning Environments
Build environments based on cfg and previously-defined dataset

A style-test is provided as an option to test the algorithm's performance under different market conditions

In [22]:
# One environment per data split; all three share the same dataset object and
# differ only in the `task` they are told to serve.
train_environment = build_environment(
    cfg, default_args=dict(dataset=dataset, task="train"))
valid_environment = build_environment(
    cfg, default_args=dict(dataset=dataset, task="valid"))
test_environment = build_environment(
    cfg, default_args=dict(dataset=dataset, task="test"))

# Style test only: build one extra test environment for each path listed in
# dataset.test_style_paths, tagged with its position in that list.
if task_name.startswith("style_test"):
    test_style_environments = [
        build_environment(
            cfg,
            default_args=dict(dataset=dataset,
                              task="test_style",
                              style_test_path=style_path,
                              task_index=style_index))
        for style_index, style_path in enumerate(dataset.test_style_paths)
    ]


In [23]:
# Show the training environment's repr as the cell output.
train_environment

<trademaster.environments.algorithmic_trading.environment.AlgorithmicTradingEnvironment at 0x7f7222553350>

In [24]:
# Show the validation environment's repr as the cell output.
valid_environment

<trademaster.environments.algorithmic_trading.environment.AlgorithmicTradingEnvironment at 0x7f72225532d0>

In [25]:
# Show the test environment's repr as the cell output.
test_environment

<trademaster.environments.algorithmic_trading.environment.AlgorithmicTradingEnvironment at 0x7f7222553310>

## Step 5: Build Net 
Update the config with the state and action dimensions reported by the training environment, then create the networks and their optimizers for DQN


In [26]:
# The network input/output sizes come from the training environment.
action_dim, state_dim = train_environment.action_dim, train_environment.state_dim

# Primary network ("act") and its optimizer. The dimensions are written into
# the config first so that build_net sees them.
cfg.act.update(dict(action_dim=action_dim, state_dim=state_dim))
act = build_net(cfg.act)
act_optimizer = build_optimizer(cfg, default_args=dict(params=act.parameters()))

# Secondary network ("cri") is optional: it is only built when the config
# provides a truthy `cri` entry; otherwise both handles stay None.
cri, cri_optimizer = None, None
if cfg.cri:
    cfg.cri.update(dict(action_dim=action_dim, state_dim=state_dim))
    cri = build_net(cfg.cri)
    cri_optimizer = build_optimizer(cfg, default_args=dict(params=cri.parameters()))

## Step 6: Build Loss
Build loss from config

In [27]:
# Construct the loss object from the config; it is handed to the agent later.
criterion = build_loss(cfg)

## Step 7: Build Transition
Build transition from config

In [28]:
# Construct the transition object from the config; it is handed to the agent later.
# NOTE(review): presumably the container type for replay samples — confirm in trademaster.transition.
transition = build_transition(cfg)


## Step 8: Build Agent
Build agent from config and detect device

In [29]:
# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# The agent receives everything built so far: dimensions, networks, optimizers,
# loss, transition type and the target device.
agent = build_agent(
    cfg,
    default_args={
        "action_dim": action_dim,
        "state_dim": state_dim,
        "act": act,
        "cri": cri,
        "act_optimizer": act_optimizer,
        "cri_optimizer": cri_optimizer,
        "criterion": criterion,
        "transition": transition,
        "device": device,
    },
)

## Step 9: Build Trainer
Build the trainer from the config and create the work directory used to save the results, model and config

In [30]:
# Style test: one trainer per style-specific test environment, all sharing the
# same train/valid environments and the same agent.
# Any other task: a single trainer wired to the regular test environment.
if task_name.startswith("style_test"):
    trainers = [
        build_trainer(
            cfg,
            default_args=dict(train_environment=train_environment,
                              valid_environment=valid_environment,
                              test_environment=style_env,
                              agent=agent,
                              device=device))
        for style_env in test_style_environments
    ]
else:
    trainer = build_trainer(
        cfg,
        default_args=dict(train_environment=train_environment,
                          valid_environment=valid_environment,
                          test_environment=test_environment,
                          agent=agent,
                          device=device))

# Save a copy of the resolved config under the work directory, keeping the
# original config file name.
config_copy_path = osp.join(ROOT, cfg.work_dir, osp.basename(args.config))
cfg.dump(config_copy_path)

| Arguments Keep work_dir: /data1/sunshuo/qml/TradeMaster/TradeMasterReBuild/work_dir/algorithmic_trading_BTC_dqn_dqn_adam_mse


## Step 10: Train the Trainer
Train the trainer based on the config and get results from workdir

In [31]:
# Dispatch on the task name chosen in the config cell.
# "style_test" does not start with "test", so the branch order below is safe.
if task_name.startswith("train"):
    # Train with per-episode validation, then evaluate once on the test split.
    trainer.train_and_valid()
    trainer.test()
    print("train end")
elif task_name.startswith("test"):
    trainer.test()
    print("test end")
elif task_name.startswith("style_test"):
    # Pool the returns reported by every style-specific trainer.
    daily_return_list = []
    for trainer in trainers:
        daily_return_list.extend(trainer.test())
    if daily_return_list:
        print('win rate is: ', sum(r > 0 for r in daily_return_list) / len(daily_return_list))
    else:
        # No trainers, or every trainer returned nothing: the ratio is
        # undefined, so report that instead of raising ZeroDivisionError.
        print('win rate is undefined: no daily returns were produced')
    print("style test end")


Train Episode: [1/20]
+---------------+-------------+-------------+--------------+---------------+---------------+
| Profit Margin | Sharp Ratio |  Volatility | Max Drawdown |  Calmar Ratio | Sortino Ratio |
+---------------+-------------+-------------+--------------+---------------+---------------+
|  -344.513937% |  -0.000214  | 6862.304186 |   6.214777   | -27423.674698 |   -0.556031   |
+---------------+-------------+-------------+--------------+---------------+---------------+
Valid Episode: [1/20]
+---------------+-------------+-------------+--------------+--------------+---------------+
| Profit Margin | Sharp Ratio |  Volatility | Max Drawdown | Calmar Ratio | Sortino Ratio |
+---------------+-------------+-------------+--------------+--------------+---------------+
|   36.243173%  |   0.001800  | 4108.651796 |   0.583186   | 62313.081138 |    0.551188   |
+---------------+-------------+-------------+--------------+--------------+---------------+
Valid Episode Reward Sum: 56390