## Step 1: Import Packages
Modify the system path and load the corresponding packages and functions 

In [4]:
import os
import sys
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")
ROOT = str(Path("__file__").resolve().parents[1])
sys.path.append(ROOT)
import torch
import argparse
import os.path as osp
from mmcv import Config
from trademaster.utils import replace_cfg_vals
from trademaster.nets.builder import build_net
from trademaster.environments.builder import build_environment
from trademaster.datasets.builder import build_dataset
from trademaster.agents.builder import build_agent
from trademaster.optimizers.builder import build_optimizer
from trademaster.losses.builder import build_loss
from trademaster.trainers.builder import build_trainer
from trademaster.transition.builder import build_transition

## Step 2: Load Configs
Load default config from the folder `configs/algorithmic_trading/algorithmic_trading_BTC_dqn_dqn_adam_mse.py`

In [5]:

parser = argparse.ArgumentParser(description='Download Alpaca Datasets')
parser.add_argument("--config", default=osp.join(ROOT, "configs", "algorithmic_trading", "algorithmic_trading_BTC_dqn_dqn_adam_mse.py"),
                    help="download datasets config file path")
parser.add_argument("--task_name", type=str, default="train")
args, _= parser.parse_known_args()

cfg = Config.fromfile(args.config)
task_name = args.task_name
cfg = replace_cfg_vals(cfg)

In [6]:
cfg

Config (path: /Users/wentaozhang/workspace/RA/TradeMaster/configs/algorithmic_trading/algorithmic_trading_BTC_dqn_dqn_adam_mse.py): {'data': {'type': 'AlgorithmicTradingDataset', 'data_path': 'data/algorithmic_trading/BTC', 'train_path': 'data/algorithmic_trading/BTC/train.csv', 'valid_path': 'data/algorithmic_trading/BTC/valid.csv', 'test_path': 'data/algorithmic_trading/BTC/test.csv', 'test_dynamic_path': 'data/algorithmic_trading/BTC/test_labeled_3_24_-0.15_0.15.csv', 'tech_indicator_list': ['high', 'low', 'open', 'close', 'adjcp', 'zopen', 'zhigh', 'zlow', 'zadjcp', 'zclose', 'zd_5', 'zd_10', 'zd_15', 'zd_20', 'zd_25', 'zd_30'], 'backward_num_day': 5, 'forward_num_day': 5, 'test_dynamic': '-1'}, 'environment': {'type': 'AlgorithmicTradingEnvironment'}, 'agent': {'type': 'AlgorithmicTradingDQN', 'max_step': 12345, 'reward_scale': 1, 'repeat_times': 1, 'gamma': 0.9, 'batch_size': 64, 'clip_grad_norm': 3.0, 'soft_update_tau': 0, 'state_value_tau': 0.005}, 'trainer': {'type': 'Algorith

## Step 3: Build Dataset
Build datasets from cfg defined above

In [7]:
dataset = build_dataset(cfg)

In [8]:
dataset

<trademaster.datasets.algorithmic_trading.dataset.AlgorithmicTradingDataset at 0x7f947ab2e430>

## Step 4: Build Reinforcement Learning Environments
Build environments based on cfg and previously-defined dataset

A style-test is provided as an option to test the algorithm's performance under different market conditions

In [9]:
train_environment = build_environment(cfg, default_args=dict(dataset=dataset, task="train"))
valid_environment = build_environment(cfg, default_args=dict(dataset=dataset, task="valid"))
test_environment = build_environment(cfg, default_args=dict(dataset=dataset, task="test"))

In [10]:
train_environment

<trademaster.environments.algorithmic_trading.environment.AlgorithmicTradingEnvironment at 0x7f9469bbce80>

In [11]:
valid_environment

<trademaster.environments.algorithmic_trading.environment.AlgorithmicTradingEnvironment at 0x7f9469bb1af0>

In [12]:
test_environment

<trademaster.environments.algorithmic_trading.environment.AlgorithmicTradingEnvironment at 0x7f9438573f40>

## Step 5: Build Net 
Update information about the state and action dimension in the config and create nets and optimizer for DQN


In [13]:
action_dim = train_environment.action_dim
state_dim = train_environment.state_dim

cfg.act.update(dict(action_dim=action_dim, state_dim=state_dim))
act = build_net(cfg.act)
act_optimizer = build_optimizer(cfg, default_args=dict(params=act.parameters()))
if cfg.cri:
    cfg.cri.update(dict(action_dim=action_dim, state_dim=state_dim))
    cri = build_net(cfg.cri)
    cri_optimizer = build_optimizer(cfg, default_args=dict(params=cri.parameters()))
else:
    cri = None
    cri_optimizer = None

## Step 6: Build Loss
Build loss from config

In [14]:
criterion = build_loss(cfg)

## Step 7: Build Transition
Build transition from config

In [15]:
transition = build_transition(cfg)


## Step 8: Build Agent
Build agent from config and detect device

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent = build_agent(cfg, default_args=dict(action_dim = action_dim,
                                               state_dim = state_dim,
                                               act = act,
                                               cri = cri,
                                               act_optimizer = act_optimizer,
                                               cri_optimizer = cri_optimizer,
                                               criterion = criterion,
                                               transition = transition,
                                               device=device))

## Step 9: Build Trainer
Build trainer from config and create work directionary to save the result, model and config

In [17]:
trainer = build_trainer(cfg, default_args=dict(train_environment=train_environment,
                                                    valid_environment=valid_environment,
                                                    test_environment=test_environment,
                                                    agent=agent,
                                                    device=device))

| Arguments Keep work_dir: /Users/wentaozhang/workspace/RA/TradeMaster/work_dir/algorithmic_trading_BTC_dqn_dqn_adam_mse


## Step 10: Train the Trainer
Train the trainer based on the config and get results from workdir

In [18]:
trainer.train_and_valid()
trainer.test()

Train Episode: [1/20]
+---------------+-------------+------------+--------------+
| Profit Margin | Sharp Ratio | Volatility | Max Drawdown |
+---------------+-------------+------------+--------------+
| -1671.124288% |  -44.318845 | 12.264496% | 3286.128741% |
+---------------+-------------+------------+--------------+
metric result saved to metric_train_-1_agent_-1.pickle
Valid Episode: [1/20]
+---------------+-------------+-------------+--------------+
| Profit Margin | Sharp Ratio |  Volatility | Max Drawdown |
+---------------+-------------+-------------+--------------+
|  -103.345503% |  -0.113632  | 849.260789% | 147.326764%  |
+---------------+-------------+-------------+--------------+
metric result saved to metric_valid_-1_agent_-1.pickle
Valid Episode Reward Sum: -41921.302390
save path /Users/wentaozhang/workspace/RA/TradeMaster/work_dir/algorithmic_trading_BTC_dqn_dqn_adam_mse/checkpoints/checkpoint-00001.pth
Train Episode: [2/20]
+---------------+-------------+-----------

array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -0.00000000e+00,
       -0.00000000e+00,  0.00000000e+00, -0.00000000e+00, -0.00000000e+00,
        0.00000000e+00, -0.00000000e+00,  0.00000000e+00, -0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -0.00000000e+00,  0.00000000e+00,
       -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00, -0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
       -0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -0.00000000e+00,
        0.00000000e+00, -0.00000000e+00,  4.40733860e+01,  1.15794149e+03,
       -1.14879921e+03,  6.66391491e+02,  5.43195140e+02,  1.40683460e+03,
       -3.07194117e+02, -1.11972267e+03,  2.79868024e+03,  1.10043783e+03,
        1.01229892e+04, -