## Step 1: Import Packages
Modify the system path and load the corresponding packages and functions 

In [1]:
# Standard-library imports used by the setup code below.
import os
import sys
from pathlib import Path
import warnings
# Suppress all warnings for the rest of the session so cell outputs stay short.
warnings.filterwarnings("ignore")
# ROOT is the parent directory of the current working directory. It is appended
# to sys.path *before* the trademaster imports below, so their order matters
# (presumably the notebook is run from a sub-folder of the repository — confirm).
ROOT = os.path.dirname(os.path.abspath("."))
sys.path.append(ROOT)
import torch
import argparse
import os.path as osp
from mmcv import Config
# Builder entry points: each one is called later in the notebook with `cfg`
# to construct the corresponding component.
from trademaster.utils import replace_cfg_vals
from trademaster.nets.builder import build_net
from trademaster.environments.builder import build_environment
from trademaster.datasets.builder import build_dataset
from trademaster.agents.builder import build_agent
from trademaster.optimizers.builder import build_optimizer
from trademaster.losses.builder import build_loss
from trademaster.trainers.builder import build_trainer
from trademaster.transition.builder import build_transition

## Step 2: Load Configs
Load the default config from the file `configs/portfolio_management/portfolio_management_dj30_eiie_eiie_adam_mse.py`

In [2]:
# Command-line style options for the notebook. The description/help strings
# describe what this notebook actually does (EIIE training on DJ30); the
# previous text was left over from a dataset-download script.
parser = argparse.ArgumentParser(description='Train EIIE for portfolio management on DJ30')
parser.add_argument("--config", default=osp.join(ROOT, "configs", "portfolio_management", "portfolio_management_dj30_eiie_eiie_adam_mse.py"),
                    help="path to the training config file")
parser.add_argument("--task_name", type=str, default="train",
                    help="name of the task to run (default: train)")

# parse_known_args() returns (namespace, leftover_args); unrecognised
# arguments are collected in the second element instead of raising an error.
args, _ = parser.parse_known_args()

# Load the config file, then post-process it with replace_cfg_vals
# (presumably resolves placeholder values inside the config — confirm).
cfg = Config.fromfile(args.config)
task_name = args.task_name
cfg = replace_cfg_vals(cfg)

In [3]:
# Display the loaded config as this cell's output for inspection.
cfg

Config (path: /Users/wentaozhang/workspace/RA/TradeMaster/configs/portfolio_management/portfolio_management_dj30_eiie_eiie_adam_mse.py): {'data': {'type': 'PortfolioManagementDataset', 'data_path': 'data/portfolio_management/dj30', 'train_path': 'data/portfolio_management/dj30/train.csv', 'valid_path': 'data/portfolio_management/dj30/valid.csv', 'test_path': 'data/portfolio_management/dj30/test.csv', 'tech_indicator_list': ['zopen', 'zhigh', 'zlow', 'zadjcp', 'zclose', 'zd_5', 'zd_10', 'zd_15', 'zd_20', 'zd_25', 'zd_30'], 'length_day': 10, 'initial_amount': 100000, 'transaction_cost_pct': 0.001, 'test_dynamic_path': 'data/portfolio_management/dj30/DJI_label_by_DJIindex_3_24_-0.25_0.25.csv'}, 'environment': {'type': 'PortfolioManagementEIIEEnvironment'}, 'agent': {'type': 'PortfolioManagementEIIE', 'memory_capacity': 1000, 'gamma': 0.99, 'policy_update_frequency': 500}, 'trainer': {'type': 'PortfolioManagementEIIETrainer', 'epochs': 10, 'work_dir': 'work_dir/portfolio_management_dj30_ei

## Step 3: Build Dataset
Build datasets from cfg defined above

In [4]:
# Construct the dataset object from the loaded config via the trademaster
# dataset builder; it is passed to every environment built below.
dataset = build_dataset(cfg)

## Step 4: Build Reinforcement Learning Environments
Build environments based on cfg and previously-defined dataset

A style-test is provided as an option to test the algorithm's performance under different market conditions

In [7]:
# One environment per data split; all three share the same dataset object and
# differ only in the `task` argument handed to the builder.
train_environment = build_environment(
    cfg,
    default_args={"dataset": dataset, "task": "train"},
)
valid_environment = build_environment(
    cfg,
    default_args={"dataset": dataset, "task": "valid"},
)
test_environment = build_environment(
    cfg,
    default_args={"dataset": dataset, "task": "test"},
)

In [8]:
# Preview the first rows of the training environment's `df` attribute
# (presumably a pandas DataFrame — confirm).
# NOTE(review): the recorded output is the environment's repr, not a table, so
# the saved output looks stale relative to this code — re-run to refresh.
train_environment.df.head()

<trademaster.environments.portfolio_management.eiie_environment.PortfolioManagementEIIEEnvironment at 0x7f90d3a620a0>

In [9]:
# Preview the first rows of the validation environment's `df` attribute
# (presumably a pandas DataFrame — confirm).
# NOTE(review): the recorded output is the environment's repr, not a table, so
# the saved output looks stale relative to this code — re-run to refresh.
valid_environment.df.head()

<trademaster.environments.portfolio_management.eiie_environment.PortfolioManagementEIIEEnvironment at 0x7f90d3a37190>

In [10]:
# Preview the first rows of the test environment's `df` attribute
# (presumably a pandas DataFrame — confirm).
# NOTE(review): the recorded output is the environment's repr, not a table, so
# the saved output looks stale relative to this code — re-run to refresh.
test_environment.df.head()

<trademaster.environments.portfolio_management.eiie_environment.PortfolioManagementEIIEEnvironment at 0x7f90e0d9a580>

## Step 5: Build Net 
Update the config with the state and action dimensions, then create the networks and optimizers for EIIE


In [11]:
# Read the sizes the networks need from the training environment.
# (The original notebook annotated action_dim as 29 and state_dim as 11.)
action_dim = train_environment.action_dim
state_dim = train_environment.state_dim
input_dim = len(train_environment.tech_indicator_list)
time_steps = train_environment.time_steps

# Inject the environment-derived sizes into the actor / critic net configs.
cfg.act.update({"input_dim": input_dim, "time_steps": time_steps})
cfg.cri.update(
    {"input_dim": input_dim, "action_dim": action_dim, "time_steps": time_steps}
)

# Build the actor first, then the critic, followed by one optimizer per net.
act = build_net(cfg.act)
cri = build_net(cfg.cri)
act_optimizer = build_optimizer(cfg, default_args={"params": act.parameters()})
cri_optimizer = build_optimizer(cfg, default_args={"params": cri.parameters()})

## Step 6: Build Loss
Build loss from config

In [12]:
# Construct the loss object from the config; it is handed to the agent below.
criterion = build_loss(cfg)

## Step 7: Build Transition
Build transition from config

In [13]:
# Construct the transition object from the config; it is handed to the agent
# below.
transition = build_transition(cfg)


## Step 8: Build Agent
Build agent from config and detect device

In [14]:
# Prefer the GPU when one is visible to torch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Assemble the agent from the components built in the previous steps.
agent = build_agent(
    cfg,
    default_args={
        "action_dim": action_dim,
        "state_dim": state_dim,
        "time_steps": time_steps,
        "act": act,
        "cri": cri,
        "act_optimizer": act_optimizer,
        "cri_optimizer": cri_optimizer,
        "criterion": criterion,
        "transition": transition,
        "device": device,
    },
)

## Step 9: Build Trainer
Build the trainer from the config and create the work directory in which the results, model and config are saved

In [15]:
# Build the trainer around the three environments and the agent.
trainer = build_trainer(cfg, default_args=dict(train_environment=train_environment,
                                                    valid_environment=valid_environment,
                                                    test_environment=test_environment,
                                                    agent=agent,
                                                    device=device,
                                                    ))

# Results are written under ROOT/<cfg.trainer.work_dir>.
work_dir = os.path.join(ROOT, cfg.trainer.work_dir)

# exist_ok=True creates the directory (and any missing parents) if needed and
# is a no-op when it already exists, avoiding the check-then-create race of a
# separate os.path.exists() test.
# NOTE(review): the recorded output ("Arguments Remove work_dir: ...") suggests
# the trainer may clear an existing work_dir when it is constructed — confirm.
os.makedirs(work_dir, exist_ok=True)

# Save a copy of the effective config next to the run's outputs, using the
# original config file's name.
cfg.dump(osp.join(work_dir, osp.basename(args.config)))

| Arguments Remove work_dir: /Users/wentaozhang/workspace/RA/TradeMaster/work_dir/portfolio_management_dj30_eiie_eiie_adam_mse


## Step 10: Train the Trainer
Train the trainer based on the config and get results from workdir

In [16]:
# Run the trainer's combined train/validation routine. The recorded output
# shows per-episode train and valid metric tables and a checkpoint save path;
# the saved run was interrupted manually during episode 2 (KeyboardInterrupt).
trainer.train_and_valid()

[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.


Train Episode: [1/10]
+---------------+-------------+------------+--------------+
| Profit Margin | Sharp Ratio | Volatility | Max Drawdown |
+---------------+-------------+------------+--------------+
|  179.856113%  |  84.376458  | 0.756262%  |  15.012198%  |
+---------------+-------------+------------+--------------+
Valid Episode: [1/10]
+---------------+-------------+------------+--------------+
| Profit Margin | Sharp Ratio | Volatility | Max Drawdown |
+---------------+-------------+------------+--------------+
|   9.742087%   |   4.631699  | 2.137553%  |  30.998428%  |
+---------------+-------------+------------+--------------+
Valid Episode Reward Sum: 0.092963
save path /Users/wentaozhang/workspace/RA/TradeMaster/work_dir/portfolio_management_dj30_eiie_eiie_adam_mse/checkpoints/checkpoint-00001.pth
Train Episode: [2/10]


KeyboardInterrupt: 

In [None]:
# Run the trainer's test routine.
# NOTE(review): this cell has no execution count in the saved notebook, i.e. it
# was not run, so no test results are recorded here.
trainer.test()