In [1]:
import pandas as pd
from stable_baselines3.common.logger import configure

from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR
from finrl.main import check_and_make_directories
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv

# Prepare the trained-model output directory before any agent is saved into it
# (presumably created if missing — confirm against finrl.main.check_and_make_directories).
check_and_make_directories([TRAINED_MODEL_DIR])

## Read data


In [2]:
# Load the training split, promote its first column to the index, and give
# that index an empty-string name.
train = (
    pd.read_csv('train_data.csv')
    .pipe(lambda frame: frame.set_index(frame.columns[0]))
    .rename_axis('')
)

## Construct the environment

In [3]:
# Number of distinct ticker symbols present in the training frame.
stock_dimension = len(train["tic"].unique())

# State vector length: one scalar, plus two values per stock, plus one value
# per stock for every technical indicator.
# NOTE(review): presumably cash balance + (price, holdings) per stock — confirm
# against StockTradingEnv's state layout.
state_space = 1 + stock_dimension * (2 + len(INDICATORS))

print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 291


In [4]:
# Per-ticker cost rates handed to the environment as buy_cost_pct / sell_cost_pct.
# The two lists are built separately on purpose: the chained form
# `buy_cost_list = sell_cost_list = [...]` binds both names to the SAME list
# object, so editing one fee schedule would silently change the other.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension

# One entry per ticker, all zero, passed as num_stock_shares.
num_stock_shares = [0] * stock_dimension

# Keyword arguments forwarded to StockTradingEnv; the same dict can be reused
# to build other environments with identical settings.
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4,
}

# Gym-style trading environment built over the training data.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

## Environment for training

In [5]:
# get_sb_env() returns a pair; only its first element (the vectorized
# environment) is used by the rest of the notebook.
# Index into the pair rather than unpacking the second element into `_`:
# in IPython/Jupyter `_` is the "last output" variable, and once user code
# assigns to it IPython stops updating it for the rest of the session.
env_train = e_train_gym.get_sb_env()[0]
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


# Part 3: Train DRL Agents


In [6]:
# Algorithm switches: each flag gates the logger setup, training and saving
# cells of the matching agent below. Set a flag to False to skip that agent.
if_using_a2c = True
if_using_ddpg = True
if_using_ppo = True
if_using_sac = True

# Agent factory bound to the training environment.
agent = DRLAgent(env=env_train)

### Agent 1: A2C


In [7]:
# Fresh agent wrapper and an A2C model; no model_kwargs override is passed.
# The model is built regardless of the flag; only the logger is conditional.
agent = DRLAgent(env=env_train)
model_a2c = agent.get_model("a2c")

if if_using_a2c:
    # Route this model's training logs to stdout, CSV and TensorBoard
    # under the results directory.
    tmp_path = RESULTS_DIR + "/a2c"
    new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_a2c.set_logger(new_logger_a2c)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cuda device
Logging to results/a2c


In [8]:
# Train A2C for 50,000 timesteps, or leave a None placeholder when it is disabled.
if if_using_a2c:
    trained_a2c = agent.train_model(
        model=model_a2c,
        tb_log_name="a2c",
        total_timesteps=50000,
    )
else:
    trained_a2c = None

--------------------------------------
| time/                 |            |
|    fps                | 64         |
|    iterations         | 100        |
|    time_elapsed       | 7          |
|    total_timesteps    | 500        |
| train/                |            |
|    entropy_loss       | -41.2      |
|    explained_variance | 0.282      |
|    learning_rate      | 0.0007     |
|    n_updates          | 99         |
|    policy_loss        | 38.4       |
|    reward             | 0.11025473 |
|    std                | 1          |
|    value_loss         | 1.09       |
--------------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 76         |
|    iterations         | 200        |
|    time_elapsed       | 13         |
|    total_timesteps    | 1000       |
| train/                |            |
|    entropy_loss       | -41.2      |
|    explained_variance | -1.19e-07  |
|    learning_rate      |

In [9]:
# Persist the trained A2C agent; nothing to save when A2C is disabled.
if if_using_a2c:
    trained_a2c.save(TRAINED_MODEL_DIR + "/agent_a2c")

### Agent 2: DDPG

In [10]:
# Fresh agent wrapper and a DDPG model; no model_kwargs override is passed.
# The model is built regardless of the flag; only the logger is conditional.
agent = DRLAgent(env=env_train)
model_ddpg = agent.get_model("ddpg")

if if_using_ddpg:
    # Route this model's training logs to stdout, CSV and TensorBoard
    # under the results directory.
    tmp_path = RESULTS_DIR + "/ddpg"
    new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_ddpg.set_logger(new_logger_ddpg)

{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cuda device
Logging to results/ddpg


In [11]:
# Train DDPG for 50,000 timesteps, or leave a None placeholder when it is disabled.
if if_using_ddpg:
    trained_ddpg = agent.train_model(
        model=model_ddpg,
        tb_log_name="ddpg",
        total_timesteps=50000,
    )
else:
    trained_ddpg = None

day: 2892, episode: 20
begin_total_asset: 1000000.00
end_total_asset: 3990512.90
total_reward: 2990512.90
total_cost: 5286.30
total_trades: 55707
Sharpe: 0.800
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 77        |
|    time_elapsed    | 148       |
|    total_timesteps | 11572     |
| train/             |           |
|    actor_loss      | -49.5     |
|    critic_loss     | 135       |
|    learning_rate   | 0.001     |
|    n_updates       | 8679      |
|    reward          | 4.3509665 |
----------------------------------
----------------------------------
| time/              |           |
|    episodes        | 8         |
|    fps             | 73        |
|    time_elapsed    | 313       |
|    total_timesteps | 23144     |
| train/             |           |
|    actor_loss      | -29.5     |
|    critic_loss     | 11.2      |
|    learning_rate   | 0.001     |
|    n_updates       | 20251     |


In [12]:
# Persist the trained DDPG agent; nothing to save when DDPG is disabled.
if if_using_ddpg:
    trained_ddpg.save(TRAINED_MODEL_DIR + "/agent_ddpg")

### Agent 3: PPO

In [13]:
# Fresh agent wrapper for the PPO run.
agent = DRLAgent(env=env_train)

# Hyperparameter overrides forwarded to get_model through model_kwargs.
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
}
# The model is built regardless of the flag; only the logger is conditional.
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

if if_using_ppo:
    # Route this model's training logs to stdout, CSV and TensorBoard
    # under the results directory.
    tmp_path = RESULTS_DIR + "/ppo"
    new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_ppo.set_logger(new_logger_ppo)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cuda device
Logging to results/ppo


In [14]:
# Train PPO for 200,000 timesteps, or leave a None placeholder when it is disabled.
if if_using_ppo:
    trained_ppo = agent.train_model(
        model=model_ppo,
        tb_log_name="ppo",
        total_timesteps=200000,
    )
else:
    trained_ppo = None

------------------------------------
| time/              |             |
|    fps             | 97          |
|    iterations      | 1           |
|    time_elapsed    | 21          |
|    total_timesteps | 2048        |
| train/             |             |
|    reward          | 0.020233056 |
------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 100         |
|    iterations           | 2           |
|    time_elapsed         | 40          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.016614027 |
|    clip_fraction        | 0.214       |
|    clip_range           | 0.2         |
|    entropy_loss         | -41.2       |
|    explained_variance   | -0.00975    |
|    learning_rate        | 0.00025     |
|    loss                 | 3.58        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.025   

In [15]:
# Persist the trained PPO agent; nothing to save when PPO is disabled.
if if_using_ppo:
    trained_ppo.save(TRAINED_MODEL_DIR + "/agent_ppo")

### Agent 4: SAC

In [16]:
# Fresh agent wrapper for the SAC run.
agent = DRLAgent(env=env_train)

# Hyperparameter overrides forwarded to get_model through model_kwargs.
SAC_PARAMS = {
    "batch_size": 128,
    "buffer_size": 100000,
    "learning_rate": 0.0001,
    "learning_starts": 100,
    "ent_coef": "auto_0.1",
}

# The model is built regardless of the flag; only the logger is conditional.
model_sac = agent.get_model("sac", model_kwargs=SAC_PARAMS)

if if_using_sac:
    # Route this model's training logs to stdout, CSV and TensorBoard
    # under the results directory.
    tmp_path = RESULTS_DIR + "/sac"
    new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_sac.set_logger(new_logger_sac)

{'batch_size': 128, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cuda device
Logging to results/sac


In [17]:
# Train SAC for 70,000 timesteps, or leave a None placeholder when it is disabled.
if if_using_sac:
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name="sac",
        total_timesteps=70000,
    )
else:
    trained_sac = None

day: 2892, episode: 110
begin_total_asset: 1000000.00
end_total_asset: 5060161.14
total_reward: 4060161.14
total_cost: 183863.98
total_trades: 58679
Sharpe: 0.848
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 45        |
|    time_elapsed    | 252       |
|    total_timesteps | 11572     |
| train/             |           |
|    actor_loss      | 325       |
|    critic_loss     | 189       |
|    ent_coef        | 0.108     |
|    ent_coef_loss   | -105      |
|    learning_rate   | 0.0001    |
|    n_updates       | 11471     |
|    reward          | 13.078064 |
----------------------------------
----------------------------------
| time/              |           |
|    episodes        | 8         |
|    fps             | 45        |
|    time_elapsed    | 506       |
|    total_timesteps | 23144     |
| train/             |           |
|    actor_loss      | 131       |
|    critic_loss     | 933      

In [18]:
# Persist the trained SAC agent; nothing to save when SAC is disabled.
if if_using_sac:
    trained_sac.save(TRAINED_MODEL_DIR + "/agent_sac")