<a href="https://colab.research.google.com/github/abasu0713/machine-learning-notebooks/blob/master/optimus%5Bbeta%5D-train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1>Optimus - Trainer</h1>
<p>This is an experimental Jupyter Notebook for training deep reinforcement learning models on stock market data. The data is expected to have been downloaded and saved to files by following the instructions in this <a href="https://github.com/abasu0713/machine-learning-notebooks/blob/master/optimus%5Bbeta%5D-data.ipynb">Notebook</a>.</p>

<h1>Training</h1>

In [None]:
## install required packages
!pip install swig
!pip install wrds
!pip install pyportfolioopt
## install finrl library
!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git


Collecting git+https://github.com/AI4Finance-Foundation/FinRL.git
  Cloning https://github.com/AI4Finance-Foundation/FinRL.git to /tmp/pip-req-build-dw5f84o1
  Running command git clone --filter=blob:none --quiet https://github.com/AI4Finance-Foundation/FinRL.git /tmp/pip-req-build-dw5f84o1
  Resolved https://github.com/AI4Finance-Foundation/FinRL.git to commit b781b80727bb8848e7addcfd6fa61e1ff6b06fa3
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting elegantrl@ git+https://github.com/AI4Finance-Foundation/ElegantRL.git#egg=elegantrl (from finrl==0.3.6)
  Cloning https://github.com/AI4Finance-Foundation/ElegantRL.git to /tmp/pip-install-q40uvkh2/elegantrl_e288a8352f064785830dd3ee6ed1ad3a
  Running command git clone --filter=blob:none --quiet https://github.com/AI4Finance-Foundation/ElegantRL.git /tmp/pip-install-q40uvkh2/elegantrl_e288a8352f064785830dd3ee6e

In [None]:
import os
import pandas as pd

from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure
from finrl import config_tickers
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR

# Prepare the directory the trained agents are saved into further down.
# NOTE(review): presumably creates the directory when it is missing (going by
# the helper's name) — confirm against finrl.main.
check_and_make_directories([TRAINED_MODEL_DIR])

In [None]:
# Load the training split from the CSV file in the working directory.
train = pd.read_csv('train-stock-market-data.csv')

# If you are not using the data generated in part 1 of this tutorial, make sure
# the frame already has the columns and index layout that the trading
# environment expects; in that case comment out and skip the statement below.
# It promotes the first CSV column to the index and gives that index an empty
# name.
train = train.set_index(train.columns[0]).rename_axis('')

In [None]:
# Number of distinct tickers present in the training data.
stock_dimension = len(train["tic"].unique())

# State vector length: one global slot, plus (2 + number of indicators) slots
# per ticker.
# NOTE(review): presumably the global slot is cash and the two per-ticker
# slots are price and holdings — confirm against StockTradingEnv.
state_space = 1 + stock_dimension * (2 + len(INDICATORS))

print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 291


In [None]:
# Per-ticker cost rates for buying and selling (0.001 for every ticker).
# Build two independent lists: the previous chained assignment bound both
# names to the SAME list object, so an in-place change to one would silently
# have changed the other as well.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension

# Start with zero shares of every ticker.
num_stock_shares = [0] * stock_dimension

# Keyword arguments forwarded verbatim to StockTradingEnv.
# NOTE(review): the meaning of each key is defined by StockTradingEnv's
# constructor — confirm there before tuning the numeric values.
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}


# Training environment built on the loaded training frame.
e_train_gym = StockTradingEnv(df = train, **env_kwargs)

In [None]:
# get_sb_env() returns a pair; keep the first element (the environment handed
# to the agents below) and discard the second.
env_train, _ = e_train_gym.get_sb_env()
# Show which wrapper class was returned (DummyVecEnv in the recorded output).
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [None]:
# Agent wrapper around the training environment (each algorithm cell below
# builds its own instance again).
agent = DRLAgent(env = env_train)

# Set the corresponding values to 'True' for the algorithms that you want to use.
# Each flag gates the logger setup, training and saving for its algorithm in
# the cells below; the get_model(...) calls themselves run regardless.
if_using_a2c = True
if_using_ddpg = True
if_using_ppo = True
if_using_td3 = True
if_using_sac = True

In [None]:
# Fresh agent wrapper and an A2C model built without extra model kwargs.
agent = DRLAgent(env=env_train)
model_a2c = agent.get_model("a2c")

if if_using_a2c:
    # Send training metrics to stdout, a CSV file and TensorBoard, all under
    # the A2C sub-directory of the results directory.
    tmp_path = f"{RESULTS_DIR}/a2c"
    new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_a2c.set_logger(new_logger_a2c)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cuda device
Logging to results/a2c


In [None]:
# Train A2C only when it is enabled; otherwise bind None so the name exists.
if if_using_a2c:
    trained_a2c = agent.train_model(
        model=model_a2c,
        tb_log_name='a2c',
        total_timesteps=50000,
    )
else:
    trained_a2c = None

---------------------------------------
| time/                 |             |
|    fps                | 63          |
|    iterations         | 100         |
|    time_elapsed       | 7           |
|    total_timesteps    | 500         |
| train/                |             |
|    entropy_loss       | -41.2       |
|    explained_variance | 0.0101      |
|    learning_rate      | 0.0007      |
|    n_updates          | 99          |
|    policy_loss        | -18.5       |
|    reward             | 0.046986707 |
|    std                | 1           |
|    value_loss         | 1.04        |
---------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 79        |
|    iterations         | 200       |
|    time_elapsed       | 12        |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -41.3     |
|    explained_variance | 1.19e-07  |
|    learning_rate

In [None]:
# Persist the trained A2C agent inside the trained-model directory.
if if_using_a2c:
    trained_a2c.save(f"{TRAINED_MODEL_DIR}/agent_a2c")

In [None]:
# Fresh agent wrapper and a DDPG model built without extra model kwargs.
agent = DRLAgent(env=env_train)
model_ddpg = agent.get_model("ddpg")

if if_using_ddpg:
    # Send training metrics to stdout, a CSV file and TensorBoard, all under
    # the DDPG sub-directory of the results directory.
    tmp_path = f"{RESULTS_DIR}/ddpg"
    new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_ddpg.set_logger(new_logger_ddpg)

{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cuda device
Logging to results/ddpg


In [None]:
# Train DDPG only when it is enabled; otherwise bind None so the name exists.
if if_using_ddpg:
    trained_ddpg = agent.train_model(
        model=model_ddpg,
        tb_log_name='ddpg',
        total_timesteps=50000,
    )
else:
    trained_ddpg = None

day: 2892, episode: 20
begin_total_asset: 1000000.00
end_total_asset: 5077013.26
total_reward: 4077013.26
total_cost: 4479.94
total_trades: 37153
Sharpe: 0.837
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 82       |
|    time_elapsed    | 139      |
|    total_timesteps | 11572    |
| train/             |          |
|    actor_loss      | 14.3     |
|    critic_loss     | 91.8     |
|    learning_rate   | 0.001    |
|    n_updates       | 8679     |
|    reward          | 4.443804 |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 78       |
|    time_elapsed    | 294      |
|    total_timesteps | 23144    |
| train/             |          |
|    actor_loss      | 4.44     |
|    critic_loss     | 11.6     |
|    learning_rate   | 0.001    |
|    n_updates       | 20251    |
|    reward          | 4

In [None]:
# Persist the trained DDPG agent inside the trained-model directory.
# Fix: the path separator was missing, so the file was written next to the
# directory as "<TRAINED_MODEL_DIR>agent_ddpg" instead of inside it, unlike
# the A2C agent. Anything that loaded the old, slash-less path must be
# updated to the new location.
if if_using_ddpg:
    trained_ddpg.save(TRAINED_MODEL_DIR + "/agent_ddpg")

In [None]:
# Fresh agent wrapper for the PPO run.
agent = DRLAgent(env=env_train)

# Hyper-parameters handed to the PPO model through model_kwargs.
PPO_PARAMS = dict(
    n_steps=2048,
    ent_coef=0.01,
    learning_rate=0.00025,
    batch_size=128,
)
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

if if_using_ppo:
    # Send training metrics to stdout, a CSV file and TensorBoard, all under
    # the PPO sub-directory of the results directory.
    tmp_path = f"{RESULTS_DIR}/ppo"
    new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_ppo.set_logger(new_logger_ppo)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cuda device
Logging to results/ppo


In [None]:
# Train PPO and persist the result when PPO is enabled; otherwise bind None so
# the name exists.
# Fix: the save path was missing its "/" separator, so the file was written
# next to the trained-model directory ("<TRAINED_MODEL_DIR>agent_ppo") instead
# of inside it. Anything that loaded the old, slash-less path must be updated.
if if_using_ppo:
    trained_ppo = agent.train_model(model=model_ppo,
                                    tb_log_name='ppo',
                                    total_timesteps=200000)
    trained_ppo.save(TRAINED_MODEL_DIR + "/agent_ppo")
else:
    trained_ppo = None

----------------------------------
| time/              |           |
|    fps             | 116       |
|    iterations      | 1         |
|    time_elapsed    | 17        |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 1.1890339 |
----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 2           |
|    time_elapsed         | 36          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.016410885 |
|    clip_fraction        | 0.227       |
|    clip_range           | 0.2         |
|    entropy_loss         | -41.2       |
|    explained_variance   | -0.0013     |
|    learning_rate        | 0.00025     |
|    loss                 | 5.02        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0288     |
|    reward  

In [None]:
# Fresh agent wrapper for the TD3 run.
agent = DRLAgent(env=env_train)

# Hyper-parameters handed to the TD3 model through model_kwargs.
TD3_PARAMS = dict(
    batch_size=100,
    buffer_size=1000000,
    learning_rate=0.001,
)
model_td3 = agent.get_model("td3", model_kwargs=TD3_PARAMS)

if if_using_td3:
    # Send training metrics to stdout, a CSV file and TensorBoard, all under
    # the TD3 sub-directory of the results directory.
    tmp_path = f"{RESULTS_DIR}/td3"
    new_logger_td3 = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_td3.set_logger(new_logger_td3)

{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}
Using cuda device
Logging to results/td3


In [None]:
# Train TD3 and persist the result when TD3 is enabled; otherwise bind None so
# the name exists.
# Fix: the save path was missing its "/" separator, so the file was written
# next to the trained-model directory ("<TRAINED_MODEL_DIR>agent_td3") instead
# of inside it. Anything that loaded the old, slash-less path must be updated.
if if_using_td3:
    trained_td3 = agent.train_model(model=model_td3,
                                    tb_log_name='td3',
                                    total_timesteps=50000)
    trained_td3.save(TRAINED_MODEL_DIR + "/agent_td3")
else:
    trained_td3 = None

---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 83       |
|    time_elapsed    | 138      |
|    total_timesteps | 11572    |
| train/             |          |
|    actor_loss      | 146      |
|    critic_loss     | 1.1e+04  |
|    learning_rate   | 0.001    |
|    n_updates       | 8679     |
|    reward          | 8.609434 |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 79       |
|    time_elapsed    | 290      |
|    total_timesteps | 23144    |
| train/             |          |
|    actor_loss      | 63.6     |
|    critic_loss     | 3.37e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 20251    |
|    reward          | 8.609434 |
---------------------------------
day: 2892, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 5599582.72
total_reward: 4599582.72
total_cost

In [None]:
# Fresh agent wrapper for the SAC run.
agent = DRLAgent(env=env_train)

# Hyper-parameters handed to the SAC model through model_kwargs.
SAC_PARAMS = dict(
    batch_size=128,
    buffer_size=100000,
    learning_rate=0.0001,
    learning_starts=100,
    ent_coef="auto_0.1",
)
model_sac = agent.get_model("sac", model_kwargs=SAC_PARAMS)

if if_using_sac:
    # Send training metrics to stdout, a CSV file and TensorBoard, all under
    # the SAC sub-directory of the results directory.
    tmp_path = f"{RESULTS_DIR}/sac"
    new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_sac.set_logger(new_logger_sac)


{'batch_size': 128, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cuda device
Logging to results/sac


In [None]:
# Train SAC and persist the result when SAC is enabled; otherwise bind None so
# the name exists.
# Fix: the save path was missing its "/" separator, so the file was written
# next to the trained-model directory ("<TRAINED_MODEL_DIR>agent_sac") instead
# of inside it. Anything that loaded the old, slash-less path must be updated.
if if_using_sac:
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name='sac',
                                    total_timesteps=70000)
    trained_sac.save(TRAINED_MODEL_DIR + "/agent_sac")
else:
    trained_sac = None

---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 53       |
|    time_elapsed    | 216      |
|    total_timesteps | 11572    |
| train/             |          |
|    actor_loss      | 1.54e+03 |
|    critic_loss     | 270      |
|    ent_coef        | 0.28     |
|    ent_coef_loss   | 55.9     |
|    learning_rate   | 0.0001   |
|    n_updates       | 11471    |
|    reward          | 5.494194 |
---------------------------------
-----------------------------------
| time/              |            |
|    episodes        | 8          |
|    fps             | 53         |
|    time_elapsed    | 434        |
|    total_timesteps | 23144      |
| train/             |            |
|    actor_loss      | 608        |
|    critic_loss     | 80.2       |
|    ent_coef        | 0.104      |
|    ent_coef_loss   | -104       |
|    learning_rate   | 0.0001     |
|    n_updates       | 23043      |
|    reward          |

In [None]:
# Fix: `files` was used below while its import was commented out, so this cell
# raised NameError on a fresh kernel. google.colab only exists on Colab, so the
# import lives in this Colab-only cell rather than with the shared imports.
from google.colab import files

# Download the trained A2C agent to the local machine. The archive name is the
# path given to trained_a2c.save(...) with the ".zip" suffix the saved file
# carries.
files.download("/content/trained_models/agent_a2c.zip")

# To fetch the other agents, add one files.download(...) call per agent, using
# the path that agent's .save(...) call was given plus ".zip", e.g.
#   files.download("/content/<path passed to .save()>.zip")





<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>