In [1]:
from packages.rl.trainer import Trainer, TrainConfig
from packages.agents.sb3_ppo_agent import SB3PPOAgent
from packages.env.trading_env_windowed import WindowedSingleAssetEnv
from packages.rl.policies.transformer_extractor import TransformerFeatureExtractor

cfg = TrainConfig(
    symbol="BTC/USDT",
    timeframe="1d",
    start="2021-01-01",
    end="2023-01-01",
    window=64,
    train_mode="rl",
    policy="MlpPolicy",
    policy_kwargs=dict(
        features_extractor_class=TransformerFeatureExtractor,
        features_extractor_kwargs=dict(d_model=32, nhead=4, num_layers=1, out_dim=64),
        net_arch=[64, 64],
    ),
    env_class=WindowedSingleAssetEnv,
    env_kwargs={"window": 64},
    tb=False,
    seed=None,
)

agent = SB3PPOAgent(policy=cfg.policy, policy_kwargs=cfg.policy_kwargs)
trainer = Trainer(cfg, agent)
path = trainer.train()

[trainer] cfg: TrainConfig(symbol='BTC/USDT', timeframe='1d', start='2021-01-01', end='2023-01-01', window=64, total_timesteps=200000, save_path=None, seed=1498146094, tb=False, n_steps=None, batch_size=None, log_interval=None, train_mode='rl', policy='MlpPolicy', policy_kwargs={'features_extractor_class': <class 'packages.rl.policies.transformer_extractor.TransformerFeatureExtractor'>, 'features_extractor_kwargs': {'d_model': 32, 'nhead': 4, 'num_layers': 1, 'out_dim': 64}, 'net_arch': [64, 64]}, env_class=<class 'packages.env.trading_env_windowed.WindowedSingleAssetEnv'>, env_kwargs={'window': 64}, use_wandb=False, wandb_project='rl-bybit-ppo', wandb_run_name=None, agent_class_path=None)
[trainer] rows_raw=548  range=(2021-07-01 00:00:00+00:00 .. 2023-01-01 00:00:00+00:00)
[trainer] rows_after_features=548  max_env_steps=547 requested=200000 effective=547
[trainer] RL auto params: n_steps=91, batch_size=64, log_interval=1
Using cpu device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=91 and n_envs=1)


----------------------------
| time/              |     |
|    fps             | 733 |
|    iterations      | 1   |
|    time_elapsed    | 0   |
|    total_timesteps | 91  |
----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 101         |
|    iterations           | 2           |
|    time_elapsed         | 1           |
|    total_timesteps      | 182         |
| train/                  |             |
|    approx_kl            | 0.009571452 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.1        |
|    explained_variance   | 0.00406     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.419       |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00624    |
|    value_loss           | 0.806       |
-----------------------------------------
-----------------------------------------

In [2]:
trainer.backtest(path, start="2023-01-01", end="2024-01-01")

{'total_reward': -23.564217735767365,
 'mean_reward': -0.0780272110455873,
 'n_steps': 302}

In [5]:
from packages.rl.trainer import Trainer, TrainConfig
from packages.agents.sklearn_agent import SklearnAgent

cfg = TrainConfig(
    symbol="BTC/USDT",
    timeframe="1d",
    start="2020-01-01",
    end="2022-12-31",
    window=64,
    train_mode="supervised",
    seed=None,
)
agent = SklearnAgent(threshold=0.51)
trainer = Trainer(cfg, agent)
path = trainer.train()

[trainer] cfg: TrainConfig(symbol='BTC/USDT', timeframe='1d', start='2020-01-01', end='2022-12-31', window=64, total_timesteps=200000, save_path=None, seed=3460813072, tb=False, n_steps=None, batch_size=None, log_interval=None, train_mode='supervised', policy='MlpPolicy', policy_kwargs={}, env_class=None, env_kwargs={}, use_wandb=False, wandb_project='rl-bybit-ppo', wandb_run_name=None, agent_class_path=None)
[trainer] rows_raw=547  range=(2021-07-01 00:00:00+00:00 .. 2022-12-31 00:00:00+00:00)
[trainer] rows_after_features=547  max_env_steps=546 requested=200000 effective=546
[trainer] supervised dataset: X=(546, 3), y_pos=0.487
[trainer] Saved model to models/agent-BTCUSDT-1d.bin


In [6]:
trainer.backtest(path, start="2023-01-01", end="2024-01-01")

{'total_reward': -33.643440415382386,
 'mean_reward': -0.09217380935721202,
 'n_steps': 365}

In [None]:
from packages.rl.trainer import Trainer, TrainConfig
from packages.agents.sb3_recurrent_ppo_agent import SB3RecurrentPPOAgent
from packages.env.trading_env_windowed import WindowedSingleAssetEnv

cfg = TrainConfig(
    symbol="BTC/USDT",
    timeframe="1d",
    start="2021-01-01",
    end="2023-01-01",
    window=64,
    train_mode="rl",
    policy="MlpLstmPolicy",
    policy_kwargs=dict(
        lstm_hidden_size=64,
        n_lstm_layers=1,
        shared_lstm=True,
        enable_critic_lstm=False,
        share_features_extractor=True,
        net_arch=[64, 64],
    ),
    env_class=WindowedSingleAssetEnv,
    env_kwargs={"window": 64},
    tb=False,
    seed=None,
    total_timesteps=100_000,
)

agent = SB3RecurrentPPOAgent(policy=cfg.policy, policy_kwargs=cfg.policy_kwargs)
trainer = Trainer(cfg, agent)
path = trainer.train()

[trainer] cfg: TrainConfig(symbol='BTC/USDT', timeframe='1d', start='2021-01-01', end='2023-01-01', window=64, total_timesteps=100000, save_path=None, seed=4204045241, tb=False, n_steps=None, batch_size=None, log_interval=None, train_mode='rl', policy='MlpLstmPolicy', policy_kwargs={'lstm_hidden_size': 64, 'n_lstm_layers': 1, 'shared_lstm': True, 'enable_critic_lstm': False, 'share_features_extractor': True, 'net_arch': [64, 64]}, env_class=<class 'packages.env.trading_env_windowed.WindowedSingleAssetEnv'>, env_kwargs={'window': 64}, use_wandb=False, wandb_project='rl-bybit-ppo', wandb_run_name=None, agent_class_path=None)
[trainer] rows_raw=548  range=(2021-07-01 00:00:00+00:00 .. 2023-01-01 00:00:00+00:00)
[trainer] rows_after_features=548  max_env_steps=547 requested=100000 effective=547
[trainer] RL auto params: n_steps=91, batch_size=64, log_interval=1
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1481 |
|    iterations      | 

In [8]:
trainer.backtest(path, start="2023-01-01", end="2024-01-01")

{'total_reward': 21.973753509521483,
 'mean_reward': 0.07276077321033604,
 'n_steps': 302}