In [73]:
import numpy as np
import datetime
import pandas as pd
# Available in the github repo : examples/data/BTC_USD-Hourly.csv
url = "https://raw.githubusercontent.com/ClementPerroud/Gym-Trading-Env/main/examples/data/BTC_USD-Hourly.csv"
df = pd.read_csv(url, parse_dates=["date"], index_col= "date")
df.sort_index(inplace= True)
df.dropna(inplace= True)
df.drop_duplicates(inplace=True)

URLError: <urlopen error [Errno -3] Temporary failure in name resolution>

In [None]:
# Function library for indicators
def sma(data, period):
    return data.rolling(window=period).mean()

def ema(data, period):
    return data.ewm(span=period, min_periods=period).mean()

def rsi(data, period):
    delta = data.diff()
    up = delta.clip(lower=0)
    down = -delta.clip(upper=0)
    rsi = 100 - 100 / (1 + up.ewm(span=period, min_periods=period).mean() / down.ewm(span=period, min_periods=period).mean())
    return rsi

def macd(data, fast_period, slow_period, signal_period):
    ema_fast = ema(data, fast_period)
    ema_slow = ema(data, slow_period)
    macd = ema_fast - ema_slow
    signal = ema(macd, signal_period)
    return macd, signal

def bollinger_bands(data, period):
    std = data.rolling(window=period).std()
    upper = ema(data, period) + 2 * std
    lower = ema(data, period) - 2 * std
    return upper, lower

def atr(data, period):
    high_low = data["high"] - data["low"]
    close_prev_close = abs(data["close"] - data["close"].shift(1))
    tr = pd.concat([high_low, close_prev_close], axis=1).max(axis=1)
    atr = tr.rolling(window=period).mean()
    return atr

def stochastic_oscillator(data, k_period, d_period):
    low_k = data["low"].rolling(window=k_period).min()
    high_k = data["high"].rolling(window=k_period).max()
    k = 100 * (data["close"] - low_k) / (high_k - low_k)
    d = k.ewm(span=d_period, min_periods=d_period).mean()
    return k, d

def volume_ratio(data, period):
    return data["volume"].rolling(window=period).mean() / data["volume"]

# Add indicators with different measurements
df["feature_SMA_10"] = sma(df["close"], 10)
df["feature_SMA_50"] = sma(df["close"], 50)
df["feature_EMA_20"] = ema(df["close"], 20)
df["feature_RSI_7"] = rsi(df["close"], 7)
df["feature_RSI_14"] = rsi(df["close"], 14)
df["feature_MACD_12_26_9"] = macd(df["close"], 12, 26, 9)[0]
df["feature_MACD_signal_9"] = macd(df["close"], 12, 26, 9)[1]
df["feature_Bollinger_Bands_Upper_20"] = bollinger_bands(df["close"], 20)[0]
df["feature_Bollinger_Bands_Lower_20"] = bollinger_bands(df["close"], 20)[1]

In [None]:
# df is a DataFrame with columns : "open", "high", "low", "close", "Volume USD"

# Create the feature : ( close[t] - close[t-1] )/ close[t-1]
df["feature_close"] = df["close"].pct_change()

# Create the feature : open[t] / close[t]
df["feature_open"] = df["open"]/df["close"]

# Create the feature : high[t] / close[t]
df["feature_high"] = df["high"]/df["close"]

# Create the feature : low[t] / close[t]
df["feature_low"] = df["low"]/df["close"]

 # Create the feature : volume[t] / max(*volume[t-7*24:t+1])
df["feature_volume"] = df["Volume USD"] / df["Volume USD"].rolling(7*24).max()

df.dropna(inplace= True) # Clean again !
# Eatch step, the environment will return 5 inputs  : "feature_close", "feature_open", "feature_high", "feature_low", "feature_volume"

In [None]:
import gymnasium as gym
import environments

#from environments import TradingEnv

In [None]:
import gym_trading_env

In [None]:
#import gymnasium as gym
#import gym_trading_env
#env = gym.make('MultiDatasetTradingEnv',
#    dataset_dir = 'preprocessed_data/*.pkl',
#    positions=[-1, 0, 1],  # -1 (=SHORT), 0(=OUT), +1 (=LONG)
#)

In [None]:
# Run 10 episodes
#for _ in range(10):
  # At every episode, the env will pick a new dataset.
#  done, truncated = False, False
#  observation, info = env.reset()
#  while not done and not truncated:
#      position_index = env.action_space.sample() # Pick random position index
#      observation, reward, done, truncated, info = env.step(position_index)

In [None]:
from environments import TradingEnv

# Create the environment directly
env = TradingEnv(
    name="BTCUSD",
    df=df,  # Your dataset with your custom features
    positions=[-0.9, 0.0, 0.9],  # -1 (=SHORT), 0(=OUT), +1 (=LONG)
    trading_fees=0.01/100,  # 0.01% per stock buy/sell (Binance fees)
    borrow_interest_rate=0.0003/100,  # 0.0003% per timestep (one timestep = 1h here)
)

In [None]:
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import VecEnv

In [74]:
policy = "MlpPolicy"

In [75]:
model = PPO(policy, env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [76]:
# Experiment with different hyperparameters based on your problem and environment
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    learning_rate=0.05,  # Adjust as needed
    batch_size=32,  # Adjust based on memory and learning speed
    gamma=0.5,  # Discount factor
)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [77]:
model.learn(total_timesteps=500000)

-----------------------------
| time/              |      |
|    fps             | 1600 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 917       |
|    iterations           | 2         |
|    time_elapsed         | 4         |
|    total_timesteps      | 4096      |
| train/                  |           |
|    approx_kl            | 13.886714 |
|    clip_fraction        | 0.998     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.00732  |
|    explained_variance   | -204      |
|    learning_rate        | 0.05      |
|    loss                 | 0.169     |
|    n_updates            | 10        |
|    policy_gradient_loss | 0.36      |
|    value_loss           | 0.196     |
---------------------------------------
---------------------------------------
| time/                   |   

---------------------------------------
| time/                   |           |
|    fps                  | 666       |
|    iterations           | 14        |
|    time_elapsed         | 43        |
|    total_timesteps      | 28672     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.0132    |
|    n_updates            | 130       |
|    policy_gradient_loss | -1.32e-08 |
|    value_loss           | 0.0293    |
---------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 666       |
|    iterations           | 15        |
|    time_elapsed         | 46        |
|    total_timesteps      | 30720     |
| train/                  |           |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.53      |
| time/                   |           |
|    fps                  | 649       |
|    iterations           | 25        |
|    time_elapsed         | 78        |
|    total_timesteps      | 51200     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0.00018   |
|    learning_rate        | 0.05      |
|    loss                 | 0.0128    |
|    n_updates            | 240       |
|    policy_gradient_loss | 4.17e-08  |
|    value_loss           | 0.0293    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.53      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.55      |
| time/                   |           |
|    fps                  | 648       |
|    iterations           | 35        |
|    time_elapsed         | 110       |
|    total_timesteps      | 71680     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.0127    |
|    n_updates            | 340       |
|    policy_gradient_loss | -2.79e-08 |
|    value_loss           | 0.0292    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.55      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.55      |
| time/                   |           |
|    fps                  | 645       |
|    iterations           | 45        |
|    time_elapsed         | 142       |
|    total_timesteps      | 92160     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | -1.19e-07 |
|    learning_rate        | 0.05      |
|    loss                 | 0.0129    |
|    n_updates            | 440       |
|    policy_gradient_loss | -2.58e-08 |
|    value_loss           | 0.0294    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.55      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |
| time/                   |           |
|    fps                  | 645       |
|    iterations           | 55        |
|    time_elapsed         | 174       |
|    total_timesteps      | 112640    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 5.96e-08  |
|    learning_rate        | 0.05      |
|    loss                 | 0.0284    |
|    n_updates            | 540       |
|    policy_gradient_loss | 9.41e-09  |
|    value_loss           | 0.0351    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |


Market Return : 423.10%   |   Portfolio Return : 380.74%   |   
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |
| time/                   |           |
|    fps                  | 646       |
|    iterations           | 65        |
|    time_elapsed         | 205       |
|    total_timesteps      | 133120    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | -1.19e-07 |
|    learning_rate        | 0.05      |
|    loss                 | 0.0262    |
|    n_updates            | 640       |
|    policy_gradient_loss | 2.06e-08  |
|    value_loss           | 0.034     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |
| time/                   |           |
|    fps                  | 643       |
|    iterations           | 75        |
|    time_elapsed         | 238       |
|    total_timesteps      | 153600    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.0248    |
|    n_updates            | 740       |
|    policy_gradient_loss | 4.4e-08   |
|    value_loss           | 0.033     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |
| time/                   |           |
|    fps                  | 638       |
|    iterations           | 85        |
|    time_elapsed         | 272       |
|    total_timesteps      | 174080    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | -1.19e-07 |
|    learning_rate        | 0.05      |
|    loss                 | 0.0241    |
|    n_updates            | 840       |
|    policy_gradient_loss | -1.77e-08 |
|    value_loss           | 0.0323    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |
| time/                   |           |
|    fps                  | 622       |
|    iterations           | 95        |
|    time_elapsed         | 312       |
|    total_timesteps      | 194560    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.0225    |
|    n_updates            | 940       |
|    policy_gradient_loss | -2.57e-08 |
|    value_loss           | 0.032     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |
| time/                   |           |
|    fps                  | 604       |
|    iterations           | 105       |
|    time_elapsed         | 355       |
|    total_timesteps      | 215040    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.0217    |
|    n_updates            | 1040      |
|    policy_gradient_loss | 2.57e-08  |
|    value_loss           | 0.0314    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |
| time/                   |           |
|    fps                  | 600       |
|    iterations           | 115       |
|    time_elapsed         | 392       |
|    total_timesteps      | 235520    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.021     |
|    n_updates            | 1140      |
|    policy_gradient_loss | 5.21e-08  |
|    value_loss           | 0.031     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |
| time/                   |           |
|    fps                  | 599       |
|    iterations           | 125       |
|    time_elapsed         | 427       |
|    total_timesteps      | 256000    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.0197    |
|    n_updates            | 1240      |
|    policy_gradient_loss | -8.83e-08 |
|    value_loss           | 0.0306    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |
| time/                   |           |
|    fps                  | 593       |
|    iterations           | 135       |
|    time_elapsed         | 465       |
|    total_timesteps      | 276480    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.0191    |
|    n_updates            | 1340      |
|    policy_gradient_loss | 1.5e-08   |
|    value_loss           | 0.0305    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.56      |
| time/                   |           |
|    fps                  | 592       |
|    iterations           | 145       |
|    time_elapsed         | 501       |
|    total_timesteps      | 296960    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 1.19e-07  |
|    learning_rate        | 0.05      |
|    loss                 | 0.0184    |
|    n_updates            | 1440      |
|    policy_gradient_loss | -2.61e-09 |
|    value_loss           | 0.0302    |
---------------------------------------
Market Return : 423.10%   |   Portfolio Return : 380.79%   |   
---------------------------------------
| rollout/                |           |
|    ep_len_mean

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 592       |
|    iterations           | 155       |
|    time_elapsed         | 535       |
|    total_timesteps      | 317440    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0.000138  |
|    learning_rate        | 0.05      |
|    loss                 | 0.0175    |
|    n_updates            | 1540      |
|    policy_gradient_loss | -2.8e-09  |
|    value_loss           | 0.0302    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 590       |
|    iterations           | 165       |
|    time_elapsed         | 572       |
|    total_timesteps      | 337920    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | -1.19e-07 |
|    learning_rate        | 0.05      |
|    loss                 | 0.0172    |
|    n_updates            | 1640      |
|    policy_gradient_loss | 1.07e-08  |
|    value_loss           | 0.03      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 590       |
|    iterations           | 175       |
|    time_elapsed         | 607       |
|    total_timesteps      | 358400    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.0169    |
|    n_updates            | 1740      |
|    policy_gradient_loss | 3.2e-09   |
|    value_loss           | 0.0299    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 589       |
|    iterations           | 185       |
|    time_elapsed         | 643       |
|    total_timesteps      | 378880    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 5.96e-08  |
|    learning_rate        | 0.05      |
|    loss                 | 0.0171    |
|    n_updates            | 1840      |
|    policy_gradient_loss | -4.86e-08 |
|    value_loss           | 0.0297    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 583       |
|    iterations           | 195       |
|    time_elapsed         | 684       |
|    total_timesteps      | 399360    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.017     |
|    n_updates            | 1940      |
|    policy_gradient_loss | -1.51e-08 |
|    value_loss           | 0.0297    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 580       |
|    iterations           | 205       |
|    time_elapsed         | 723       |
|    total_timesteps      | 419840    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | -1.19e-07 |
|    learning_rate        | 0.05      |
|    loss                 | 0.0163    |
|    n_updates            | 2040      |
|    policy_gradient_loss | 3.75e-08  |
|    value_loss           | 0.0296    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 578       |
|    iterations           | 215       |
|    time_elapsed         | 760       |
|    total_timesteps      | 440320    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.0159    |
|    n_updates            | 2140      |
|    policy_gradient_loss | 2.8e-08   |
|    value_loss           | 0.0295    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 579       |
|    iterations           | 225       |
|    time_elapsed         | 795       |
|    total_timesteps      | 460800    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 0         |
|    learning_rate        | 0.05      |
|    loss                 | 0.0152    |
|    n_updates            | 2240      |
|    policy_gradient_loss | -3.33e-08 |
|    value_loss           | 0.0295    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 579       |
|    iterations           | 235       |
|    time_elapsed         | 830       |
|    total_timesteps      | 481280    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 1.79e-07  |
|    learning_rate        | 0.05      |
|    loss                 | 0.0153    |
|    n_updates            | 2340      |
|    policy_gradient_loss | -7.88e-08 |
|    value_loss           | 0.0295    |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.31e+04  |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 578       |
|    iterations           | 245       |
|    time_elapsed         | 867       |
|    total_timesteps      | 501760    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.66e-06 |
|    explained_variance   | 1.19e-07  |
|    learning_rate        | 0.05      |
|    loss                 | 0.0152    |
|    n_updates            | 2440      |
|    policy_gradient_loss | -5e-08    |
|    value_loss           | 0.0294    |
---------------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x7f91788ecc90>

In [82]:
model.save("my_trading_agent")

In [83]:
# Create the environment directly
env2 = TradingEnv(
    name="BTCUSD",
    df=df,  # Your dataset with your custom features
    positions=[-0.9, 0, 0.9],  # -1 (=SHORT), 0(=OUT), +1 (=LONG)
    trading_fees=0.01/100,  # 0.01% per stock buy/sell (Binance fees)
    borrow_interest_rate=0.0003/100,  # 0.0003% per timestep (one timestep = 1h here)
)

In [84]:
# Run an episode until it ends :
done, truncated = False, False
obs, info = env2.reset()
while not done and not truncated:
    action, _ = model.predict(obs)
    obs, reward, done, truncated, info = env2.step(action)
    # ... (evaluate further)

Market Return : 423.10%   |   Portfolio Return : 380.74%   |   


In [85]:
# At the end of the episode you want to render
env2.unwrapped.save_for_render(dir="render_logs")