In [8]:
import pandas as pd
# Available in the github repo : examples/data/BTC_USD-Hourly.csv
url = "https://raw.githubusercontent.com/ClementPerroud/Gym-Trading-Env/main/examples/data/BTC_USD-Hourly.csv"
df = pd.read_csv(url, parse_dates=["date"], index_col= "date")
df.sort_index(inplace= True)
df.dropna(inplace= True)
df.drop_duplicates(inplace=True)

In [2]:
df.head()

Unnamed: 0_level_0,unix,symbol,open,high,low,close,volume,Volume USD
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-05-15 06:00:00,1526364000,BTC/USD,8733.86,8796.68,8707.28,8740.99,4906603.14,559.93
2018-05-15 07:00:00,1526367600,BTC/USD,8740.99,8766.0,8721.11,8739.0,2390398.89,273.58
2018-05-15 08:00:00,1526371200,BTC/USD,8739.0,8750.27,8660.53,8728.49,7986062.84,917.79
2018-05-15 09:00:00,1526374800,BTC/USD,8728.49,8754.4,8701.35,8708.32,1593991.98,182.62
2018-05-15 10:00:00,1526378400,BTC/USD,8708.32,8865.0,8695.11,8795.9,11101273.74,1260.69


In [9]:
# df is a DataFrame with columns : "open", "high", "low", "close", "Volume USD"

# Create the feature : ( close[t] - close[t-1] )/ close[t-1]
df["feature_close"] = df["close"].pct_change()

# Create the feature : open[t] / close[t]
df["feature_open"] = df["open"]/df["close"]

# Create the feature : high[t] / close[t]
df["feature_high"] = df["high"]/df["close"]

# Create the feature : low[t] / close[t]
df["feature_low"] = df["low"]/df["close"]

 # Create the feature : volume[t] / max(*volume[t-7*24:t+1])
df["feature_volume"] = df["Volume USD"] / df["Volume USD"].rolling(7*24).max()

df.dropna(inplace= True) # Clean again !
# Eatch step, the environment will return 5 inputs  : "feature_close", "feature_open", "feature_high", "feature_low", "feature_volume"


In [10]:
import gymnasium as gym
import gym_trading_env
env = gym.make("TradingEnv",
        name= "BTCUSD",
        df = df, # Your dataset with your custom features
        positions = [ -1, 0, 1], # -1 (=SHORT), 0(=OUT), +1 (=LONG)
        trading_fees = 0.01/100, # 0.01% per stock buy / sell (Binance fees)
        borrow_interest_rate= 0.0003/100, # 0.0003% per timestep (one timestep = 1h here)
    )

In [12]:
from dataclasses import dataclass
from datetime import datetime

from stable_baselines3 import PPO

human_render: bool = False
env_id: str = "CartPole-v1"
total_timesteps: int = 100_000
learning_rate: float = 0.0005
num_envs: int = 1
num_steps: int = 1024
gamma: float = 0.95
gae_lambda: float = 0.95
num_minibatches: int = 4
update_epochs: int = 128
norm_adv: bool = True
clip_coef: float = 0.2
vf_coef: float = 0.5
max_grad_norm: float = 0.5

model = PPO(
    "MlpPolicy",
    env,
    learning_rate=learning_rate,
    gamma=gamma,
    # !! batch size is the number of steps times the number of environments divided by the number of minibatches
    batch_size=(num_steps * num_envs) // num_minibatches,
    n_steps=num_steps,
    n_epochs=update_epochs,
    vf_coef=vf_coef,
    max_grad_norm=max_grad_norm,
    clip_range=clip_coef,
    gae_lambda=gae_lambda,
    normalize_advantage=norm_adv,
    tensorboard_log=f"runs/SB3_PPO_trading_{datetime.now().strftime('%Y%m%d-%H%M%S')}",
)

In [21]:
for i in range(100):
    # Run an episode until it ends :
    done, truncated = False, False
    observation, info = env.reset()

    while not done and not truncated:
        # Pick a position by its index in your position list (=[-1, 0, 1])....usually something like : position_index = your_policy(observation)
        position_index, _ = model.predict(observation)
        position_index = env.action_space.sample()
        observation, reward, done, truncated, info = env.step(position_index)

Market Return : 423.10%   |   Portfolio Return : -83.76%   |   
