In [47]:
import numpy as np

import pandas as pd
# Hourly BTC/USD candles; the same file lives in the repo at examples/data/BTC_USD-Hourly.csv
url = "https://raw.githubusercontent.com/ClementPerroud/Gym-Trading-Env/main/examples/data/BTC_USD-Hourly.csv"
# Load with the parsed "date" column as index, then order chronologically,
# discard rows with missing values and discard fully duplicated rows.
df = (
    pd.read_csv(url, parse_dates=["date"], index_col="date")
    .sort_index()
    .dropna()
    .drop_duplicates()
)

In [48]:
import pandas as pd


# Function library for indicators
def sma(data, period):
    """Simple moving average of `data` over a trailing window of `period` rows.

    Positions where the window is not yet full are NaN (pandas' default
    `min_periods` for a fixed-size window).
    """
    trailing_window = data.rolling(window=period)
    return trailing_window.mean()

def ema(data, period):
    """Exponentially weighted moving average of `data` with `span=period`.

    No value is produced until `period` observations are available
    (`min_periods=period`); earlier positions are NaN.
    """
    weighted_window = data.ewm(span=period, min_periods=period)
    return weighted_window.mean()

def rsi(data, period):
    """Relative Strength Index of `data` on a 0-100 scale.

    Gains and losses are the positive and negative parts of the one-step
    difference; each is smoothed with an exponentially weighted mean
    (`span=period`, `min_periods=period`) before forming the ratio.

    NOTE(review): smoothing is span-based, not Wilder's alpha = 1/period, so
    values will differ from charting packages that use Wilder's method.
    """
    change = data.diff()
    gains = change.clip(lower=0)
    losses = -change.clip(upper=0)
    avg_gain = gains.ewm(span=period, min_periods=period).mean()
    avg_loss = losses.ewm(span=period, min_periods=period).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - 100 / (1 + relative_strength)

def macd(data, fast_period, slow_period, signal_period):
    """Return the MACD line and its signal line as a `(macd, signal)` tuple.

    The MACD line is `ema(data, fast_period) - ema(data, slow_period)`; the
    signal line is `ema` of the MACD line over `signal_period`.
    """
    macd_line = ema(data, fast_period) - ema(data, slow_period)
    signal_line = ema(macd_line, signal_period)
    return macd_line, signal_line

def bollinger_bands(data, period, num_std=2):
    """Return the `(upper, lower)` Bollinger Bands of `data`.

    The bands are the rolling mean over `period` rows plus/minus `num_std`
    rolling standard deviations over the same window. Both statistics come
    from the same window so the band width is measured around the band
    centre (previously the centre was an EMA while the deviation was taken
    around the rolling mean).

    Parameters
    ----------
    data : pandas.Series
        Price series.
    period : int
        Window length for both the mean and the standard deviation.
    num_std : float, default 2
        Band half-width in standard deviations.

    Returns
    -------
    tuple of pandas.Series
        `(upper, lower)`; positions without a full window are NaN.
    """
    window = data.rolling(window=period)
    middle = window.mean()
    # pandas' default sample standard deviation (ddof=1), as before.
    half_width = num_std * window.std()
    return middle + half_width, middle - half_width

def atr(data, period):
    """Average True Range: rolling mean of the true range over `period` rows.

    The true range of a bar is the largest of
    `high - low`, `|high - previous close|` and `|low - previous close|`,
    so gaps between bars are included. (The earlier version compared
    `|close - previous close|` instead, which understates gap moves.)

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide "high", "low" and "close" columns.
    period : int
        Window length of the rolling mean.

    Returns
    -------
    pandas.Series
        ATR values; positions without a full window are NaN.
    """
    prev_close = data["close"].shift(1)
    candidates = pd.concat(
        [
            data["high"] - data["low"],
            (data["high"] - prev_close).abs(),
            (data["low"] - prev_close).abs(),
        ],
        axis=1,
    )
    # Row-wise max skips NaN, so the first bar (no previous close) falls
    # back to high - low.
    true_range = candidates.max(axis=1)
    return true_range.rolling(window=period).mean()

def stochastic_oscillator(data, k_period, d_period):
    """Return the stochastic oscillator as a `(k, d)` tuple of Series.

    `k` places the close within the rolling low/high range of the last
    `k_period` rows, scaled to 0-100. `d` is an exponentially weighted mean
    of `k` (`span=d_period`, `min_periods=d_period`).

    `data` must provide "high", "low" and "close" columns.
    """
    lowest_low = data["low"].rolling(window=k_period).min()
    highest_high = data["high"].rolling(window=k_period).max()
    price_range = highest_high - lowest_low
    percent_k = 100 * (data["close"] - lowest_low) / price_range
    percent_d = percent_k.ewm(span=d_period, min_periods=d_period).mean()
    return percent_k, percent_d

def volume_ratio(data, period):
    """Ratio of the rolling mean volume (over `period` rows) to the current volume.

    Reads the "volume" column of `data`.
    """
    volume = data["volume"]
    average_volume = volume.rolling(window=period).mean()
    return average_volume / volume

# Add indicator columns computed from the close price, at several window lengths.
df["feature_SMA_10"] = sma(df["close"], 10)
df["feature_SMA_50"] = sma(df["close"], 50)
df["feature_EMA_20"] = ema(df["close"], 20)
df["feature_RSI_7"] = rsi(df["close"], 7)
df["feature_RSI_14"] = rsi(df["close"], 14)
# macd() and bollinger_bands() each return a pair; compute once and unpack
# straight into the two columns instead of calling the function twice.
df["feature_MACD_12_26_9"], df["feature_MACD_signal_9"] = macd(df["close"], 12, 26, 9)
(
    df["feature_Bollinger_Bands_Upper_20"],
    df["feature_Bollinger_Bands_Lower_20"],
) = bollinger_bands(df["close"], 20)

In [49]:
# df is a DataFrame with columns : "open", "high", "low", "close", "Volume USD"
df = df.assign(
    # ( close[t] - close[t-1] ) / close[t-1]
    feature_close=df["close"].pct_change(),
    # open[t] / close[t]
    feature_open=df["open"] / df["close"],
    # high[t] / close[t]
    feature_high=df["high"] / df["close"],
    # low[t] / close[t]
    feature_low=df["low"] / df["close"],
    # volume[t] / max(volume over the trailing 7*24 rows, i.e. one week of hourly bars)
    feature_volume=df["Volume USD"] / df["Volume USD"].rolling(7 * 24).max(),
).dropna()  # Clean again: drop rows left incomplete by the rolling / pct_change warm-up
# This cell adds five columns: "feature_close", "feature_open", "feature_high",
# "feature_low", "feature_volume".
# NOTE(review): the indicator columns added earlier are also named "feature_*",
# so the environment presumably receives those as inputs too - confirm.

In [50]:
import gymnasium as gym
import environments

#from environments import TradingEnv

In [51]:
import gym_trading_env

In [52]:
#import gymnasium as gym
#import gym_trading_env
#env = gym.make('MultiDatasetTradingEnv',
#    dataset_dir = 'preprocessed_data/*.pkl',
#    positions=[-1, 0, 1],  # -1 (=SHORT), 0(=OUT), +1 (=LONG)
#)

In [53]:
# Run 10 episodes
#for _ in range(10):
  # At every episode, the env will pick a new dataset.
#  done, truncated = False, False
#  observation, info = env.reset()
#  while not done and not truncated:
#      position_index = env.action_space.sample() # Pick random position index
#      observation, reward, done, truncated, info = env.step(position_index)

In [54]:
from environments import TradingEnv

# Instantiate the trading environment from the class itself rather than via gym.make.
env = TradingEnv(
    name="BTCUSD",
    # Dataset carrying the custom feature columns
    df=df,
    # Allowed positions: negative = SHORT, 0 = OUT, positive = LONG
    # (presumably +/-0.9 means 90% of the portfolio - confirm against the env docs)
    positions=[-0.9, 0.0, 0.9],
    # 0.01% per buy/sell (Binance fees)
    trading_fees=0.01/100,
    # 0.0003% per timestep (one timestep = 1h here)
    borrow_interest_rate=0.0003/100,
)

In [55]:
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import VecEnv

In [56]:
# Policy identifier passed to the stable-baselines3 algorithm constructor.
policy = "MlpPolicy"

In [57]:
# Baseline PPO agent with library-default hyperparameters.
# NOTE(review): `model` is reassigned by the tuned PPO built in the next cell.
model = PPO(policy=policy, env=env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [58]:
# Experiment with different hyperparameters based on your problem and environment.
# NOTE(review): learning_rate=0.01 and gamma=0.5 are far from the SB3 PPO defaults
# (3e-4 and 0.99); the captured training log shows clip_fraction ~0.53 and a
# negative explained_variance, which may indicate overly aggressive updates - confirm.
model = PPO(
    policy,  # reuse the policy identifier defined above instead of repeating the literal
    env,
    verbose=1,
    learning_rate=0.01,  # Adjust as needed
    batch_size=32,  # Adjust based on memory and learning speed
    gamma=0.5,  # Discount factor
    seed=42,  # Fixed seed so training runs are reproducible
)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [59]:
# Train the agent for 100k environment steps.
model.learn(total_timesteps=100_000)

-----------------------------
| time/              |      |
|    fps             | 1505 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 892         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.032951437 |
|    clip_fraction        | 0.53        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.05       |
|    explained_variance   | -4.17       |
|    learning_rate        | 0.01        |
|    loss                 | 0.00538     |
|    n_updates            | 10          |
|    policy_gradient_loss | 0.0236      |
|    value_loss           | 0.0156      |
-----------------------------------------
----------------------------------

KeyboardInterrupt: 

In [None]:
# Persist the trained agent under the name "my_trading_agent".
model.save("my_trading_agent")

In [None]:
# Second environment instance, used for evaluating the trained agent.
env2 = TradingEnv(
    name="BTCUSD",
    # Dataset carrying the custom feature columns
    df=df,
    # Allowed positions: negative = SHORT, 0 = OUT, positive = LONG
    # (presumably +/-0.9 means 90% of the portfolio - confirm against the env docs)
    positions=[-0.9, 0, 0.9],
    # 0.01% per buy/sell (Binance fees)
    trading_fees=0.01/100,
    # 0.0003% per timestep (one timestep = 1h here)
    borrow_interest_rate=0.0003/100,
)

In [None]:
# Play a single episode with the trained agent, stopping on termination or truncation.
done = truncated = False
obs, info = env2.reset()
while not (done or truncated):
    # predict() returns (action, state); only the action is needed here.
    action, _ = model.predict(obs)
    obs, reward, done, truncated, info = env2.step(action)
    # ... (evaluate further)

In [None]:
# Once the episode has finished, write its history to the "render_logs" directory
# so it can be rendered later. `.unwrapped` reaches the underlying environment object.
env2.unwrapped.save_for_render(dir="render_logs")