In [1]:
import gym_trading_env


In [2]:
from gym_trading_env.downloader import download, EXCHANGE_LIMIT_RATES
import datetime

# Rate-limit profile the downloader should apply to the "binanceus" exchange.
EXCHANGE_LIMIT_RATES["binanceus"] = dict(
    limit=200,        # data points (candlesticks) queried per request
    pause_every=120,  # pause after this many requests
    pause=2,          # length of each pause, in seconds
)

# Download 1h candles for every listed pair, starting 2024-08-07, into ./data.
download(
    exchange_names=["binanceus"],
    symbols=["BTC/USDT", "ETH/USDT", "ICP/USDT", "NEAR/USDT", "RNDR/USDT"],
    timeframe="1h",
    dir="data",
    since=datetime.datetime(2024, 8, 7),
)

BTC/USDT downloaded from binanceus and stored at data/binanceus-BTCUSDT-1h.pkl
ETH/USDT downloaded from binanceus and stored at data/binanceus-ETHUSDT-1h.pkl
ICP/USDT downloaded from binanceus and stored at data/binanceus-ICPUSDT-1h.pkl
NEAR/USDT downloaded from binanceus and stored at data/binanceus-NEARUSDT-1h.pkl
RNDR/USDT downloaded from binanceus and stored at data/binanceus-RNDRUSDT-1h.pkl


In [3]:
import datetime
import pandas as pd

# Load the hourly ICP/USDT candles written by the download cell.
# NOTE(review): pickle files can execute code on load -- only read files this
# notebook produced itself.
df = pd.read_pickle("./data/binanceus-ICPUSDT-1h.pkl")

for item in df:
    print("The Item Is >>>>> " + item)

# df is a DataFrame with columns : "open", "high", "low", "close", "volume", "date_close"

# Number of candles in the rolling window used to normalise volume.
# NOTE(review): an earlier comment described a 7*24 (one week of 1h candles)
# window; the code has always used 2, which is kept here -- confirm intent.
VOLUME_WINDOW = 2

# Create the feature : ( close[t] - close[t-1] )/ close[t-1]
df["feature_close"] = df["close"].pct_change()

# Create the feature : open[t] / close[t]
df["feature_open"] = df["open"] / df["close"]

# Create the feature : high[t] / close[t]
df["feature_high"] = df["high"] / df["close"]

# Create the feature : low[t] / close[t]
df["feature_low"] = df["low"] / df["close"]

# Create the feature : volume[t] / max(volume[t-VOLUME_WINDOW+1 : t+1])
# When every candle in the window has zero volume the ratio is 0/0 = NaN, and
# dropna() below would silently delete those candles and leave holes in the
# hourly series. Map that case to 0.0 instead; warm-up rows (rolling max is
# NaN) stay NaN and are still dropped.
rolling_max_volume = df["volume"].rolling(VOLUME_WINDOW).max()
df["feature_volume"] = (df["volume"] / rolling_max_volume).where(
    rolling_max_volume != 0, 0.0
)

# Simple moving averages of the close price over 10 and 5 candles.
df["feature_sma10"] = df["close"].rolling(10).mean()
df["feature_sma5"] = df["close"].rolling(5).mean()

# Drop the warm-up rows whose rolling/pct_change features are still NaN.
df = df.dropna()
# Each step, the environment will return 7 inputs : "feature_close", "feature_open", "feature_high",
# "feature_low", "feature_volume", "feature_sma10", "feature_sma5"

df

The Item Is >>>>> open
The Item Is >>>>> high
The Item Is >>>>> low
The Item Is >>>>> close
The Item Is >>>>> volume
The Item Is >>>>> date_close


Unnamed: 0_level_0,open,high,low,close,volume,date_close,feature_close,feature_open,feature_high,feature_low,feature_volume,feature_sma10,feature_sma5
date_open,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-08-07 17:00:00,7.250,7.250,7.226,7.242,42.03,2024-08-07 18:00:00,-0.003988,1.001105,1.001105,0.997791,1.000000,7.4638,7.3066
2024-08-07 18:00:00,7.151,7.151,7.106,7.106,37.43,2024-08-07 19:00:00,-0.018779,1.006333,1.006333,1.000000,0.890554,7.4092,7.2406
2024-08-07 19:00:00,7.106,7.106,7.106,7.106,0.00,2024-08-07 20:00:00,0.000000,1.000000,1.000000,1.000000,0.000000,7.3528,7.2132
2024-08-07 20:00:00,7.126,7.126,7.016,7.016,36.00,2024-08-07 21:00:00,-0.012665,1.015678,1.015678,1.000000,1.000000,7.2874,7.1482
2024-08-07 21:00:00,7.016,7.016,7.000,7.000,73.54,2024-08-07 22:00:00,-0.002281,1.002286,1.002286,1.000000,1.000000,7.2336,7.0940
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-13 11:00:00,8.228,8.228,8.196,8.196,8.03,2024-09-13 12:00:00,-0.005461,1.003904,1.003904,1.000000,1.000000,8.3391,8.2686
2024-09-13 12:00:00,8.183,8.183,8.183,8.183,0.80,2024-09-13 13:00:00,-0.001586,1.000000,1.000000,1.000000,0.099626,8.3067,8.2488
2024-09-13 13:00:00,8.228,8.274,8.228,8.274,1.59,2024-09-13 14:00:00,0.011121,0.994440,1.000000,0.994440,1.000000,8.2887,8.2270
2024-09-13 14:00:00,8.305,8.305,8.263,8.305,29.65,2024-09-13 15:00:00,0.003747,1.000000,1.000000,0.994943,1.000000,8.2785,8.2398


In [4]:
## Create Trading Environment
import gymnasium as gym
import numpy as np
def reward_function(history):
    """Return the log-return of the portfolio valuation over the last step.

    `history` is indexed as ``history["portfolio_valuation", k]``; the reward
    is the natural log of the ratio between entry -1 and entry -2.
    """
    latest_value = history["portfolio_valuation", -1]
    previous_value = history["portfolio_valuation", -2]
    return np.log(latest_value / previous_value)

# Collect the TradingEnv settings first, then build the environment via Gymnasium.
env_settings = dict(
    name="ICPUSD",
    df=df,  # Your dataset with your custom features
    positions=[-1, 0, 1],  # -1 (=SHORT), 0(=OUT), +1 (=LONG)
    reward_function=reward_function,
    trading_fees=0.01 / 100,  # 0.01% per stock buy / sell (Binance fees)
    borrow_interest_rate=0.0003 / 100,  # 0.0003% per timestep (one timestep = 1h here)
)
env = gym.make("TradingEnv", **env_settings)

In [5]:
# Instantiate the agent
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

# Build a DQN agent with the MLP policy on the trading environment.
model = DQN(policy="MlpPolicy", env=env, verbose=1)
# Train for 200,000 timesteps, showing a progress bar while learning.
model.learn(total_timesteps=200_000, progress_bar=True)



ModuleNotFoundError: No module named 'stable_baselines3'

## Run the environment

In [None]:
# Run one episode with the trained agent until it terminates or is truncated.
#
# The Gymnasium `env` is stepped directly rather than `model.get_env()`:
#   * a Stable-Baselines3 VecEnv returns 4 values from step()
#     (obs, rewards, dones, infos), so the 5-way unpack below would fail on it;
#   * a VecEnv resets the wrapped env automatically when an episode ends, which
#     would discard the episode that is saved for rendering afterwards.
done, truncated = False, False
obs, info = env.reset()

while not done and not truncated:
    # Ask the trained policy for a position index into the positions list (=[-1, 0, 1]).
    action, _states = model.predict(obs, deterministic=True)
    # predict() returns a NumPy value; pass a plain int index to the env.
    obs, reward, done, truncated, info = env.step(int(action))





In [None]:
# At the end of the episode, save its history so the renderer can load it.
# `env` was created with gym.make(), so it is wrapped by Gymnasium; call
# save_for_render on the underlying environment via `.unwrapped` instead of
# relying on wrappers forwarding unknown attributes.
env.unwrapped.save_for_render(dir="render_logs")

In [None]:
from gym_trading_env.renderer import Renderer
def _sma(window):
    """Build a function that maps a candle DataFrame to the rolling mean of "close" over `window` rows."""
    return lambda df: df["close"].rolling(window).mean()


renderer = Renderer(render_logs_dir="render_logs")

# Add Custom Lines (Simple Moving Average)
for line_name, window, color in (("sma10", 10, "purple"), ("sma20", 20, "blue")):
    renderer.add_line(
        name=line_name,
        function=_sma(window),
        line_options={"width": 1, "color": color},
    )

renderer.run()