In [1]:
import numpy as np
import matplotlib.pyplot as plt
import gymnasium as gym
from tqdm.notebook import tqdm 
from sklearn.preprocessing import robust_scale

from rainbow.agent import Rainbow

import sys
import gym_trading_env
import nest_asyncio


2024-05-29 22:51:07.902756: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-29 22:51:07.902834: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-29 22:51:07.938389: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-29 22:51:07.994544: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Create environments

In [2]:
def add_features(df):
    """Append the robust-scaled ``feature_*`` columns and drop incomplete rows.

    The frame is modified in place (columns are added, rows containing NaN are
    removed) and the very same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``open``, ``high``, ``low``, ``close`` and ``volume``
        columns.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, extended with five ``feature_*`` columns.
    """
    close = df["close"]
    raw_features = {
        # Relative change of the close versus the previous row.
        "feature_close": close.pct_change(fill_method=None),
        # Open / high / low expressed as a ratio of the same row's close.
        "feature_open": df["open"] / close,
        "feature_high": df["high"] / close,
        "feature_low": df["low"] / close,
        # Volume relative to its rolling maximum over a 7*24-row window.
        "feature_volume": df["volume"] / df["volume"].rolling(7 * 24).max(),
    }
    for column, values in raw_features.items():
        df[column] = robust_scale(values)

    # pct_change and the rolling window leave NaN in the leading rows
    # (presumably preserved by robust_scale); those rows are removed here.
    df.dropna(inplace=True)
    return df


def reward_function(history):
    """Reward for the latest step: signed close-price move minus a switching fee.

    Parameters
    ----------
    history
        Object indexable as ``history[column, step]``. The columns read are
        ``"position"``, ``"data_close"`` and ``"portfolio_valuation"`` at steps
        ``-1`` (current) and ``-2`` (previous).

    Returns
    -------
    number
        Three times the raw reward, where the raw reward is

        * ``close[-1] - close[-2]`` when the current position is ``1``,
        * ``close[-2] - close[-1]`` when the current position is ``0``,

        reduced by ``portfolio_valuation[-2] * 0.0001`` whenever the position
        differs from the previous step.

    Raises
    ------
    ValueError
        If the current position is neither ``0`` nor ``1``. Previously this
        case left ``reward`` unassigned and failed later with an opaque
        ``UnboundLocalError``.
    """
    position = history["position", -1]
    last_position = history["position", -2]
    data_close = history["data_close", -1]
    last_data_close = history["data_close", -2]

    if position == 1:
        # Position 1: rewarded by the price increase.
        reward = data_close - last_data_close
    elif position == 0:
        # Position 0: mirror image, rewarded by the price decrease.
        reward = last_data_close - data_close
    else:
        raise ValueError(
            f"reward_function only supports positions 0 and 1, got {position!r}"
        )

    # Fee charged on a position change, proportional to the previous valuation.
    if position != last_position:
        reward = reward - (history["portfolio_valuation", -2] * 0.0001)

    return 3 * reward

    

    #return 800*np.log(history["portfolio_valuation", -1] / history["portfolio_valuation", -2]) #log (p_t / p_t-1 )

def max_drawdown(history):
    """Return the deepest peak-to-trough loss of the portfolio as a percentage string.

    Parameters
    ----------
    history
        Mapping-like object whose ``'portfolio_valuation'`` entry is a
        non-empty sequence of valuations in chronological order.

    Returns
    -------
    str
        The most negative value of ``(valuation - running_peak) / running_peak``
        multiplied by 100 and formatted as ``"{:5.2f}%"`` (``" 0.00%"`` when the
        valuation never falls below its running peak).
    """
    valuations = history['portfolio_valuation']
    peak = valuations[0]
    worst = 0
    for value in valuations:
        # Track the highest valuation seen so far, then measure the fall from it.
        peak = max(peak, value)
        worst = min(worst, (value - peak) / peak)
    return f"{worst*100:5.2f}%"

def make_env(dir):
    """Create one ``MultiDatasetTradingEnv`` and register two extra metrics on it.

    Parameters
    ----------
    dir : str
        Value forwarded as ``dataset_dir`` (the callers in this notebook pass a
        glob such as ``"data/processed/training/*.pkl"``). It is also printed.

    Returns
    -------
    The environment returned by ``gym.make``, with the ``'Position Changes'``
    and ``'Max Drawdown'`` metrics added on its unwrapped instance.
    """
    print(dir)

    env_options = dict(
        dataset_dir=dir,
        preprocess=add_features,
        windows=15,
        # Only 0 and 1 are offered (presumably 0 = out of the market,
        # 1 = fully long; confirm against the gym_trading_env docs).
        positions=[0, 1],
        initial_position=0,
        trading_fees=0.01 / 100,  # 0.01 % (Binance fees per the original note)
        borrow_interest_rate=0.0003 / 100,  # 0.0003 % (per timestep per the original note)
        reward_function=reward_function,
        portfolio_initial_value=1000,  # in USDT per the original note
        verbose=1,
    )
    env = gym.make("MultiDatasetTradingEnv", **env_options)

    def position_changes(history):
        # Share of steps whose position differs from the previous step.
        positions = history['position']
        switches = np.sum(np.diff(positions) != 0)
        return f"{100*switches/len(positions):5.2f}%"

    trading_env = env.unwrapped
    trading_env.add_metric('Position Changes', position_changes)
    trading_env.add_metric('Max Drawdown', max_drawdown)
    return env


# Six copies of each environment wrapped in a SyncVectorEnv. The factories take
# no arguments, so one callable is simply listed six times; each entry is still
# called separately and therefore builds its own environment.
training_envs = gym.vector.SyncVectorEnv(
    [lambda: make_env("data/processed/training/*.pkl")] * 6
)
validation_envs = gym.vector.SyncVectorEnv(
    [lambda: make_env("data/processed/validation/*.pkl")] * 6
)


data/processed/training/*.pkl
data/processed/training/*.pkl
data/processed/training/*.pkl
data/processed/training/*.pkl
data/processed/training/*.pkl
data/processed/training/*.pkl
data/processed/validation/*.pkl
data/processed/validation/*.pkl
data/processed/validation/*.pkl
data/processed/validation/*.pkl
data/processed/validation/*.pkl
data/processed/validation/*.pkl


In [3]:
# Build the Rainbow agent. Several values mirror the environment cell and must
# be kept in sync with it by hand:
#   - simultaneous_training_env = 6  <-> six env factories per SyncVectorEnv
#   - window = 15                    <-> windows=15 in make_env
#   - nb_actions = 2                 <-> positions=[0, 1] in make_env
agent = Rainbow(
    simultaneous_training_env = 6,
    
    # Distributional
    # NOTE(review): v_min / v_max / nb_atoms presumably define the support of the
    # value distribution (51 atoms between -200 and 250) — confirm in rainbow docs.
    distributional= True,
    v_min= -200,
    v_max = 250,
    nb_atoms= 51, 
    # Prioritized Replay
    prioritized_replay = True,
    prioritized_replay_alpha= 0.5,
    # Beta rises linearly from 0.5 at step 0 to 1 at step 150_000, then stays at 1.
    prioritized_replay_beta_function = lambda episode, step : min(1, 0.5 + 0.5*step/150_000),
    
    # General
    multi_steps = 3,
    # NOTE(review): 7 is presumably the 5 feature_* columns plus 2 features added
    # by the trading environment — confirm against the observation shape.
    nb_states = 7,
    nb_actions = 2,
    gamma = 0.99,
    # NOTE(review): 1E7 is a float literal; confirm Rainbow accepts a non-int capacity.
    replay_capacity = 1E7,
    # NOTE(review): presumably the target-network update period — confirm the unit.
    tau = 2000,
    
    # Model
    window= 15,
    units = [16,16,16],
    dropout= 0.2,
    adversarial= True,
    noisy= False,
    learning_rate = 3*2.5E-4,  # = 7.5E-4

    batch_size= 128,
    train_every = 10,
    # Epsilon decays geometrically by a factor (1 - 2E-5) per step, floored at 0.001.
    # A later cell reassigns agent.epsilon_function with a different schedule.
    epsilon_function = lambda episode, step : max(0.001, (1 - 2E-5)** step),
    name = "Rainbow",
)

2024-05-23 17:16:30.677488: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-23 17:16:30.703335: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-23 17:16:30.703387: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-23 17:16:30.706337: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-23 17:16:30.706546: I external/local_xla/xla/stream_executor

In [4]:
# Replace the exploration schedule given at construction: slower geometric decay
# (factor (1 - 1E-5) per step instead of (1 - 2E-5)) and a higher floor (0.02
# instead of 0.001).
# NOTE(review): the captured training log shows "Epsilon : 0.10%", which is below
# this 0.02 floor, so that run may not have had this cell applied — confirm.
agent.epsilon_function = lambda episode, step : max(0.02, (1 - 1E-5)** step)

In [4]:
def train(steps = 100_000):
    """Collect ``steps`` vectorised transitions, training the agent after each one.

    The current observation is kept in the module-level ``obs`` so consecutive
    calls resume where the previous call stopped; ``training_envs`` is reset
    only when ``obs`` does not exist yet.

    Parameters
    ----------
    steps : int
        Number of ``training_envs.step`` calls to perform.
    """
    global obs

    print("___________________________________________ TRAINING ___________________________________________")
    if 'obs' not in globals():
        # First call in this kernel: start from a fresh reset.
        obs, _reset_info = training_envs.reset()

    for _ in tqdm(range(steps), mininterval=30):
        actions = agent.e_greedy_pick_actions(obs)
        next_obs, rewards, dones, truncateds, _step_infos = training_envs.step(actions)

        agent.store_replays(obs, actions, rewards, next_obs, dones, truncateds)
        agent.train()

        # Carry the observation over to the next iteration (and the next call).
        obs = next_obs

def evaluation():
    """Step ``validation_envs`` until every sub-environment has ended at least once.

    The environments are reset first; a per-environment flag is raised as soon
    as that environment reports ``done`` or ``truncated``, and the loop stops
    once all flags are raised. Nothing is returned and no transition is stored.

    NOTE(review): actions come from ``agent.e_greedy_pick_actions``, the same
    call used in ``train``; confirm whether a greedy picker is intended here.
    """
    print("___________________________________________ VALIDATION ___________________________________________")
    val_obs, _reset_info = validation_envs.reset()
    finished = np.zeros(val_obs.shape[0], dtype=bool)
    while not finished.all():
        actions = agent.e_greedy_pick_actions(val_obs)
        val_obs, _rewards, dones, truncateds, _infos = validation_envs.step(actions)
        # Flags are sticky: once an environment has ended it stays marked.
        finished |= dones | truncateds

In [None]:
import time

# Wall-clock deadline: 60 * 60 * 8.4 seconds (8.4 hours) from the moment this cell starts.
t_end = time.time() + 60 * 60 * 8.4
# Alternate training and validation rounds until the deadline has passed.
# The deadline is only checked between rounds, so the final train + evaluation
# round can finish after t_end.
while time.time() < t_end:
    print('___________________________________________ ')
    print(f'--------> Timeleft: {int(t_end - time.time())}s')
    train(steps = 88000)
    evaluation()

___________________________________________ 
--------> Timeleft: 30239s
___________________________________________ TRAINING ___________________________________________


  0%|          | 0/88000 [00:00<?, ?it/s]

Market Return : 1031.21%   |   Portfolio Return : 1754.89%   |   Position Changes : 24.33%   |   Max Drawdown : -55.41%   |   
↳ Env 5 : 010 :   922232   |   04:56:17   |   Epsilon :  0.10%
Market Return : 1033.61%   |   Portfolio Return : 1021.33%   |   Position Changes : 24.35%   |   Max Drawdown : -64.02%   |   
↳ Env 3 : 010 :   922246   |   04:56:18   |   Epsilon :  0.10%
Market Return : 1033.61%   |   Portfolio Return : 871.07%   |   Position Changes : 24.33%   |   Max Drawdown : -66.86%   |   
↳ Env 0 : 010 :   922265   |   04:56:19   |   Epsilon :  0.10%
Market Return : 1033.61%   |   Portfolio Return : 937.16%   |   Position Changes : 24.32%   |   Max Drawdown : -65.38%   |   
↳ Env 2 : 010 :   922267   |   04:56:19   |   Epsilon :  0.10%
Market Return : 1033.61%   |   Portfolio Return : 979.30%   |   Position Changes : 24.32%   |   Max Drawdown : -63.20%   |   
↳ Env 4 : 010 :   922284   |   04:56:20   |   Epsilon :  0.10%
Market Return : 1031.21%   |   Portfolio Return : 157

  0%|          | 0/88000 [00:00<?, ?it/s]

Market Return : 1031.21%   |   Portfolio Return : 1567.53%   |   Position Changes : 23.70%   |   Max Drawdown : -52.96%   |   
↳ Env 5 : 011 :  1006084   |   05:42:07   |   Epsilon :  0.10%
Market Return : 1031.21%   |   Portfolio Return : 1825.08%   |   Position Changes : 23.70%   |   Max Drawdown : -54.00%   |   
↳ Env 3 : 011 :  1006098   |   05:42:08   |   Epsilon :  0.10%
Market Return : 1033.61%   |   Portfolio Return : 2188.28%   |   Position Changes : 23.72%   |   Max Drawdown : -61.45%   |   
↳ Env 0 : 011 :  1006100   |   05:42:08   |   Epsilon :  0.10%
Market Return : 1031.21%   |   Portfolio Return : 2092.64%   |   Position Changes : 23.67%   |   Max Drawdown : -55.37%   |   
Market Return : 1033.61%   |   Portfolio Return : 1787.40%   |   Position Changes : 23.70%   |   Max Drawdown : -61.27%   |   
↳ Env 2 : 011 :  1006119   |   05:42:10   |   Epsilon :  0.10%
↳ Env 4 : 011 :  1006119   |   05:42:10   |   Epsilon :  0.10%
Market Return : 1031.21%   |   Portfolio Return : 

  0%|          | 0/88000 [00:00<?, ?it/s]

Market Return : 1033.61%   |   Portfolio Return : 2690.76%   |   Position Changes : 24.88%   |   Max Drawdown : -57.13%   |   
↳ Env 5 : 012 :  1089919   |   06:35:46   |   Epsilon :  0.10%
Market Return : 1033.61%   |   Portfolio Return : 2820.55%   |   Position Changes : 24.93%   |   Max Drawdown : -54.16%   |   
↳ Env 3 : 012 :  1089933   |   06:35:47   |   Epsilon :  0.10%
Market Return : 1033.61%   |   Portfolio Return : 2819.98%   |   Position Changes : 24.92%   |   Max Drawdown : -54.95%   |   
↳ Env 0 : 012 :  1089935   |   06:35:47   |   Epsilon :  0.10%
Market Return : 1033.61%   |   Portfolio Return : 2661.98%   |   Position Changes : 24.90%   |   Max Drawdown : -57.97%   |   
↳ Env 2 : 012 :  1089954   |   06:35:48   |   Epsilon :  0.10%
Market Return : 1031.21%   |   Portfolio Return : 1780.83%   |   Position Changes : 24.70%   |   Max Drawdown : -54.26%   |   
↳ Env 4 : 012 :  1089971   |   06:35:49   |   Epsilon :  0.10%
Market Return : 1033.61%   |   Portfolio Return : 

In [6]:
# Two back-to-back validation passes; each call resets validation_envs before stepping.
evaluation()
evaluation()

___________________________________________ VALIDATION ___________________________________________


2024-05-22 09:42:01.867657: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904


Market Return : 40.69%   |   Portfolio Return :  5.31%   |   Position Changes : 21.14%   |   Max Drawdown : -15.75%   |   
Market Return : 40.49%   |   Portfolio Return : 22.59%   |   Position Changes : 21.49%   |   Max Drawdown : -12.97%   |   
Market Return : 41.23%   |   Portfolio Return : -5.72%   |   Position Changes : 21.08%   |   Max Drawdown : -23.34%   |   
Market Return : 41.23%   |   Portfolio Return : -5.59%   |   Position Changes : 21.01%   |   Max Drawdown : -23.34%   |   
Market Return : 40.59%   |   Portfolio Return : 51.92%   |   Position Changes : 22.36%   |   Max Drawdown : -11.71%   |   
Market Return : 40.59%   |   Portfolio Return : 50.02%   |   Position Changes : 22.39%   |   Max Drawdown : -11.88%   |   
Market Return : 41.01%   |   Portfolio Return :  2.85%   |   Position Changes : 21.40%   |   Max Drawdown : -19.98%   |   
Market Return : 40.72%   |   Portfolio Return : 30.78%   |   Position Changes : 21.59%   |   Max Drawdown : -14.08%   |   
Market Return : 

In [5]:
# Path used by the save and load cells: agent.model.save() writes to it and the
# model.pkl / replay_memory.pkl files are opened inside it.
modelname = "prio20hNewReward"

In [9]:
# Save cell: persists the trained agent under the `modelname` path in three parts:
#   1. the Keras model, via agent.model.save(modelname)
#   2. the whole agent object, dill-pickled to <modelname>/model.pkl
#   3. the replay memory alone, dill-pickled to <modelname>/replay_memory.pkl
# `pickle` and `tf` are imported here but not used by the active code of this cell.
import dill, pickle
import tensorflow as tf

# Runs first; the pickles below are opened inside the same path
# (presumably save() creates that directory — confirm).
agent.model.save(modelname)

with open(modelname + "/model.pkl", "wb") as file:
    dill.dump(agent, file)

with open(modelname + "/replay_memory.pkl", "wb") as file:
    dill.dump(agent.replay_memory, file)

INFO:tensorflow:Assets written to: prio20hNewReward/assets


INFO:tensorflow:Assets written to: prio20hNewReward/assets


Saving agent ...


In [5]:
# Load cell: restores the agent from the `modelname` path, in this order:
#   1. the agent object from <modelname>/model.pkl
#   2. its replay memory from <modelname>/replay_memory.pkl
#   3. the Keras model, loaded twice from the same path, so `model` and
#      `target_model` both come from the one saved model.
# Requires `modelname` to be defined already. `pickle` is imported but unused here.
# SECURITY: dill.load can execute arbitrary code contained in the file; only load
# pickles that you created yourself.
import dill, pickle
import tensorflow as tf

with open(modelname + "/model.pkl", "rb") as file:
        agent = dill.load(file)

with open(modelname + "/replay_memory.pkl", "rb") as file:
    agent.replay_memory = dill.load(file)
    
agent.model = tf.keras.models.load_model(modelname)
agent.target_model = tf.keras.models.load_model(modelname)

2024-05-22 09:41:22.832538: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-22 09:41:22.854989: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-22 09:41:22.855062: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-22 09:41:22.856549: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-22 09:41:22.856610: I external/local_xla/xla/stream_executor