In [None]:
# module import

import warnings
import gym_trading_env  # noqa
import gymnasium as gym
import numpy as np
import pandas as pd
import torch
from gym_trading_env.renderer import Renderer
from sb3_contrib import QRDQN
from sb3_contrib.qrdqn import MlpPolicy
from stable_baselines3.common.utils import get_device
# from stable_baselines3.ppo import MlpPolicy
from tqdm import TqdmExperimentalWarning
from sklearn.preprocessing import robust_scale

In [None]:
# Print whether PyTorch can see a CUDA device, then display the device that
# Stable-Baselines3's get_device() returns when called with its default argument.
print(torch.cuda.is_available())
get_device()

In [None]:
# Preprocess


def SMA(df, ndays):
    """Simple moving average of the ``close`` column.

    Args:
        df: DataFrame exposing a ``close`` column.
        ndays: Rolling window length, in rows.

    Returns:
        A float Series named ``SMA_<ndays>`` rounded to two decimals. Rows
        whose window is not yet full are NaN.
    """
    rolling_mean = df.close.rolling(ndays).mean()
    labelled = pd.Series(rolling_mean, name=f"SMA_{ndays}")
    return labelled.astype(float).round(2)


def BBANDS(df, n):
    """Bollinger bands of the ``close`` column.

    Args:
        df: DataFrame exposing a ``close`` column.
        n: Rolling window length, in rows.

    Returns:
        ``(upper, lower)``: two float Series rounded to two decimals, located
        two rolling standard deviations above and below the rolling mean.
    """
    window = df.close.rolling(window=n)
    center = window.mean()
    deviation = window.std()
    upper = center + (2 * deviation)
    lower = center - (2 * deviation)
    return upper.astype(float).round(2), lower.astype(float).round(2)


def RSI(df, periods=14):
    """Relative Strength Index of the ``close`` column.

    Gains and losses are smoothed with an exponentially weighted mean
    (``com=periods - 1``); no value is produced until ``periods`` price
    changes have been observed.

    Args:
        df: DataFrame exposing a ``close`` column.
        periods: Smoothing length, in rows.

    Returns:
        A float Series rounded to two decimals.
    """
    delta = df.close.diff()
    gains = delta.clip(lower=0)
    losses = -1 * delta.clip(upper=0)

    smoothing = dict(com=periods - 1, adjust=True, min_periods=periods)
    avg_gain = gains.ewm(**smoothing).mean()
    avg_loss = losses.ewm(**smoothing).mean()

    relative_strength = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + relative_strength))
    return rsi.astype(float).round(2)


def MACD(df):
    """MACD line, signal line and histogram of the ``close`` column.

    Uses exponentially weighted means with spans 12 and 26 for the MACD line
    and span 9 for the signal line; each mean stays NaN until ``span``
    observations are available.

    Args:
        df: DataFrame exposing a ``close`` column.

    Returns:
        ``(macd, signal, histogram)``: each is the result of mapping
        ``df.index`` through the corresponding Series.
    """
    close = df["close"]

    def _ema(series, span):
        # Non-adjusted EWM that stays NaN until `span` observations exist.
        return series.ewm(span=span, adjust=False, min_periods=span).mean()

    macd_line = _ema(close, 12) - _ema(close, 26)
    signal_line = _ema(macd_line, 9)
    histogram = macd_line - signal_line

    index = df.index
    return index.map(macd_line), index.map(signal_line), index.map(histogram)


def add_robust_features(df):
    """Add robust-scaled OHLCV feature columns to ``df`` in place.

    Adds ``feature_close`` (scaled close-to-close percentage change),
    ``feature_open`` / ``feature_high`` / ``feature_low`` (each price divided
    by the close, then scaled) and ``feature_volume`` (volume divided by its
    rolling maximum, then scaled). Rows containing NaN are then dropped from
    ``df`` itself.

    Args:
        df: DataFrame with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns. It is modified in place.

    Returns:
        The same DataFrame object, with the feature columns added.
    """
    close = df.close
    df["feature_close"] = robust_scale(close.pct_change())
    for price_column in ("open", "high", "low"):
        df[f"feature_{price_column}"] = robust_scale(df[price_column] / close)

    # Window of 7 * 24 rows (one week if the data is hourly; TODO confirm).
    rolling_peak_volume = df.volume.rolling(7 * 24).max()
    df["feature_volume"] = robust_scale(df.volume / rolling_peak_volume)

    df.dropna(inplace=True)
    return df


def normalize(df):
    """Min-max scale every column whose name contains ``"feature"``.

    Each feature column is mapped linearly onto ``[0, 1]``. A column whose
    values are all equal has no range to scale by; it is mapped to ``0.0``
    instead of being turned into NaN by a 0/0 division.

    Args:
        df: Input DataFrame. It is not modified.

    Returns:
        A copy of ``df`` with the feature columns rescaled; other columns are
        left untouched.
    """
    result = df.copy()
    feature_columns = [name for name in df.columns if "feature" in name]
    for feature_name in feature_columns:
        column = df[feature_name]
        min_value = column.min()
        value_range = column.max() - min_value
        shifted = column - min_value
        if value_range == 0:
            # Constant column: every non-missing value equals the minimum, so
            # `shifted` is already all zeros. Skip the division by zero.
            result[feature_name] = shifted.astype(float)
        else:
            result[feature_name] = shifted / value_range
    return result


def robust(df):
    """Apply ``robust_scale`` to every column whose name contains ``"feature"``.

    Args:
        df: Input DataFrame. It is not modified.

    Returns:
        A copy of ``df`` in which each feature column has been replaced by its
        robust-scaled values; other columns are left untouched.
    """
    scaled = df.copy()
    for column_name in df.columns:
        if "feature" not in column_name:
            continue
        scaled[column_name] = robust_scale(df[column_name])
    return scaled


def preprocess(df):
    """Build the model's feature columns from raw OHLCV data.

    Adds raw price/volume features, three simple moving averages, Bollinger
    bands, MACD (line, signal, histogram) and three RSI columns. All feature
    columns except the RSI ones are robust-scaled; RSI is attached unscaled.

    RSI is computed on the full history *before* incomplete rows are dropped.
    Computing it afterwards (as this function used to) restarts the RSI
    warm-up on the trimmed frame and leaves NaNs in the first rows of the
    returned features.

    Note: ``feature_MiddleBand`` holds the *upper* Bollinger band, because
    ``BBANDS`` returns ``(upper, lower)``. The column name is kept unchanged
    for compatibility.

    Args:
        df: DataFrame with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns. It is not modified.

    Returns:
        A new DataFrame restricted to rows where every column (including the
        RSI features) is available.
    """
    # Work on a copy so repeated calls never see a half-processed frame.
    df = df.copy()

    df["volume"] = df.volume.astype(float).round(2)
    df["feature_close"] = df.close
    df["feature_open"] = df.open
    df["feature_high"] = df.high
    df["feature_low"] = df.low
    df["feature_volume"] = df.volume
    df["feature_SMA_7"] = SMA(df, 7)
    df["feature_SMA_25"] = SMA(df, 25)
    df["feature_SMA_99"] = SMA(df, 99)
    df["feature_MiddleBand"], df["feature_LowerBand"] = BBANDS(df, 21)
    df["feature_MACD"], df["feature_MACD_S"], df["feature_MACD_H"] = MACD(df)

    # Keep RSI outside the frame for now so that robust() does not rescale it.
    rsi_by_column = {
        f"feature_RSI_{periods}": RSI(df, periods=periods) for periods in (6, 12, 24)
    }

    # Positional mask of rows where every indicator, RSI included, is defined.
    complete_rows = df.notna().all(axis=1).to_numpy()
    for rsi in rsi_by_column.values():
        complete_rows = complete_rows & rsi.notna().to_numpy()

    df_robust = robust(df.loc[complete_rows])

    # Positional assignment: each RSI array has one entry per original row.
    for column_name, rsi in rsi_by_column.items():
        df_robust[column_name] = rsi.to_numpy()[complete_rows]

    return df_robust


In [None]:
# Reward Function

def reward_only_position_changed(history):
    """Reward that pays out only when the position changes.

    * Position changed on this step: the one-step relative change of the
      portfolio valuation.
    * Position unchanged and non-zero: ``0``.
    * Position unchanged and flat (``0``): a penalty of ``0.01`` for each of
      the most recent consecutive steps spent in that position.

    The backward scan over the history is only needed for the flat case, so
    it is skipped for the other two. Returned values are unchanged.

    Args:
        history: Environment history supporting ``len(history)`` and
            ``history[column, t]`` lookups, with at least two entries.

    Returns:
        The reward for the latest step.
    """
    prev_position = history["position", -2]
    curr_position = history["position", -1]

    if prev_position != curr_position:
        return history["portfolio_valuation", -1] / history["portfolio_valuation", -2] - 1

    if curr_position != 0:
        return 0

    # Flat and unchanged: charge the holding fee once per consecutive step
    # (walking backwards from the latest one) spent in this position. The
    # oldest history entry is never inspected, matching the original bound.
    holding_fee = 0.01
    holding_cost = 0
    steps_back = 1
    history_length = len(history)
    while steps_back < history_length and history["position", -steps_back] == prev_position:
        steps_back += 1
        holding_cost -= holding_fee
    return holding_cost

In [None]:
# Environment
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv
from gym_trading_env.environments import MultiDatasetTradingEnv


def max_drawdown(history):
    """Largest peak-to-trough decline of the portfolio valuation.

    Args:
        history: Mapping-like object whose ``'portfolio_valuation'`` entry is
            a non-empty sequence of valuations.

    Returns:
        The worst drawdown as a percentage string, e.g. ``"-25.00%"``; it is
        never positive.
    """
    valuations = history['portfolio_valuation']
    peak = valuations[0]
    worst = 0
    for value in valuations:
        # Track the running peak, then measure how far below it we are.
        peak = value if value > peak else peak
        decline = (value - peak) / peak
        worst = decline if decline < worst else worst
    return f"{worst*100:5.2f}%"


def make_env(dir="./data/train/month/**/*.pkl"):
    """Create a single ``MultiDatasetTradingEnv`` with extra episode metrics.

    The environment is configured with ``add_robust_features`` as its
    preprocessing function and ``reward_only_position_changed`` as its reward,
    and gets three additional metrics: the number of position changes, the
    episode length, and the maximum drawdown.

    Args:
        dir: Glob pattern passed to the environment as ``dataset_dir``.

    Returns:
        The environment returned by ``gym.make``.
    """
    env_settings = dict(
        dataset_dir=dir,
        preprocess=add_robust_features,
        reward_function=reward_only_position_changed,
        positions=[-10, 0, 10],
        trading_fees=0.0001,
        borrow_interest_rate=0.00001,
    )
    env = gym.make("MultiDatasetTradingEnv", **env_settings)

    def count_position_changes(history):
        return np.sum(np.diff(history["position"]) != 0)

    def episode_length(history):
        return len(history["position"])

    trading_env = env.unwrapped
    metrics = (
        ("Position Changes", count_position_changes),
        ("Episode Length", episode_length),
        ("Max Drawdown", max_drawdown),
    )
    for metric_name, metric_function in metrics:
        trading_env.add_metric(metric_name, metric_function)
    return env


# Vectorised training environment: 64 copies of MultiDatasetTradingEnv reading
# the monthly training pickles, using `preprocess` for features and
# `reward_only_position_changed` as the reward.
vec_env = make_vec_env(
    MultiDatasetTradingEnv,
    n_envs=64,
    # vec_env_cls=SubprocVecEnv,
    env_kwargs={
        "dataset_dir": "./data/train/month/**/*.pkl",
        "preprocess": preprocess,
        "reward_function": reward_only_position_changed,
        "positions": [-10, 0, 10],
        "trading_fees": 0.0001,
        "borrow_interest_rate": 0.00003,
        "windows": 60,
    },
)


# training_envs = gym.vector.SyncVectorEnv([lambda: make_env("./data/train/month/**/*.pkl") for _ in range(5)])
# validation_envs = gym.vector.SyncVectorEnv([lambda: make_env("./data/test/month/2023/*.pkl") for _ in range(5)])

In [None]:
# Train

# Suppress tqdm's TqdmExperimentalWarning for the rest of the session.
warnings.filterwarnings("ignore", category=TqdmExperimentalWarning)
# QR-DQN agent (sb3_contrib) with an MLP policy, bound to the vectorised
# training environment and logging to ./tensorboard/.
# NOTE(review): the commented-out `n_steps` / `use_sde` arguments look like
# leftovers from the commented-out PPO import; confirm before re-enabling.
model = QRDQN(
    MlpPolicy,
    vec_env,
    # buffer_size=100_000_000,  # 1_000_000
    # n_steps=1024,
    batch_size=32,
    verbose=0,
    device="cuda",
    tensorboard_log="./tensorboard/",
    learning_rate =0.000001, #0.00005
    # use_sde=False,
)

# vec_env = model.get_env()
# obs = vec_env.reset()

In [None]:
# Train the agent for 100,000,000 timesteps, showing a progress bar.
model.learn(total_timesteps=100000000, progress_bar=True)

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

# Evaluation environment on the held-out monthly data.
# It must produce observations of the same shape as the training environment,
# so it uses the same feature pipeline (`preprocess`) and window length (60).
# The previous `add_robust_features` / no-window setup yields a different
# observation space from the one the model was trained on.
# NOTE(review): borrow_interest_rate here (0.00001) differs from the training
# environment (0.00003); confirm which value is intended.
new_env = make_vec_env(
    MultiDatasetTradingEnv,
    n_envs=1,
    env_kwargs=dict(
        dataset_dir="./data/test/month/**/*.pkl",
        preprocess=preprocess,
        reward_function=reward_only_position_changed,
        positions=[-10, 0, 10],
        trading_fees=0.0001,
        borrow_interest_rate=0.00001,
        windows=60,
    )
)

# Average episode reward of the trained policy over 10 evaluation episodes.
mean_reward, std_reward = evaluate_policy(model, new_env, n_eval_episodes=10, render=True)
print(f"Mean reward: {mean_reward}, Std reward: {std_reward}")

In [None]:
# Roll the policy forward for 1000 vectorised steps on the training
# environments, calling render() after every step.
obs = vec_env.reset()
for _ in range(1000):
    # deterministic=True asks predict() for a deterministic action choice.
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    vec_env.render()

In [None]:
# Single (non-vectorised) environment for a manual rollout of `model`.
# `env` was previously never defined in this notebook, so this cell failed on
# a fresh kernel. The settings mirror the training environment so that the
# observation shape matches what the model expects.
# NOTE(review): the test-data path is an assumption; adjust if another
# dataset was intended.
env = gym.make(
    "MultiDatasetTradingEnv",
    dataset_dir="./data/test/month/**/*.pkl",
    preprocess=preprocess,
    reward_function=reward_only_position_changed,
    positions=[-10, 0, 10],
    trading_fees=0.0001,
    borrow_interest_rate=0.00003,
    windows=60,
)

reward = 0.0
done, truncated = False, False
observation, info = env.reset()

warnings.filterwarnings("ignore", category=UserWarning)
# Play one episode to completion, accumulating the total reward.
while not done and not truncated:
    # predict() returns (action, state); only the action is passed to step().
    action = model.predict(observation, deterministic=True)
    observation, r, done, truncated, info = env.step(action[0])
    reward += r

# env.unwrapped.save_for_render(dir="render_logs")

In [None]:
# data = pd.read_csv("1.csv", parse_dates=["date"], index_col="date")

def get_random_data(data_root="D:/Destktop/PyCharm_Projects/sb3/data/train/day"):
    """Load one randomly chosen pickled dataset.

    A year in 2018..2022 and a month in 1..12 are drawn at random, then one
    file is picked from ``<data_root>/<year>/<month>``.

    Args:
        data_root: Directory containing the ``<year>/<month>`` sub-folders.
            Defaults to the original hard-coded location, so existing calls
            keep working; pass another path to run on a different machine.

    Returns:
        The object stored in the chosen pickle file.
    """
    import os
    from random import choice, randint

    year = randint(2018, 2022)
    month = randint(1, 12)
    data_dir = f"{data_root}/{year}/{month}"

    # Sorted so that a seeded `random` picks the same file regardless of the
    # order in which the operating system lists the directory.
    files = sorted(os.listdir(data_dir))
    file_path = os.path.join(data_dir, choice(files))

    # NOTE: unpickling can execute arbitrary code; only use trusted data.
    return pd.read_pickle(file_path)

data = get_random_data()

# Single-dataset environment built directly from the sampled frame.
# NOTE(review): no `preprocess` is passed, so `data` presumably already
# contains the feature columns. Also borrow_interest_rate is 0.0003 here
# versus 0.00003 / 0.00001 elsewhere in this notebook; confirm both.
evaluate_env = gym.make(
    "TradingEnv",
    df=data,
    positions=[-10, 10],
    trading_fees=0.0001,
    borrow_interest_rate=0.0003,
)
evaluate_env.unwrapped.add_metric(
    "Position Changes", lambda history: np.sum(np.diff(history["position"]) != 0)
)
evaluate_env.unwrapped.add_metric("Episode Length", lambda history: len(history["position"]))

# `load` is a classmethod that RETURNS a new model. Calling it on an existing
# instance and ignoring the result leaves that instance untrained, so the
# loaded model must be assigned. This also avoids allocating a
# 100,000,000-transition replay buffer just to run inference.
evaluate_model = QRDQN.load(
    "./model/QRDQN/1m_+-10_0.01.zip",
    env=evaluate_env,
    device="auto",
)

reward = 0.0
done, truncated = False, False
observation, info = evaluate_env.reset()

warnings.filterwarnings("ignore", category=UserWarning)
# Play one episode to completion, accumulating the total reward.
while not done and not truncated:
    # predict() returns (action, state); only the action is passed to step().
    action = evaluate_model.predict(observation, deterministic=True)
    observation, r, done, truncated, info = evaluate_env.step(action[0])
    reward += r

print(reward)
# evaluate_env.unwrapped.save_for_render(dir="render_logs")

In [None]:
def _annualized_return_metric(column):
    """Build a Renderer metric that formats the annualised return of ``column``.

    The returned function takes the render DataFrame, compounds the
    first-to-last growth of ``column`` to a 365-day horizon using the span of
    the frame's index, and formats it as a percentage string.
    """

    def metric(df):
        total_growth = df[column].iloc[-1] / df[column].iloc[0]
        elapsed = df.index.values[-1] - df.index.values[0]
        periods_per_year = pd.Timedelta(days=365) / elapsed
        return f"{(total_growth ** periods_per_year - 1) * 100:0.2f}%"

    return metric


renderer = Renderer(render_logs_dir="render_logs")
renderer.add_metric("Annual Market Return", _annualized_return_metric("close"))
renderer.add_metric("Annual Portfolio Return", _annualized_return_metric("portfolio_valuation"))
renderer.run()

In [None]:
# Save the trained agent under a new file name.
model.save("./model/QRDQN/1m_+-10_0.01_2.zip")