In [2]:
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

import gymnasium as gym
import gym_trading_env

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from RLTrade.agent import DQNAgent, XGBoostAgent
from RLTrade.utils import ( 
    nonstationaryDGP, 
    FeatureEngineering
)

In [3]:
# Build a reference environment for the DQN agent. Its only purpose in this
# cell is to measure the action- and observation-space sizes.

# One simulated price pair; the traded "asset" is the spread between the legs.
x, y = nonstationaryDGP()
spread = y - x
df = pd.DataFrame({'x': x, 'y': y, 'close': spread}) # close price of portfolio is the spread
df_train = df.copy() # work on a copy so `df` keeps the raw simulation

# Keyword arguments forwarded verbatim to FeatureEngineering.rolling_feature.
feature_config = {
    "window": 7, # window of 7; pad with first value when window is not available
    "mode": "diff", # difference between current and previous close price
    "subtract_mean": True,
}

fe = FeatureEngineering(df_train, main_feature_col="close")
fe.rolling_feature(**feature_config)
df_train = fe.df.copy()
# (A bare `df_train` expression used to sit here; mid-cell it displayed nothing,
# so it was removed. Use `df_train.head()` in its own cell to inspect the frame.)

env_action_space = [-1, 0, 1] # Positions : [-1=SHORT, 0=OUT, 1=LONG]
# env_action_space = [i/10 for i in range(-10, 11)] # Positions : [-1, -0.9, -0.8, ..., 0, 0.1, 0.2, ..., 1]

# NOTE(review): the env is labelled "stationaryDGP" although the data comes from
# nonstationaryDGP() -- confirm the label is intentional.
env = gym.make("TradingEnv",
        name= "stationaryDGP",
        df = df_train, # dataset including the engineered feature columns
        positions = env_action_space, # -1 (=SHORT), 0(=OUT), +1 (=LONG)
        trading_fees = 0.01/100, # 0.01% per trade
        borrow_interest_rate= 0.0003/100, # 0.0003% per timestep
        # max_episode_duration=180
    )
# Extra end-of-episode metrics: number of steps where the position changed,
# and the number of recorded steps.
env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
env.unwrapped.add_metric('Episode Length', lambda history : len(history['position']) )

# Sizes needed to construct the agent: number of discrete actions and the
# length of the first observation returned by reset().
n_actions = env.action_space.n
n_observations = len(env.reset()[0])

In [5]:
# Train a DQN agent on freshly simulated spreads from nonstationaryDGP().
# Each iteration draws a new simulation, wraps it in a new TradingEnv and
# trains the same agent on it for exactly one episode.
agent = DQNAgent(n_observations=n_observations, n_actions=n_actions)

# Loop-invariant feature settings: built once here instead of on every iteration.
# They are the same values used in the setup cell where `n_observations` is
# measured; keep the two in sync if either changes.
feature_config = {
    "window": 7, # window of 7; pad with first value when window is not available
    "mode": "diff", # difference between current and previous close price
    "subtract_mean": True,
}

for i_simulation in range(15):
    # Fresh simulated data for this episode.
    x, y = nonstationaryDGP()
    spread = y - x
    df = pd.DataFrame({'x': x, 'y': y, 'close': spread}) # close price of portfolio is the spread
    df_train = df.copy()

    fe = FeatureEngineering(df_train, main_feature_col="close")
    fe.rolling_feature(**feature_config)
    df_train = fe.df.copy()

    # Define the environment
    # NOTE(review): labelled "stationaryDGP" although the data comes from
    # nonstationaryDGP() -- confirm the label is intentional.
    env = gym.make("TradingEnv",
            name= "stationaryDGP",
            df = df_train, # dataset including the engineered feature columns
            positions = env_action_space, # -1 (=SHORT), 0(=OUT), +1 (=LONG)
            trading_fees = 0.01/100, # 0.01% per trade
            borrow_interest_rate= 0.0003/100, # 0.0003% per timestep
            # max_episode_duration=180
        )
    env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
    env.unwrapped.add_metric('Episode Length', lambda history : len(history['position']) )

    # `history_metrics` is overwritten each iteration, so after the loop it
    # holds only what the final train() call returned.
    history_metrics, _ = agent.train(env, num_episodes=1) # One episode is one simulation

Market Return : 20.21%   |   Portfolio Return : -46.42%   |   Position Changes : 5722   |   Episode Length : 10000   |   
Market Return : 28.72%   |   Portfolio Return : -49.55%   |   Position Changes : 5232   |   Episode Length : 10000   |   
Market Return :  2.01%   |   Portfolio Return : -49.98%   |   Position Changes : 4963   |   Episode Length : 10000   |   
Market Return : -25.47%   |   Portfolio Return : -44.46%   |   Position Changes : 4823   |   Episode Length : 10000   |   
Market Return :  1.79%   |   Portfolio Return : -45.15%   |   Position Changes : 4539   |   Episode Length : 10000   |   
Market Return : 13.51%   |   Portfolio Return : -44.17%   |   Position Changes : 4515   |   Episode Length : 10000   |   
Market Return : -15.91%   |   Portfolio Return : -43.11%   |   Position Changes : 4511   |   Episode Length : 10000   |   
Market Return : 19.79%   |   Portfolio Return : -44.14%   |   Position Changes : 4416   |   Episode Length : 10000   |   
Market Return : 12.48%

In [6]:
# --- Simulated data ---------------------------------------------------------
# One draw from the DGP; the spread (y - x) is used as the portfolio close price.
x, y = nonstationaryDGP()
spread = y - x
df = pd.DataFrame(dict(x=x, y=y, close=spread))
df_train = df.copy()

# --- Features ---------------------------------------------------------------
# Keyword arguments passed straight to rolling_mean_corrected_feature.
feature_config = dict(
    window=100,   # window of 100; pad with first value when window is not available
    mode="diff",  # difference between current and previous close price
)
fe = FeatureEngineering(df_train, main_feature_col="close")
fe.rolling_mean_corrected_feature(**feature_config)
df_train = fe.df.copy()

# --- Environment ------------------------------------------------------------
# Discrete positions: -1 = SHORT, 0 = OUT, 1 = LONG.
# A finer grid would be: [i/10 for i in range(-10, 11)]  ->  -1, -0.9, ..., 0.9, 1
env_action_space = [-1, 0, 1]

# NOTE(review): the env is labelled "stationaryDGP" although the data comes from
# nonstationaryDGP() -- confirm the label is intentional.
env = gym.make(
    "TradingEnv",
    name="stationaryDGP",
    df=df_train,                        # dataset including the engineered features
    positions=env_action_space,
    trading_fees=0.01/100,              # 0.01% per trade
    borrow_interest_rate=0.0003/100,    # 0.0003% per timestep
    # max_episode_duration=180
)
env.unwrapped.add_metric(
    'Position Changes',
    lambda history: np.sum(np.diff(history['position']) != 0),
)
env.unwrapped.add_metric(
    'Episode Length',
    lambda history: len(history['position']),
)

# --- Agent dimensions -------------------------------------------------------
# Number of discrete actions, and length of the first observation from reset().
n_actions = env.action_space.n
n_observations = len(env.reset()[0])

In [7]:
# Train an XGBoost-based agent on freshly simulated spreads from nonstationaryDGP().
# Each iteration draws a new simulation, wraps it in a new TradingEnv and
# trains the same agent on it for exactly one episode.
agent = XGBoostAgent(n_observations, n_actions)

# Loop-invariant feature settings: built once here instead of on every iteration.
# They are the same values used in the setup cell where `n_observations` is
# measured; keep the two in sync if either changes.
feature_config = {
    "window": 100, # window of 100; pad with first value when window is not available
    "mode": "diff", # difference between current and previous close price
}

for i_simulation in range(15):
    # Fresh simulated data for this episode.
    x, y = nonstationaryDGP()
    spread = y - x
    df = pd.DataFrame({'x': x, 'y': y, 'close': spread}) # close price of portfolio is the spread
    df_train = df.copy()

    fe = FeatureEngineering(df_train, main_feature_col="close")
    fe.rolling_mean_corrected_feature(**feature_config)
    df_train = fe.df.copy()

    # Define the environment
    # NOTE(review): labelled "stationaryDGP" although the data comes from
    # nonstationaryDGP() -- confirm the label is intentional.
    env = gym.make("TradingEnv",
            name= "stationaryDGP",
            df = df_train, # dataset including the engineered feature columns
            positions = env_action_space, # -1 (=SHORT), 0(=OUT), +1 (=LONG)
            trading_fees = 0.01/100, # 0.01% per trade
            borrow_interest_rate= 0.0003/100, # 0.0003% per timestep
            # max_episode_duration=180
        )
    env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
    env.unwrapped.add_metric('Episode Length', lambda history : len(history['position']) )

    # `history_metrics` is overwritten each iteration, so after the loop it
    # holds only what the final train() call returned.
    # replay_every=1 is forwarded to XGBoostAgent.train; presumably it controls
    # how often the agent refits -- TODO confirm against RLTrade.agent.
    history_metrics, _ = agent.train(env, num_episodes=1, replay_every=1) # One episode is one simulation

Market Return : 10.49%   |   Portfolio Return : -51.26%   |   Position Changes : 6549   |   Episode Length : 10000   |   
Market Return :  8.11%   |   Portfolio Return : -53.11%   |   Position Changes : 5612   |   Episode Length : 10000   |   
Market Return :  7.35%   |   Portfolio Return : -31.04%   |   Position Changes : 3158   |   Episode Length : 10000   |   
Market Return : -20.42%   |   Portfolio Return : -25.07%   |   Position Changes : 2097   |   Episode Length : 10000   |   
Market Return : -19.97%   |   Portfolio Return : -19.56%   |   Position Changes : 1301   |   Episode Length : 10000   |   
Market Return :  4.61%   |   Portfolio Return : -6.64%   |   Position Changes : 628   |   Episode Length : 10000   |   
Market Return : 21.86%   |   Portfolio Return : -3.04%   |   Position Changes : 293   |   Episode Length : 10000   |   
Market Return : -11.13%   |   Portfolio Return : -0.65%   |   Position Changes : 183   |   Episode Length : 10000   |   
Market Return :  0.29%   | 