In [1]:
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

import gymnasium as gym
import gym_trading_env

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from RLTrade.agent import DQNAgent, XGBoostAgent
from RLTrade.utils import ( 
    nonstationary_dgp, 
    FeatureEngineering
)

In [2]:
# --- Experiment 1 setup: one simulated path, its features, and a probe environment ---
x, y = nonstationary_dgp()
spread = y - x
df = pd.DataFrame({'x': x, 'y': y, 'close': spread}) # close price of portfolio is the spread
df_train = df.copy()  # hand a copy to FeatureEngineering; `df` itself is not passed in

# Feature specification passed to FeatureEngineering.build_features.
# The training loop in the next cell reads this same `feature_config`.
feature_config = [
    {'type': 'rolling',
     'params': {
       'cols': ['close'],
       'window': 7,
       'mode': 'diff',
       'subtract_mean': True
     }},
]

fe = FeatureEngineering(df_train)
df_train = fe.build_features(feature_config)

env_action_space = [-1, 0, 1] # Positions : [-1=SHORT, 0=OUT, 1=LONG]
# env_action_space = [i/10 for i in range(-10, 11)] # Positions : [-1, -0.9, -0.8, ..., 0, 0.1, 0.2, ..., 1]

# NOTE(review): the environment is labelled "stationaryDGP" although the data above
# comes from nonstationary_dgp() -- confirm the label is intentional.
env = gym.make("TradingEnv",
        name= "stationaryDGP",
        df = df_train, # Your dataset with your custom features
        positions = env_action_space, # -1 (=SHORT), 0(=OUT), +1 (=LONG)
        trading_fees = 0.01/100, # 0.01% per stock buy / sell (Binance fees)
        borrow_interest_rate= 0.0003/100, # 0.0003% per timestep (one timestep = 1h here)
        # max_episode_duration=180
    )
# Extra per-episode metrics: number of position switches and number of recorded steps.
env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
env.unwrapped.add_metric('Episode Length', lambda history : len(history['position']) )

# Sizes used to construct the agent: number of discrete actions and the length of
# the first observation returned by reset().
n_actions = env.action_space.n
n_observations = len(env.reset()[0])

In [4]:
# Train a DQN agent on freshly simulated paths from nonstationary_dgp().
# Every simulation draws a new path, rebuilds the features with `feature_config`
# and wraps the result in its own TradingEnv.
agent = DQNAgent(n_observations=n_observations, n_actions=n_actions)
dqn_history_metrics = []  # one entry per simulation; `history_metrics` alone only keeps the last one
for i_simulation in range(15):
    x, y = nonstationary_dgp()
    spread = y - x
    df = pd.DataFrame({'x': x, 'y': y, 'close': spread}) # close price of portfolio is the spread
    df_train = df.copy()

    fe = FeatureEngineering(df_train)
    df_train = fe.build_features(feature_config)

    # Define the environment
    # NOTE(review): labelled "stationaryDGP" although the data comes from
    # nonstationary_dgp() -- confirm the label is intentional.
    env = gym.make("TradingEnv",
            name= "stationaryDGP",
            df = df_train, # Your dataset with your custom features
            positions = env_action_space, # -1 (=SHORT), 0(=OUT), +1 (=LONG)
            trading_fees = 0.01/100, # 0.01% per stock buy / sell (Binance fees)
            borrow_interest_rate= 0.0003/100, # 0.0003% per timestep (one timestep = 1h here)
            # max_episode_duration=180
        )
    env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
    env.unwrapped.add_metric('Episode Length', lambda history : len(history['position']) )

    history_metrics, _ = agent.train(env, num_episodes=1) # One episode is one simulation
    dqn_history_metrics.append(history_metrics)

    # A new environment is built on the next iteration, so release this one.
    env.close()

Market Return : -4.73%   |   Portfolio Return : -51.76%   |   Position Changes : 5604   |   Episode Length : 10000   |   
Market Return : -23.38%   |   Portfolio Return : -34.64%   |   Position Changes : 3336   |   Episode Length : 10000   |   
Market Return : -1.57%   |   Portfolio Return : -33.16%   |   Position Changes : 3148   |   Episode Length : 10000   |   
Market Return : -0.15%   |   Portfolio Return : -30.51%   |   Position Changes : 3113   |   Episode Length : 10000   |   
Market Return : 32.64%   |   Portfolio Return : -36.42%   |   Position Changes : 3249   |   Episode Length : 10000   |   
Market Return :  2.53%   |   Portfolio Return : -28.31%   |   Position Changes : 3252   |   Episode Length : 10000   |   
Market Return : 14.90%   |   Portfolio Return : -33.43%   |   Position Changes : 3071   |   Episode Length : 10000   |   
Market Return : 15.17%   |   Portfolio Return : -34.12%   |   Position Changes : 3154   |   Episode Length : 10000   |   
Market Return : -3.76% 

In [5]:
# --- Experiment 2 setup: new simulated path, 'rolling_mean_corrected' features ---
x, y = nonstationary_dgp()
spread = y - x
# The portfolio's close price is the spread between the two simulated series.
df = pd.DataFrame({'x': x, 'y': y, 'close': spread})
df_train = df.copy()

# This replaces the notebook-level `feature_config`; the training loop in the
# next cell reads it when rebuilding features for each simulation.
feature_config = [
    {
        'type': 'rolling_mean_corrected',
        'params': {
            'cols': ['close'],
            'window': 100,
            'mode': 'diff',
        },
    },
]

fe = FeatureEngineering(df_train)
df_train = fe.build_features(feature_config)

# Positions : [-1=SHORT, 0=OUT, 1=LONG]
env_action_space = [-1, 0, 1]
# Alternative position grid [-1, -0.9, -0.8, ..., 0, 0.1, 0.2, ..., 1]:
# env_action_space = [i/10 for i in range(-10, 11)]

env = gym.make(
    "TradingEnv",
    name="stationaryDGP",
    df=df_train,                         # dataset with the custom features
    positions=env_action_space,          # -1 (=SHORT), 0(=OUT), +1 (=LONG)
    trading_fees=0.01/100,               # 0.01% per stock buy / sell (Binance fees)
    borrow_interest_rate=0.0003/100,     # 0.0003% per timestep (one timestep = 1h here)
    # max_episode_duration=180
)

# Extra per-episode metrics, registered in the order listed.
extra_metrics = {
    'Position Changes': lambda history: np.sum(np.diff(history['position']) != 0),
    'Episode Length': lambda history: len(history['position']),
}
for metric_name, metric_fn in extra_metrics.items():
    env.unwrapped.add_metric(metric_name, metric_fn)

# Agent dimensions: discrete action count and length of the first observation.
n_actions = env.action_space.n
initial_observation = env.reset()[0]
n_observations = len(initial_observation)

In [6]:
# Train an XGBoost agent on freshly simulated paths from nonstationary_dgp().
# Every simulation draws a new path, rebuilds the features with `feature_config`
# and wraps the result in its own TradingEnv.
agent = XGBoostAgent(n_observations, n_actions)
xgb_history_metrics = []  # one entry per simulation; `history_metrics` alone only keeps the last one
for i_simulation in range(15):
    x, y = nonstationary_dgp()
    spread = y - x
    df = pd.DataFrame({'x': x, 'y': y, 'close': spread}) # close price of portfolio is the spread
    df_train = df.copy()

    fe = FeatureEngineering(df_train)
    df_train = fe.build_features(feature_config)

    # Define the environment
    # NOTE(review): labelled "stationaryDGP" although the data comes from
    # nonstationary_dgp() -- confirm the label is intentional.
    env = gym.make("TradingEnv",
            name= "stationaryDGP",
            df = df_train, # Your dataset with your custom features
            positions = env_action_space, # -1 (=SHORT), 0(=OUT), +1 (=LONG)
            trading_fees = 0.01/100, # 0.01% per stock buy / sell (Binance fees)
            borrow_interest_rate= 0.0003/100, # 0.0003% per timestep (one timestep = 1h here)
            # max_episode_duration=180
        )
    env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
    env.unwrapped.add_metric('Episode Length', lambda history : len(history['position']) )

    history_metrics, _ = agent.train(env, num_episodes=1, replay_every=1) # One episode is one simulation
    xgb_history_metrics.append(history_metrics)

    # A new environment is built on the next iteration, so release this one.
    env.close()

Market Return : -0.20%   |   Portfolio Return : -53.24%   |   Position Changes : 6664   |   Episode Length : 10000   |   
Market Return : -0.65%   |   Portfolio Return : -57.62%   |   Position Changes : 6578   |   Episode Length : 10000   |   
Market Return : -1.17%   |   Portfolio Return : -49.86%   |   Position Changes : 5606   |   Episode Length : 10000   |   
Market Return : -2.80%   |   Portfolio Return : -24.08%   |   Position Changes : 4159   |   Episode Length : 10000   |   
Market Return :  7.95%   |   Portfolio Return : -26.65%   |   Position Changes : 3299   |   Episode Length : 10000   |   
Market Return : 23.43%   |   Portfolio Return : -19.08%   |   Position Changes : 3091   |   Episode Length : 10000   |   
Market Return : 21.89%   |   Portfolio Return : 11.09%   |   Position Changes : 2810   |   Episode Length : 10000   |   
Market Return :  9.75%   |   Portfolio Return : -7.21%   |   Position Changes : 2522   |   Episode Length : 10000   |   
Market Return : -12.36%   