In [1]:
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

import gymnasium as gym
import gym_trading_env

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from RLTrade.agent import DQNAgent, XGBoostAgent
from RLTrade.utils import (
    stationaryDGP, 
    nonstationaryDGP, 
    build_features, 
    build_rolling_feature,
)

In [3]:
# --- Simulated data -------------------------------------------------------
# Draw one (x, y) pair from the non-stationary generator; the spread y - x is
# stored under "close" so it plays the role of the portfolio's close price.
x, y = nonstationaryDGP()
spread = y - x
df = pd.DataFrame({'x': x, 'y': y, 'close': spread})

# --- Rolling features -----------------------------------------------------
# window=7          : rolling window length (per the original notes, padded
#                     with the first value while the window is not yet full)
# mode="diff"       : difference between current and previous close price
# mean_correction   : forwarded unchanged to build_rolling_feature
feature_config = dict(
    feature_col="close",
    window=7,
    mode="diff",
    mean_correction=True,
)
df_train = build_rolling_feature(df.copy(), **feature_config)

# --- Action space ---------------------------------------------------------
# Allowed positions: -1 = SHORT, 0 = OUT, 1 = LONG.
# A finer grid would be [i/10 for i in range(-10, 11)], i.e. -1, -0.9, ..., 0.9, 1.
env_action_space = [-1, 0, 1]

# --- Environment ----------------------------------------------------------
env_kwargs = {
    "name": "stationaryDGP",
    "df": df_train,                        # dataset including the custom feature columns
    "positions": env_action_space,
    "trading_fees": 0.01/100,              # 0.01% per buy / sell
    "borrow_interest_rate": 0.0003/100,    # 0.0003% per timestep
    # "max_episode_duration": 180 is left disabled, as in the original cell
}
env = gym.make("TradingEnv", **env_kwargs)


def _count_position_changes(history):
    """Count the steps whose position differs from the previous step's position."""
    return np.sum(np.diff(history['position']) != 0)


def _episode_length(history):
    """Number of recorded positions in the episode history."""
    return len(history['position'])


# Extra metrics registered on the unwrapped environment.
env.unwrapped.add_metric('Position Changes', _count_position_changes)
env.unwrapped.add_metric('Episode Length', _episode_length)

# --- Dimensions used to size the agent -------------------------------------
n_actions = env.action_space.n
first_observation = env.reset()[0]  # reset() returns a sequence whose first item is the observation
n_observations = len(first_observation)

In [6]:
# Train a DQN agent on data simulated from the non-stationary data generating process.
# Every simulation draws a fresh (x, y) pair, rebuilds the rolling features and the
# trading environment from it, and runs exactly one training episode on that environment.
agent = DQNAgent(n_observations=n_observations, n_actions=n_actions)

# Metrics returned by each simulation, in order. `history_metrics` alone is overwritten
# on every iteration, so without this list only the last simulation would be kept and an
# interrupted run would leave no record of the simulations that already completed.
dqn_history_per_simulation = []

for i_simulation in range(100):
    x, y = nonstationaryDGP()
    spread = y - x
    df = pd.DataFrame({'x': x, 'y': y, 'close': spread}) # close price of portfolio is the spread
    df_train = df.copy()

    # Same feature configuration as the setup cell, so the observation layout
    # is built the same way as the one used to size the agent.
    df_train = build_rolling_feature(df_train, **feature_config)

    # Define the environment
    # NOTE(review): the environment is labelled "stationaryDGP" although the data comes
    # from nonstationaryDGP(); the label is kept unchanged here — confirm whether it
    # should be renamed.
    env = gym.make("TradingEnv",
            name= "stationaryDGP",
            df = df_train, # dataset including the custom feature columns
            positions = env_action_space, # -1 (=SHORT), 0(=OUT), +1 (=LONG)
            trading_fees = 0.01/100, # 0.01% per buy / sell
            borrow_interest_rate= 0.0003/100, # 0.0003% per timestep
            # max_episode_duration=180
        )
    # Count of steps whose position differs from the previous step.
    env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
    # Number of recorded positions in the episode.
    env.unwrapped.add_metric('Episode Length', lambda history : len(history['position']) )

    history_metrics, _ = agent.train(env, num_episodes=1) # One episode is one simulation
    dqn_history_per_simulation.append(history_metrics)

Market Return :  2.36%   |   Portfolio Return : -57.85%   |   Position Changes : 5685   |   Episode Length : 10000   |   
Market Return :  7.74%   |   Portfolio Return : -48.66%   |   Position Changes : 4993   |   Episode Length : 10000   |   
Market Return : -7.25%   |   Portfolio Return : -42.90%   |   Position Changes : 4869   |   Episode Length : 10000   |   
Market Return : -5.38%   |   Portfolio Return : -41.10%   |   Position Changes : 4659   |   Episode Length : 10000   |   
Market Return : -10.74%   |   Portfolio Return : -42.78%   |   Position Changes : 4607   |   Episode Length : 10000   |   
Market Return :  8.05%   |   Portfolio Return : -40.73%   |   Position Changes : 4544   |   Episode Length : 10000   |   
Market Return :  3.49%   |   Portfolio Return : -39.97%   |   Position Changes : 4417   |   Episode Length : 10000   |   
Market Return : -0.93%   |   Portfolio Return : -40.76%   |   Position Changes : 4150   |   Episode Length : 10000   |   
Market Return : -9.91% 

KeyboardInterrupt: 

In [2]:
# Simulate one (x, y) pair from the non-stationary generator and trade the spread y - x.
x, y = nonstationaryDGP()
spread = y - x
df = pd.DataFrame({'x': x, 'y': y, 'close': spread}) # close price of portfolio is the spread
df_train = df.copy()

feature_config = {
        "feature_col": "close",
        "window": 100, # 100-step rolling window (per the original notes, padded with the first value when the window is not available)
        "mode": "diff", # difference between current and previous close price
        "mean_correction": True,
    }
df_train = build_rolling_feature(df_train, **feature_config)
# Drop the rolling columns feature_rolling_1 .. feature_rolling_{window-1}.
# The upper bound is read from feature_config["window"] instead of repeating the literal
# 100, so changing the window cannot leave the drop list out of sync.
# NOTE(review): which rolling column(s) remain afterwards depends on how
# build_rolling_feature names its outputs — confirm against its implementation.
df_train = df_train.drop(
    columns=[f"feature_rolling_{i}" for i in range(1, feature_config["window"])]
)

env_action_space = [-1, 0, 1] # Positions : [-1=SHORT, 0=OUT, 1=LONG]
# env_action_space = [i/10 for i in range(-10, 11)] # Positions : [-1, -0.9, -0.8, ..., 0, 0.1, 0.2, ..., 1]

# NOTE(review): the environment is labelled "stationaryDGP" although the data comes from
# nonstationaryDGP(); the label is kept unchanged here — confirm whether it should be renamed.
env = gym.make("TradingEnv",
        name= "stationaryDGP",
        df = df_train, # dataset including the custom feature columns
        positions = env_action_space, # -1 (=SHORT), 0(=OUT), +1 (=LONG)
        trading_fees = 0.01/100, # 0.01% per buy / sell
        borrow_interest_rate= 0.0003/100, # 0.0003% per timestep
        # max_episode_duration=180
    )
# Count of steps whose position differs from the previous step.
env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
# Number of recorded positions in the episode.
env.unwrapped.add_metric('Episode Length', lambda history : len(history['position']) )

# Dimensions used to size the agent: number of discrete actions and observation length.
n_actions = env.action_space.n
n_observations = len(env.reset()[0])

In [4]:
# Train an XGBoost agent on data simulated from the non-stationary data generating process.
# Every simulation draws a fresh (x, y) pair, rebuilds the rolling features and the
# trading environment from it, and runs exactly one training episode on that environment.
agent = XGBoostAgent(n_observations, n_actions)

# Metrics returned by each simulation, in order. `history_metrics` alone is overwritten
# on every iteration, so without this list only the last simulation would be kept and an
# interrupted run would leave no record of the simulations that already completed.
xgb_history_per_simulation = []

for i_simulation in range(100):
    x, y = nonstationaryDGP()
    spread = y - x
    df = pd.DataFrame({'x': x, 'y': y, 'close': spread}) # close price of portfolio is the spread
    df_train = df.copy()

    df_train = build_rolling_feature(df_train, **feature_config)
    # Drop feature_rolling_1 .. feature_rolling_{window-1}; the bound follows
    # feature_config["window"] rather than a repeated literal so both stay in sync.
    # NOTE(review): which rolling column(s) remain depends on build_rolling_feature's
    # output naming — confirm against its implementation.
    df_train = df_train.drop(
        columns=[f"feature_rolling_{i}" for i in range(1, feature_config["window"])]
    )

    # Define the environment
    # NOTE(review): the environment is labelled "stationaryDGP" although the data comes
    # from nonstationaryDGP(); the label is kept unchanged here — confirm whether it
    # should be renamed.
    env = gym.make("TradingEnv",
            name= "stationaryDGP",
            df = df_train, # dataset including the custom feature columns
            positions = env_action_space, # -1 (=SHORT), 0(=OUT), +1 (=LONG)
            trading_fees = 0.01/100, # 0.01% per buy / sell
            borrow_interest_rate= 0.0003/100, # 0.0003% per timestep
            # max_episode_duration=180
        )
    # Count of steps whose position differs from the previous step.
    env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
    # Number of recorded positions in the episode.
    env.unwrapped.add_metric('Episode Length', lambda history : len(history['position']) )

    history_metrics, _ = agent.train(env, num_episodes=1, replay_every=1) # One episode is one simulation
    xgb_history_per_simulation.append(history_metrics)

Market Return : -16.40%   |   Portfolio Return : -69.78%   |   Position Changes : 6651   |   Episode Length : 10000   |   
Market Return : -4.15%   |   Portfolio Return : -53.10%   |   Position Changes : 5653   |   Episode Length : 10000   |   
Market Return :  0.61%   |   Portfolio Return : -33.51%   |   Position Changes : 3545   |   Episode Length : 10000   |   
Market Return : 12.32%   |   Portfolio Return : -21.38%   |   Position Changes : 1671   |   Episode Length : 10000   |   
Market Return : -10.05%   |   Portfolio Return : -13.25%   |   Position Changes : 963   |   Episode Length : 10000   |   
Market Return : -7.67%   |   Portfolio Return : -4.29%   |   Position Changes : 549   |   Episode Length : 10000   |   
Market Return :  4.72%   |   Portfolio Return : -2.00%   |   Position Changes : 348   |   Episode Length : 10000   |   
Market Return :  6.37%   |   Portfolio Return : -0.14%   |   Position Changes : 244   |   Episode Length : 10000   |   
Market Return : 15.65%   |   

KeyboardInterrupt: 