In [137]:
import gymnasium as gym
from gymnasium.spaces import Discrete
from gymnasium.spaces import Box
import numpy as np
import pandas as pd

In [138]:
class TradingEnv(gym.Env):
    """Single-asset trading environment driven by a table of per-timestep features.

    Each step moves one row forward in ``data``. The agent picks one of five
    discrete actions that shift the fraction of the portfolio held in the asset,
    and receives the portfolio's simple return over that step as reward.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per timestep. Column 0 is read as the close price; the observation
        space below assumes five feature columns in total.
    training : bool
        If True, every episode starts at a random row; otherwise at row 0.
    episode_length : int
        Step budget per episode (the episode ends once more than this many steps
        have elapsed since the starting row).
    budget : float
        Portfolio value at the start of every episode.
    prices : array-like, optional
        Raw close prices aligned row-for-row with ``data``. When given, rewards are
        computed from these instead of column 0 of ``data``. Pass this whenever
        ``data`` has been rescaled, because returns computed from min-max scaled
        prices are distorted and undefined where the scaled price is 0.
    """

    def __init__(self, data, training, episode_length = 250, budget=10000, prices=None):
        self.initial_budget = budget      # kept so reset() can restore it
        self.portfolio_value = budget
        self.cur_row_num = 0
        self.starting_row_num = 0
        self.asset_allocation = 0.0
        self.data = data
        self.episode_length = episode_length
        self.training = training

        if prices is not None:
            prices = np.asarray(prices, dtype=np.float64).ravel()
            if len(prices) != len(data):
                raise ValueError(
                    f"prices has {len(prices)} entries but data has {len(data)} rows")
        self.prices = prices

        # action space: Sell 25%, sell 10%, no change, buy 10%, buy 25% (percentages are of total portfolio value, asset + cash, at each timestep)
        self.action_space = Discrete(5)

        # observation space: Close, Volume, SMA Ratio, RSI, Bandwidth, Asset Allocation
        self.observation_space = Box(low=np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
            high=np.array([np.inf, np.inf, np.inf, 100.0, np.inf, 1.0]), dtype=np.float64)

    def _get_obs(self):
        """Return the current data row with the asset allocation appended (float64)."""
        # Force float64 so integer columns (e.g. an unscaled Volume) cannot turn the
        # row into an object array that fails the Box dtype check.
        row = np.asarray(self.data.iloc[self.cur_row_num, :], dtype=np.float64)
        return np.append(row, self.asset_allocation)

    def _get_info(self):
        """Return the info dict holding the current portfolio value."""
        return {'Portfolio Value': self.portfolio_value}

    def _close_at(self, row_num):
        """Return the close price used for reward computation at ``row_num``."""
        if self.prices is not None:
            return self.prices[row_num]
        return self.data.iloc[row_num, 0]

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        ``seed`` seeds the environment's own generator (``self.np_random``), which
        is the only source of randomness used here, so a seeded reset is
        reproducible. ``options`` is accepted for Gymnasium API compatibility and
        is not used.

        Raises
        ------
        ValueError
            In training mode, if ``data`` is too short to fit one episode.
        """
        super().reset(seed=seed)

        if self.training:
            # Upper bound (exclusive) leaves room for episode_length + 1 further rows,
            # so step() never indexes past the end of the table.
            max_start = len(self.data) - self.episode_length - 1
            if max_start <= 0:
                raise ValueError(
                    f"data has {len(self.data)} rows, which is too few for "
                    f"episode_length={self.episode_length}")
            self.starting_row_num = int(self.np_random.integers(0, max_start))
        else:
            self.starting_row_num = 0

        self.cur_row_num = self.starting_row_num

        # Every episode starts from the same budget; without this the portfolio
        # value reported in `info` would compound across episodes.
        self.portfolio_value = self.initial_budget

        # 70% of episodes start fully in cash, the rest with a random allocation.
        if self.np_random.random() < 0.7:
            self.asset_allocation = 0.0
        else:
            self.asset_allocation = float(self.np_random.random())

        return self._get_obs(), self._get_info()

    def step(self, action):
        """Advance one row, apply ``action`` and return the Gymnasium 5-tuple.

        The new allocation is applied to the price move from the previous row to
        the new current row.
        """
        self.cur_row_num += 1

        horizon_reached = (self.cur_row_num - self.starting_row_num) > self.episode_length
        # No further row to move to: end the episode instead of letting the next
        # step() raise IndexError (possible in evaluation mode on short data).
        out_of_data = self.cur_row_num >= len(self.data) - 1
        # NOTE: the time limit is reported through `terminated` (not `truncated`)
        # so existing callers that only check `terminated` keep working.
        terminated = bool(horizon_reached or out_of_data)
        truncated = False

        self.asset_allocation = self._action_to_allocation(action)
        obs = self._get_obs()
        rew = self._get_reward()
        info = self._get_info()
        return obs, rew, terminated, truncated, info

    def _action_to_allocation(self, action):
        """Map a discrete action to the new asset allocation, clamped to [0, 1]."""
        if action == 0: allocation_change = -.25
        elif action == 1: allocation_change = -.1
        elif action == 2: allocation_change = 0.0
        elif action == 3: allocation_change = .1
        else: allocation_change = 0.25
        return max(0.0, min(1.0, self.asset_allocation + allocation_change))

    # need to also account for slippage and commission costs - add later
    def _get_reward(self):
        """Return the simple return of the whole portfolio (asset + cash) over this step.

        Also updates ``self.portfolio_value`` to the post-step value.
        """
        prev_close = self._close_at(self.cur_row_num - 1)
        cur_close = self._close_at(self.cur_row_num)
        if prev_close == 0:
            # A zero previous price (e.g. the minimum row of min-max scaled data)
            # has no defined return; treat it as flat rather than emitting inf/NaN
            # that would poison portfolio_value for the rest of the episode.
            asset_change = 0.0
        else:
            asset_change = (cur_close - prev_close) / prev_close

        new_portfolio_value = self.portfolio_value * (self.asset_allocation * (1.0 + asset_change) + (1.0 - self.asset_allocation))
        reward = (new_portfolio_value - self.portfolio_value) / self.portfolio_value
        self.portfolio_value = new_portfolio_value
        return float(reward)


In [139]:
from stable_baselines3.common.env_checker import check_env

In [140]:
# Load the Amazon price/indicator table and discard the Date column, leaving
# only the numeric columns used downstream.
data = pd.read_csv('Amazon Data.csv').drop(columns=['Date'])
data

Unnamed: 0,Close,Volume,SMA Ratio,RSI,Bandwidth
0,0.200260,56136000,1.368536,62.081040,0.451564
1,0.187500,79008000,1.345808,53.301818,0.412582
2,0.195052,75744000,1.323756,42.939429,0.407098
3,0.200000,16296000,1.308761,42.105261,0.347698
4,0.198958,30936000,1.283152,46.601793,0.310476
...,...,...,...,...,...
6851,219.389999,24819700,1.070065,50.749226,0.086124
6852,220.220001,33956600,1.061704,44.291792,0.074616
6853,224.190002,27515600,1.059974,44.360545,0.068819
6854,227.610001,31849800,1.057722,43.023099,0.066103


In [141]:
# Split by row position (no shuffling) into three consecutive slices:
# first 70% -> training, next 10% -> validation, final 20% -> testing.
# Each slice is deep-copied so later edits to a split never touch `data`.
train_end = int(data.shape[0] * 0.7)
val_end = int(data.shape[0] * 0.8)

training_data = data.iloc[:train_end, :].copy(deep=True)
validation_data = data.iloc[train_end:val_end, :].copy(deep=True)
testing_data = data.iloc[val_end:, :].copy(deep=True)

In [142]:
# Preview the training split (first 70% of the rows).
training_data

Unnamed: 0,Close,Volume,SMA Ratio,RSI,Bandwidth
0,0.200260,56136000,1.368536,62.081040,0.451564
1,0.187500,79008000,1.345808,53.301818,0.412582
2,0.195052,75744000,1.323756,42.939429,0.407098
3,0.200000,16296000,1.308761,42.105261,0.347698
4,0.198958,30936000,1.283152,46.601793,0.310476
...,...,...,...,...,...
4794,41.129501,79962000,1.030820,47.001890,0.049475
4795,40.917999,148128000,1.028067,46.143048,0.051013
4796,38.816002,216822000,1.021927,38.819899,0.075920
4797,39.491001,108266000,1.018205,42.034074,0.084623


In [143]:
# Preview the validation split (rows between the 70% and 80% marks).
validation_data

Unnamed: 0,Close,Volume,SMA Ratio,RSI,Bandwidth
4799,38.278000,100530000,1.006927,43.011603,0.108282
4800,38.351501,77450000,1.001274,37.972623,0.118181
4801,37.752499,102442000,0.993392,37.529148,0.131262
4802,39.246498,119688000,0.986360,47.898735,0.128769
4803,39.387501,68252000,0.980035,48.836398,0.127451
...,...,...,...,...,...
5479,98.225998,63712000,1.057151,52.300541,0.094632
5480,99.281502,58000000,1.057955,50.971172,0.093225
5481,99.724503,54070000,1.057113,50.365844,0.092045
5482,100.040497,52626000,1.054246,50.670351,0.084207


In [144]:
# Preview the testing split (final 20% of the rows).
testing_data

Unnamed: 0,Close,Volume,SMA Ratio,RSI,Bandwidth
5484,97.152496,98542000,1.046944,48.159762,0.071622
5485,95.622498,89864000,1.041169,41.377792,0.067453
5486,94.926498,58218000,1.034977,36.623140,0.072131
5487,93.338997,89414000,1.028293,34.688257,0.085417
5488,92.765999,94266000,1.021859,36.422545,0.099475
...,...,...,...,...,...
6851,219.389999,24819700,1.070065,50.749226,0.086124
6852,220.220001,33956600,1.061704,44.291792,0.074616
6853,224.190002,27515600,1.059974,44.360545,0.068819
6854,227.610001,31849800,1.057722,43.023099,0.066103


In [145]:
# Min-max scale every feature of the training split to [0, 1] and remember the
# raw [min, max] per column in `min_max_values`.
#
# The statistics and the scaled values are always derived from the untouched
# `data` frame (restricted to the training rows), never from `training_data`
# itself. That keeps this cell idempotent: re-running it after `training_data`
# has been scaled would otherwise overwrite `min_max_values` with [0, 1].
min_max_values = {}
for label in ['Close', 'Volume', 'SMA Ratio', 'RSI', 'Bandwidth']:
    raw_column = data.loc[training_data.index, label]
    lo, hi = raw_column.min(), raw_column.max()
    min_max_values[label] = [lo, hi]
    training_data[label] = (raw_column - lo) / (hi - lo)
min_max_values

{'Close': [0.1812500059604644, 42.21799850463867],
 'Volume': [9744000, 2086584000],
 'SMA Ratio': [0.617760612431569, 1.7465069917435987],
 'RSI': [17.177916817881, 95.82972391076224],
 'Bandwidth': [0.0245751126388905, 1.2275161158358934]}

In [146]:
# Inspect the training split after min-max scaling.
training_data

Unnamed: 0,Close,Volume,SMA Ratio,RSI,Bandwidth
0,0.000452,0.022338,0.665141,0.570910,0.354954
1,0.000149,0.033351,0.645005,0.459289,0.322549
2,0.000328,0.031779,0.625469,0.327539,0.317990
3,0.000446,0.003155,0.612184,0.316933,0.268611
4,0.000421,0.010204,0.589496,0.374103,0.237668
...,...,...,...,...,...
4794,0.974106,0.033810,0.365945,0.379190,0.020699
4795,0.969075,0.066632,0.363506,0.368270,0.021978
4796,0.919071,0.099708,0.358066,0.275162,0.042683
4797,0.935128,0.047438,0.354769,0.316028,0.049917


In [147]:
# Summary statistics of the scaled training split; min and max of every column
# should now be 0 and 1.
training_data.describe()

Unnamed: 0,Close,Volume,SMA Ratio,RSI,Bandwidth
count,4799.0,4799.0,4799.0,4799.0,4799.0
mean,0.169364,0.074033,0.361483,0.462516,0.16936
std,0.207661,0.074536,0.109443,0.158764,0.140356
min,0.0,0.0,0.0,0.0,0.0
25%,0.03808,0.035333,0.306916,0.351053,0.072242
50%,0.076391,0.054561,0.362009,0.459076,0.124078
75%,0.247831,0.083794,0.409188,0.569186,0.221932
max,1.0,1.0,1.0,1.0,1.0


In [148]:
# Build a training-mode environment on the scaled training split and run
# Stable-Baselines3's environment checker on it (warn=True enables its warnings).
env = TradingEnv(training_data, training=True)
check_env(env, warn=True)