In [78]:
import gymnasium as gym
from gymnasium.spaces import Discrete
from gymnasium.spaces import Box
import numpy as np
import pandas as pd

In [79]:
class TradingEnv(gym.Env):
    """Single-asset trading environment backed by a table of market features.

    Each timestep corresponds to one row of ``data``. The observation is that
    row (Close, Volume, SMA Ratio, RSI, Bandwidth) with the current asset
    allocation appended. The five discrete actions shift the fraction of the
    portfolio held in the asset by -25%, -10%, 0%, +10% or +25%, clamped to
    [0, 1]. The reward is the fractional change of the total portfolio value
    (asset + cash) over the step.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table; column 0 is read as the close price unless ``prices``
        is given. The frame is stored by reference, so later in-place changes
        made by the caller are visible to the environment.
    training : bool
        If True, every episode starts at a random row; otherwise at row 0.
    episode_length : int
        Number of steps after which an episode ends.
    budget : float
        Portfolio value at the start of every episode.
    prices : array-like, optional
        Unscaled close prices, one per row of ``data``, used for the reward.
        Supply this whenever the Close column of ``data`` has been rescaled
        (e.g. min-max normalised): percentage returns computed from scaled
        prices are not the real returns. Defaults to column 0 of ``data``.
    """

    def __init__(self, data, training, episode_length=250, budget=10000, prices=None):
        super().__init__()
        self.initial_budget = budget
        self.portfolio_value = budget
        self.cur_row_num = 0
        self.starting_row_num = 0
        self.asset_allocation = 0.0
        self.data = data
        self.episode_length = episode_length
        self.training = training

        if prices is not None:
            prices = np.asarray(prices, dtype=np.float64)
            if len(prices) != len(data):
                raise ValueError(
                    f"prices has {len(prices)} entries but data has {len(data)} rows"
                )
        self.prices = prices

        # action space: Sell 25%, sell 10%, no change, buy 10%, buy 25% (percentages are of total portfolio value, asset + cash, at each timestep)
        self.action_space = Discrete(5)

        # observation space: Close, Volume, SMA Ratio, RSI, Bandwidth, Asset Allocation
        self.observation_space = Box(low=np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
            high=np.array([np.inf, np.inf, np.inf, 100.0, np.inf, 1.0]), dtype=np.float64)

    def _get_obs(self):
        """Return the current data row with the asset allocation appended (float64)."""
        features = self.data.iloc[self.cur_row_num, :].to_numpy(dtype=np.float64)
        return np.append(features, self.asset_allocation)

    def _get_info(self):
        """Return the auxiliary info dict holding the current portfolio value."""
        return {'Portfolio Value': self.portfolio_value}

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Picks the starting row (random in training mode, row 0 otherwise),
        restores the portfolio value to the initial budget and draws the
        starting allocation: 0.0 with probability 0.7, otherwise uniform in
        [0, 1).

        All randomness comes from ``self.np_random`` so that passing ``seed``
        makes the episode reproducible. ``options`` is accepted for Gymnasium
        API compatibility and is not used.

        Raises
        ------
        ValueError
            In training mode, if ``data`` has too few rows for an episode.
        """
        super().reset(seed=seed)

        if self.training:
            # Exclusive upper bound for the start row: leaves room for
            # episode_length + 1 steps without indexing past the last row.
            n_starts = len(self.data) - self.episode_length - 1
            if n_starts <= 0:
                raise ValueError(
                    f"data has {len(self.data)} rows; need more than "
                    f"{self.episode_length + 1} for episode_length={self.episode_length}"
                )
            self.starting_row_num = int(self.np_random.integers(0, n_starts))
        else:
            self.starting_row_num = 0

        self.cur_row_num = self.starting_row_num
        # Without this the value reported in `info` would carry over from the previous episode.
        self.portfolio_value = self.initial_budget

        if self.np_random.random() < 0.7:
            self.asset_allocation = 0.0
        else:
            self.asset_allocation = float(self.np_random.random())

        return self._get_obs(), self._get_info()

    def step(self, action):
        """Advance one row, apply ``action`` and return the Gymnasium 5-tuple.

        The allocation chosen by ``action`` is the one exposed to the price
        move from the previous row to the new current row.
        """
        self.cur_row_num += 1

        steps_taken = self.cur_row_num - self.starting_row_num
        # Also stop on the last available row so the next step cannot index past the data.
        out_of_data = self.cur_row_num >= len(self.data) - 1
        # NOTE(review): a fixed-horizon cut-off is a truncation in Gymnasium's
        # terms; it is kept as `terminated` so callers that only check
        # `terminated` keep working.
        terminated = bool(steps_taken > self.episode_length or out_of_data)
        truncated = False

        self.asset_allocation = self._action_to_allocation(action)
        obs = self._get_obs()
        rew = self._get_reward()
        info = self._get_info()
        return obs, rew, terminated, truncated, info

    def _action_to_allocation(self, action):
        """Map an action to the new asset allocation, clamped to [0, 1].

        Actions 0..3 change the allocation by -0.25, -0.1, 0.0 and +0.1; any
        other value is treated as +0.25.
        """
        if action == 0:
            allocation_change = -.25
        elif action == 1:
            allocation_change = -.1
        elif action == 2:
            allocation_change = 0.0
        elif action == 3:
            allocation_change = .1
        else:
            allocation_change = 0.25
        return max(0.0, min(1.0, self.asset_allocation + allocation_change))

    def _price_at(self, row_num):
        """Return the price used for returns at ``row_num``: ``prices`` if supplied, else column 0 of ``data``."""
        if self.prices is not None:
            return self.prices[row_num]
        return self.data.iloc[row_num, 0]

    # need to also account for slippage and commission costs - add later
    def _get_reward(self):
        """Return the fractional portfolio return over this step and update the portfolio value."""
        prev_price = self._price_at(self.cur_row_num - 1)
        cur_price = self._price_at(self.cur_row_num)

        # A zero previous price (e.g. the minimum of a min-max scaled column)
        # would yield inf/nan and permanently poison portfolio_value.
        if prev_price != 0:
            asset_change = (cur_price - prev_price) / prev_price
        else:
            asset_change = 0.0

        new_portfolio_value = self.portfolio_value * (self.asset_allocation * (1.0 + asset_change) + (1.0 - self.asset_allocation))
        reward = (new_portfolio_value - self.portfolio_value) / self.portfolio_value
        self.portfolio_value = new_portfolio_value
        return float(reward)


In [80]:
from stable_baselines3.common.env_checker import check_env

In [81]:
# Load the feature table and discard the 'Date' column in a single chained
# expression, so re-running the cell always rebuilds `data` from the file.
data = pd.read_csv('Amazon Data.csv').drop(columns=['Date'])
data

Unnamed: 0,Close,Volume,SMA Ratio,RSI,Bandwidth
0,0.200260,56136000,1.368536,62.081040,0.451564
1,0.187500,79008000,1.345808,53.301818,0.412582
2,0.195052,75744000,1.323756,42.939429,0.407098
3,0.200000,16296000,1.308761,42.105261,0.347698
4,0.198958,30936000,1.283152,46.601793,0.310476
...,...,...,...,...,...
6851,219.389999,24819700,1.070065,50.749226,0.086124
6852,220.220001,33956600,1.061704,44.291792,0.074616
6853,224.190002,27515600,1.059974,44.360545,0.068819
6854,227.610001,31849800,1.057722,43.023099,0.066103


In [82]:
# Build a training-mode environment and validate it with the Stable-Baselines3 checker.
# The environment stores a reference to `data` (not a copy), so any later
# in-place change to `data` is also seen by `env`.
env = TradingEnv(data=data, training=True)
check_env(env=env, warn=True)

In [84]:
# Confirm the frame's dimensions as (rows, columns).
data.shape

(6856, 5)

In [85]:
# Smallest closing price in the frame (checked before the min-max scaling below).
data['Close'].min()

0.1812500059604644

In [88]:
# Min-max scale every feature column to [0, 1] in place and remember the
# original [min, max] per column so values can be mapped back later.
#
# NOTE(review): 'Close' is scaled too. TradingEnv._get_reward derives returns
# from column 0 (Close), so an environment reading this frame computes returns
# from scaled prices, and the row holding the minimum becomes 0.0.
# NOTE(review): the statistics come from the whole series, so early rows are
# scaled with information from later rows.
# Re-running this cell on already-scaled data overwrites min_max_values with [0, 1].
min_max_values = {}
for label in ['Close', 'Volume', 'SMA Ratio', 'RSI', 'Bandwidth']:
    col_min, col_max = data[label].min(), data[label].max()
    min_max_values[label] = [col_min, col_max]
    span = col_max - col_min
    if span != 0:
        data[label] = (data[label] - col_min) / span
    else:
        # Constant column: dividing by a zero span would fill it with NaN.
        data[label] = 0.0
min_max_values

{'Close': [0.1812500059604644, 232.92999267578125],
 'Volume': [9744000, 2086584000],
 'SMA Ratio': [0.617760612431569, 1.7465069917435987],
 'RSI': [15.489939905385228, 95.82972391076224],
 'Bandwidth': [0.0169479102029917, 1.2275161158358934]}

In [89]:
# Inspect the frame after the in-place min-max scaling.
data

Unnamed: 0,Close,Volume,SMA Ratio,RSI,Bandwidth
0,0.000082,0.022338,0.665141,0.579926,0.359018
1,0.000027,0.033351,0.645005,0.470649,0.326817
2,0.000059,0.031779,0.625469,0.341667,0.322287
3,0.000081,0.003155,0.612184,0.331284,0.273219
4,0.000076,0.010204,0.589496,0.387253,0.242471
...,...,...,...,...,...
6851,0.941826,0.007259,0.400714,0.438877,0.057143
6852,0.945392,0.011658,0.393307,0.358500,0.047637
6853,0.962449,0.008557,0.391774,0.359356,0.042849
6854,0.977143,0.010644,0.389779,0.342709,0.040605


In [91]:
# Summary statistics; after scaling every column should have min 0 and max 1.
data.describe()

Unnamed: 0,Close,Volume,SMA Ratio,RSI,Bandwidth
count,6856.0,6856.0,6856.0,6856.0,6856.0
mean,0.173034,0.061324,0.359162,0.466658,0.150233
std,0.245386,0.066123,0.096176,0.150851,0.126838
min,0.0,0.0,0.0,0.0,0.0
25%,0.00852,0.026161,0.314379,0.361261,0.066321
50%,0.038762,0.044141,0.36061,0.462538,0.110094
75%,0.313223,0.070376,0.400854,0.56575,0.185583
max,1.0,1.0,1.0,1.0,1.0
