In [5]:
import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.registration import register

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from collections import namedtuple

In [6]:
# Read the headerless OHLCVT csv, parse the epoch-second timestamps and use
# them as the index, then keep only the remaining six value columns as a
# 2-D NumPy array (to_numpy() drops the Time index).
df = (
    pd.read_csv(
        'ETHUSD_5.csv',
        header=None,
        names=['Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Trades'],
    )
    .assign(Time=lambda bars: pd.to_datetime(bars['Time'], unit='s'))
    .set_index('Time')
    .to_numpy()
)

In [16]:
class TradeEnv(gym.Env):
    """All-in / all-out single-asset trading environment over rows of price data.

    Each observation is one row of ``df``; column 0 of the row is used as the
    execution price. The agent holds either only cash or only the asset:

    * ``BUY`` (0)  - converts the whole cash balance into the asset; only
      allowed while nothing is held.
    * ``SELL`` (1) - liquidates the whole position; only allowed while
      something is held. The reward is the realised profit of the round trip.
    * ``HOLD`` (2) - does nothing; penalised more the longer the agent keeps
      holding without trading.

    Impossible buys/sells are penalised with a growing negative reward.

    Args:
        input_dim: Number of features per observation (columns of ``df``).
        max_steps: Maximum number of steps per episode.
        cash_balance: Starting cash; must be positive because the hold
            penalty is scaled by it.
        df: Indexable sequence of observation rows (a 2-D NumPy array in this
            notebook), supporting ``len(df)`` and ``df[i]``.

    Raises:
        ValueError: If ``cash_balance`` is not positive.
    """

    # Discrete action codes understood by ``step`` / ``calculate_pnl``.
    BUY = 0
    SELL = 1
    HOLD = 2

    def __init__(self, input_dim, max_steps, cash_balance, df):
        super().__init__()
        if cash_balance <= 0:
            raise ValueError(f"cash_balance must be positive, got {cash_balance!r}")

        self.input_dim = input_dim
        self.action_space = spaces.Discrete(3)  # actions: buy, sell and hold
        self.observation_space = spaces.Box(
            low=np.zeros(self.input_dim),
            high=np.ones(self.input_dim) * np.inf,
            dtype=np.float64,
        )
        self.df = df
        self.max_steps = max_steps
        self.initial_cash = cash_balance

        self._reset_bookkeeping()

    def _reset_bookkeeping(self):
        """Put every per-episode counter and the inventory back to its start value."""
        self.df_counter = 0
        self.current_step = 0
        self.state = None

        self.hold_counter = 0
        self.impossible_sell_counter = 0
        self.impossible_buy_counter = 0

        self.current_price = 0
        # 'Volatility' and 'Sharpe Ratio' are placeholders: nothing in this
        # class updates them.
        self.inventory = {'Last Buy Price': 0,
                          'Holdings': 0,
                          'Cash Balance': self.initial_cash,
                          'Profit': 0,
                          'Volatility': 0,
                          'Sharpe Ratio': 0}

    def step(self, action):
        """Apply ``action`` at the current row's price, then move to the next row.

        Args:
            action: One of ``BUY`` (0), ``SELL`` (1) or ``HOLD`` (2).

        Returns:
            ``(state, reward, done, info)`` where ``state`` is the next row,
            ``done`` is True once ``max_steps`` is reached or the data is
            exhausted, and ``info`` carries the cumulative ``"Profit"``.

            NOTE: this is the 4-tuple form that the existing callers unpack;
            the current Gymnasium API expects
            ``(obs, reward, terminated, truncated, info)``.

        Raises:
            RuntimeError: If called before ``reset()``.
            ValueError: If ``action`` is not a known action code.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        # Price the action on the row the agent observed, before advancing.
        # Doing this first also means an invalid action leaves the counters
        # untouched.
        reward = self.calculate_pnl(action)

        self.current_step += 1
        # Clamp to the last row so running out of data ends the episode
        # instead of raising IndexError.
        last_row = len(self.df) - 1
        self.df_counter = min(self.df_counter + 1, last_row)
        self.state = self.update_state(self.df[self.df_counter])

        # '>=' keeps `done` True if the caller steps past the limit.
        done = self.current_step >= self.max_steps or self.df_counter >= last_row

        info = {"Profit": self.inventory["Profit"]}

        return self.state, reward, done, info

    def update_state(self, next_state):
        """Store ``next_state`` as the current observation and return it."""
        self.state = next_state
        return self.state

    def reset(self, seed=None, options=None):
        """Start a new episode at the first row of the data.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(state, info)`` with the first row of ``df`` and an empty dict.
        """
        super().reset(seed=seed)
        self._reset_bookkeeping()
        self.state = self.df[self.df_counter]

        info = {}

        return self.state, info

    def calculate_pnl(self, action):
        """Execute ``action`` against the inventory and return its reward.

        The execution price is column 0 of the current state (the open price
        for OHLCVT data).

        Args:
            action: One of ``BUY`` (0), ``SELL`` (1) or ``HOLD`` (2).

        Returns:
            0 for a successful buy, the realised profit for a successful
            sell, a growing negative penalty for an impossible buy/sell, and
            ``-hold_counter / initial_cash`` for a hold.

        Raises:
            ValueError: If ``action`` is not a known action code.
        """
        self.current_price = self.state[0]  # select open price for OHLCVT data
        inventory = self.inventory

        if action == self.BUY:
            # Buying needs an empty position and a usable price: a zero,
            # negative or NaN price would produce inf/NaN holdings.
            if inventory["Holdings"] == 0 and self.current_price > 0:
                inventory["Holdings"] = inventory["Cash Balance"] / self.current_price
                inventory["Cash Balance"] = 0
                inventory["Last Buy Price"] = self.current_price
                self.hold_counter = 0
                reward = 0
            else:
                self.impossible_buy_counter += 1
                reward = -self.impossible_buy_counter

        elif action == self.SELL:
            if inventory["Holdings"] > 0:  # need to own in order to sell
                proceeds = inventory["Holdings"] * self.current_price
                cost_basis = inventory["Holdings"] * inventory["Last Buy Price"]
                inventory["Cash Balance"] += proceeds
                # Realised profit of this round trip only, independent of any
                # cash that was already in the account.
                reward = proceeds - cost_basis
                inventory["Holdings"] = 0
                inventory["Last Buy Price"] = 0
                self.hold_counter = 0
                inventory["Profit"] += reward
            else:
                self.impossible_sell_counter += 1
                reward = -self.impossible_sell_counter

        elif action == self.HOLD:
            # Penalty grows with the number of consecutive holds; the counter
            # is cleared by a successful buy or sell.
            self.hold_counter += 1
            reward = -self.hold_counter / self.initial_cash

        else:
            raise ValueError(
                f"Invalid action {action!r}; expected 0 (buy), 1 (sell) or 2 (hold)"
            )

        return reward

In [31]:
# Register the environment so it can be built by id.
# NOTE(review): `df` is not among these default kwargs although
# TradeEnv.__init__ requires it, so it presumably has to be supplied when the
# environment is made, e.g. gym.make("TradeEnv-v1", df=df) - confirm.
register(
    id="TradeEnv-v1",
    entry_point="__main__:TradeEnv",
    kwargs=dict(input_dim=6, max_steps=288, cash_balance=1000),
)

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


In [17]:
# Short 10-step episode with 1000 in starting cash for the manual checks below.
env = TradeEnv(input_dim=6, max_steps=10, cash_balance=1000, df=df)

# Test: manual smoke test (reset, buy, then sell)

In [18]:
# reset() returns (state, info); the info dict is empty and is discarded here.
first_state, _ = env.reset()

In [19]:
# Initial observation: the first row of the price array.
first_state

array([1934.02      , 1936.57      , 1933.66      , 1935.45      ,
        121.35739837,   40.        ])

In [20]:
# Action 0 = buy: spends the whole cash balance at the current row's open
# price, then the environment advances to the next row.
next_state, reward, done, info  = env.step(0)

In [21]:
# Observation returned by the buy step (the row after the one traded on).
next_state

array([1935.91      , 1937.2       , 1934.58      , 1936.95      ,
          2.38832811,   14.        ])

In [22]:
# After the buy: cash is 0 and 'Holdings' is the starting cash divided by the
# open price recorded in 'Last Buy Price'.
env.inventory

{'Last Buy Price': 1934.02,
 'Holdings': 0.5170577346666528,
 'Cash Balance': 0,
 'Profit': 0,
 'Volatility': 0,
 'Sharpe Ratio': 0}

In [23]:
# Action 1 = sell: liquidates the whole position at the current row's open
# price; the reward is the profit relative to the last buy price.
next_state, reward, done, info  = env.step(1)

In [24]:
# Observation returned by the sell step.
next_state

array([1937.03      , 1937.86      , 1930.85      , 1932.37      ,
         63.91082145,   31.        ])

In [25]:
# After the sell: holdings are back to 0, the proceeds are in 'Cash Balance'
# and the realised gain has been added to 'Profit'.
env.inventory

{'Last Buy Price': 0,
 'Holdings': 0,
 'Cash Balance': 1000.97723911852,
 'Profit': 0.9772391185200604,
 'Volatility': 0,
 'Sharpe Ratio': 0}