# RayTradingEnv Gym Regsitry

- The RayTradingEnv which has an optimized format for the Ray Rllib Reinforcement Learning framework can still be used as OpenAI Gym environment
- Pass a dictionary to specify environment parameters

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import gym
import numpy as np
import pandas as pd
from gym.envs.registration import register

In [3]:
from ray_trading_env import RayTradingEnvironment

INFO:ray_trading_env:ray_trading_env logger started.


### Register and Make Gym Environment

In [4]:
max_steps = 252

In [5]:
register(
    id='ray-trading-v0',
    entry_point='ray_trading_env:RayTradingEnvironment',
    max_episode_steps=max_steps
)

In [6]:
register

<function gym.envs.registration.register(id, **kwargs)>

##### Make Gym Env with default parameters

In [7]:
# with default parameters
trading_environment = gym.make('ray-trading-v0')

INFO:ray_trading_env:got data for AAPL...


##### Make Gym Env with custom parameters

In [8]:
# specify parameters in a confif dictionary
config_dict =  {
    "trading_days": 333,
    "trading_cost_bps": 5e-3,
    "time_cost_bps": 2e-4,
    "ticker": "AAPL",
    "get_data_from_csv": True,
    # note: need to specify path when using Tune
    "data_path": "/home/jovyan/machine-learning-for-trading/AAPL_prices.csv",
}

In [11]:
trading_environment = gym.make('ray-trading-v0', 
                               config=config_dict)

INFO:ray_trading_env:got data for AAPL...


In [12]:
print(trading_environment.env.ticker)
print(trading_environment.env.trading_days)
print(trading_environment.env.trading_cost_bps)
print(trading_environment.env.time_cost_bps)

AAPL
333
0.005
0.0002


### Explore Functions

In [13]:
trading_environment.action_space

Discrete(3)

In [14]:
trading_environment.action_space.n

3

In [15]:
trading_environment.observation_space

Box(returns    -0.518692
ret_2     -13.186786
ret_5      -9.157841
ret_10     -6.979122
ret_21     -5.289787
rsi        -1.529044
macd       -5.407722
atr        -0.615589
stoch      -2.762308
ultosc     -3.964109
dtype: float32, returns     0.332152
ret_2      11.431712
ret_5      10.235379
ret_10      9.135829
ret_21      8.238228
rsi         1.499695
macd        5.705033
atr         5.415272
stoch       2.712635
ultosc      2.763141
dtype: float32, (10,), float32)

In [16]:
trading_environment.reward_range

(-inf, inf)

In [17]:
trading_environment.spec.max_episode_steps

252

In [18]:
trading_environment.spec.max_episode_steps

252

### Run Gym Env Loop with random actions (no learning involved)

In [20]:
max_episodes = 10
max_episode_steps = trading_environment.spec.max_episode_steps

result_list = []
for episode in range(1, max_episodes + 1):
    current_state = trading_environment.reset()
    print(".",end="")
    for step in range(max_episode_steps):
        # take random action
        action = np.random.randint(3)
        new_state, reward, done, _ = trading_environment.step(action)
        current_state = new_state
        
    # call the result method of the cuatom trading env    
    result = trading_environment.env.simulator.result().iloc[-1]
    result_list.append(result)

..........

In [22]:
results = pd.DataFrame(result_list).reset_index().drop(columns="index")

In [23]:
results

Unnamed: 0,action,nav,market_nav,market_return,strategy_return,position,cost,trade
0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
2,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
5,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
6,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
7,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
8,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
9,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
