In [11]:
import time
import datetime
import pandas as pd
from finrl_meta.env_fx_trading.env_fx import tgym
from ray.rllib.agents.ars import ars

# Training

`train()` function based on `get_model()` and `train_model()` from finrl_meta's agents/rllib_models.py file. 

In [12]:
def train(env, agent, if_vix = True,**kwargs):
    """Train an ARS agent on ``env`` and write a checkpoint to ``./<agent>``.

    Based on ``get_model()`` / ``train_model()`` from finrl_meta's
    agents/rllib_models.py.

    Args:
        env: Environment class handed to RLlib, both as ``config["env"]`` and
            as the trainer's ``env`` argument.
        agent: Agent name; only used to build the checkpoint directory
            ``'./' + str(agent)``.
        if_vix: Accepted for signature compatibility; not used here.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        None. The checkpoint directory is printed once the model is saved.
    """
    import ray

    # Prepare
    seed = 312
    cwd = './'+str(agent)
    total_episodes = 1  # number of trainer.train() iterations

    df = pd.read_csv("./data/split/EURUSD/yearly/EURUSD_2022_0.csv")

    model_config = ars.DEFAULT_CONFIG.copy()
    model_config["env"] = env
    model_config["log_level"] = "WARN"
    model_config["seed"] = seed
    model_config["reuse_actors"] = True
    model_config["env_config"] = {
        "if_train": True,
        'seed': seed,
        'df': df
    }

    # Train
    ray.init(
        ignore_reinit_error=True
    )  # Other Ray APIs will not work until `ray.init()` is called.

    try:
        trainer = ars.ARSTrainer(env=env, config=model_config)

        for _ in range(total_episodes):
            trainer.train()

        # Save while the Ray runtime is still up; the trainer owns remote
        # workers, so checkpointing must not happen after shutdown.
        trainer.save(cwd)
    finally:
        # Always release Ray resources, even if training or saving failed.
        ray.shutdown()

    print(f"Model saved: {cwd}")

Train the model - should only take a minute or two as configured

In [13]:
# Train an ARS agent on the tgym environment; the checkpoint is written under ./ars.
train(env=tgym,agent="ars")

2022-09-22 10:26:40,210	INFO worker.py:963 -- Calling ray.init() again after it has already been called.
2022-09-22 10:27:20,034	INFO ars.py:270 -- Creating shared noise table.


self.action_space: Box([0.], [3.], (1,), float32)
initial done:
observation_list:['Open', 'High', 'Low', 'Close', 'minute', 'hour', 'day', 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30', 'close_30_sma', 'close_60_sma']
 assets:['EURUSD']
 time serial: 2022-01-03 00:00:00 -> 2022-03-29 00:00:00 length: 5849


2022-09-22 10:27:30,115	INFO ars.py:275 -- Creating actors.
2022-09-22 10:27:30,130	INFO trainable.py:156 -- Trainable.setup took 49.887 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(Worker pid=1684)[0m self.action_space: Box([0.], [3.], (1,), float32)
[2m[36m(Worker pid=1684)[0m initial done:
[2m[36m(Worker pid=1684)[0m observation_list:['Open', 'High', 'Low', 'Close', 'minute', 'hour', 'day', 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30', 'close_30_sma', 'close_60_sma']
[2m[36m(Worker pid=1684)[0m  assets:['EURUSD']
[2m[36m(Worker pid=1684)[0m  time serial: 2022-01-03 00:00:00 -> 2022-03-29 00:00:00 length: 5849
[2m[36m(Worker pid=18488)[0m self.action_space: Box([0.], [3.], (1,), float32)
[2m[36m(Worker pid=18488)[0m initial done:
[2m[36m(Worker pid=18488)[0m observation_list:['Open', 'High', 'Low', 'Close', 'minute', 'hour', 'day', 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30', 'close_30_sma', 'close_60_sma']
[2m[36m(Worker pid=18488)[0m  assets:['EURUSD']
[2m[36m(Worker pid=18488)[0m  time serial: 2022-01-03 00:00:00 -> 2022-03-29 00:00:00 length: 5849
Model saved: ./ars


# Testing

Test script based on `DRLAgent.DRL_prediction()` from finrl_meta's agents/rllib_models.py file.

The environment defines `action_space`:
```
self.action_space = spaces.Box(low=0,
                               high=3,
                               shape=(len(self.assets),))
```

Yet the restored agent usually produces actions below 0, typically in the range -1 to 0.

In other code built on this (modified environment, etc.), where the `Box` action space is -3 to 3, we sometimes see actions as far out of bounds as ±60.

If the run doesn't produce any out-of-bounds (OOB) actions (the log below will tell you), retrain - it sometimes takes a couple of tries to reproduce.

In [14]:
import pandas as pd
from finrl_meta.env_fx_trading.env_fx import tgym
from ray.rllib.agents.ars import ars  # used below; keeps this cell runnable on its own
import time
import datetime

# Evaluation data: one week of EURUSD bars (swap in the yearly file for a longer run).
datafile ="./data/split/EURUSD/weekly/EURUSD_2021_0.csv"
#datafile = "./data/split/EURUSD/yearly/EURUSD_2022_0.csv"
df = pd.read_csv(datafile)

# Checkpoint written by train(agent="ars") after its single training iteration.
agent_path='./ars/checkpoint_000001/checkpoint-1'

env_config = {
    "df": df,
    "if_train": False,
}
model_config = ars.DEFAULT_CONFIG.copy()
model_config["env"] = tgym
model_config["log_level"] = "DEBUG"
model_config["env_config"] = env_config

# Local environment instance that is stepped manually in the loop below.
env_instance = tgym(config=env_config)

trainer = ars.ARSTrainer(env=tgym, config=model_config)

trainer.restore(agent_path)
print("Restoring from checkpoint path", agent_path)

# test on the testing env
state = env_instance.reset()
episode_actions = list()
oob_actions = list()
done = False

# Box bounds are inclusive, so only values strictly outside [low, high] are
# out-of-bounds. Only the first action component is inspected, so compare it
# against the first component of the bounds.
action_low = env_instance.action_space.low[0]
action_high = env_instance.action_space.high[0]

while not done:
    action_raw = trainer.compute_single_action(state, explore=False)
    state, _, done, _ = env_instance.step(action_raw)

    action = action_raw[0]
    if action < action_low or action > action_high:
        print(f"OOB action: {action}")
        oob_actions.append(action)

    episode_actions.append(action)

print(f"episode actions:\n{episode_actions}")
print(f"\n{len(oob_actions)} actions out of {len(episode_actions)} were out-of-bounds")


self.action_space: Box([0.], [3.], (1,), float32)
initial done:
observation_list:['Open', 'High', 'Low', 'Close', 'minute', 'hour', 'day', 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30', 'close_30_sma', 'close_60_sma']
 assets:['EURUSD']
 time serial: 2021-01-04 00:00:00 -> 2021-01-08 23:45:00 length: 480
self.action_space: Box([0.], [3.], (1,), float32)
initial done:
observation_list:['Open', 'High', 'Low', 'Close', 'minute', 'hour', 'day', 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30', 'close_30_sma', 'close_60_sma']
 assets:['EURUSD']
 time serial: 2021-01-04 00:00:00 -> 2021-01-08 23:45:00 length: 480


2022-09-22 10:29:32,752	INFO ars.py:270 -- Creating shared noise table.
2022-09-22 10:29:35,597	INFO services.py:1462 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2022-09-22 10:29:47,863	INFO ars.py:275 -- Creating actors.
2022-09-22 10:29:47,901	INFO trainable.py:156 -- Trainable.setup took 17.719 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2022-09-22 10:29:57,543	INFO trainable.py:535 -- Restored on 127.0.0.1 from checkpoint: ./ars/checkpoint_000001/checkpoint-1
2022-09-22 10:29:57,545	INFO trainable.py:543 -- Current state after restoring: {'_iteration': 1, '_timesteps_total': 49514, '_time_total': 61.33639597892761, '_episodes_total': None}


[2m[36m(Worker pid=24648)[0m self.action_space: Box([0.], [3.], (1,), float32)
[2m[36m(Worker pid=24648)[0m initial done:
[2m[36m(Worker pid=24648)[0m observation_list:['Open', 'High', 'Low', 'Close', 'minute', 'hour', 'day', 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30', 'close_30_sma', 'close_60_sma']
[2m[36m(Worker pid=24648)[0m  assets:['EURUSD']
[2m[36m(Worker pid=24648)[0m  time serial: 2021-01-04 00:00:00 -> 2021-01-08 23:45:00 length: 480
[2m[36m(Worker pid=16512)[0m self.action_space: Box([0.], [3.], (1,), float32)
[2m[36m(Worker pid=16512)[0m initial done:
[2m[36m(Worker pid=16512)[0m observation_list:['Open', 'High', 'Low', 'Close', 'minute', 'hour', 'day', 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30', 'close_30_sma', 'close_60_sma']
[2m[36m(Worker pid=16512)[0m  assets:['EURUSD']
[2m[36m(Worker pid=16512)[0m  time serial: 2021-01-04 00:00:00 -> 2021-01-08 23:45:00 length: 480
Restoring from checkpoint path ./ars/checkpoint_000001/checkpoin

OOB action: -0.07887902855873108
OOB action: -0.0791180431842804
OOB action: -0.07732255756855011
OOB action: -0.0765271931886673
OOB action: -0.07689955830574036
OOB action: -0.07640044391155243
OOB action: -0.07464161515235901
OOB action: -0.0760270357131958
OOB action: -0.07681892812252045
OOB action: -0.07594528794288635
OOB action: -0.0739230364561081
OOB action: -0.07607896625995636
OOB action: -0.07690927386283875
OOB action: -0.07573650777339935
OOB action: -0.07331453263759613
OOB action: -0.07596486806869507
OOB action: -0.07486556470394135
OOB action: -0.07245124876499176
OOB action: -0.06878603994846344
OOB action: -0.07293762266635895
OOB action: -0.07233117520809174
OOB action: -0.06937508285045624
OOB action: -0.06515124440193176
OOB action: -0.09322184324264526
OOB action: -0.09408712387084961
OOB action: -0.09218698740005493
OOB action: -0.08774666488170624
OOB action: -0.09106579422950745
OOB action: -0.09208576381206512
OOB action: -0.08955062925815582
OOB action: -0