In [None]:
import time
import datetime
import pandas as pd
from finrl_meta.env_fx_trading.env_fx import tgym
from ray.rllib.agents.ars import ars

# Training

`train()` function based on `get_model()` and `train_model()` from finrl_meta's agents/rllib_models.py file. 

In [None]:
def train(env, agent, if_vix=True, **kwargs):
    """Train an ARS policy on the EURUSD 2022 yearly data and checkpoint it.

    Args:
        env: Environment class handed to RLlib (the notebook passes ``tgym``).
        agent: Name used to build the checkpoint directory (``./<agent>``).
            The algorithm itself is always ARS, whatever this value is.
        if_vix: Unused by this function; kept so existing calls still work.
        **kwargs: Unused by this function; kept so existing calls still work.

    Returns:
        The value returned by ``trainer.save()`` (the checkpoint path).
    """
    import ray

    # Prepare
    seed = 312
    cwd = './' + str(agent)
    total_episodes = 1

    df = pd.read_csv("./data/split/EURUSD/yearly/EURUSD_2022_0.csv")

    model_config = ars.DEFAULT_CONFIG.copy()
    model_config["env"] = env
    model_config["log_level"] = "WARN"
    model_config["seed"] = seed
    model_config["reuse_actors"] = True
    model_config["env_config"] = {
        "if_train": True,
        'seed': seed,
        'df': df
    }

    # Train
    ray.init(
        ignore_reinit_error=True
    )  # Other Ray APIs will not work until `ray.init()` is called.

    try:
        trainer = ars.ARSTrainer(env=env, config=model_config)

        for _ in range(total_episodes):
            trainer.train()

        # Save while the Ray runtime is still alive; the original shut Ray
        # down first and only then asked the trainer to checkpoint.
        checkpoint_path = trainer.save(cwd)
    finally:
        # Always release Ray resources, even if training or saving fails.
        ray.shutdown()

    print(f"Model saved: {cwd}")
    return checkpoint_path

Train the model. As configured, this should take only a minute or two.

In [None]:
# Run one training pass with the FX environment; the checkpoint lands under ./ars
train(env=tgym, agent="ars")

# Testing

Test script based on `DRLAgent.DRL_prediction()` from finrl_meta's agents/rllib_models.py file.

The environment defines `action_space`:
```
self.action_space = spaces.Box(low=0,
                               high=3,
                               shape=(len(self.assets),))
```

Yet the trained policy usually produces actions below 0, in the -1 to 0 range.

Other code built on this (modified environment, etc.), which uses a `Box` action space from -3 to 3, sometimes sees actions as far out of bounds as ±60.

In [None]:
import pandas as pd
from finrl_meta.env_fx_trading.env_fx import tgym
import time
import datetime

# Evaluation data: one weekly EURUSD file (the yearly file is the one used in train()).
datafile = "./data/split/EURUSD/weekly/EURUSD_2021_0.csv"
#datafile = "./data/split/EURUSD/yearly/EURUSD_2022_0.csv"
df = pd.read_csv(datafile)

agent_path = './ars/checkpoint_000001/checkpoint-1'

env_config = {
    "df": df,
    "if_train": False,
}
model_config = ars.DEFAULT_CONFIG.copy()
model_config["env"] = tgym
model_config["log_level"] = "DEBUG"
model_config["env_config"] = env_config

# Separate environment instance that is stepped manually below.
env_instance = tgym(config=env_config)

trainer = ars.ARSTrainer(env=tgym, config=model_config)

trainer.restore(agent_path)
print("Restoring from checkpoint path", agent_path)

# Bounds of the first action component. Only that component is inspected
# below, matching `action = action_raw[0]`.
action_low = env_instance.action_space.low[0]
action_high = env_instance.action_space.high[0]

# test on the testing env
state = env_instance.reset()
episode_actions = list()
oob_actions = list()
done = False

while not done:
    action_raw = trainer.compute_single_action(state, explore=False)
    state, _, done, _ = env_instance.step(action_raw)

    action = action_raw[0]
    # Box bounds are inclusive, so only values strictly outside [low, high]
    # count as out-of-bounds (the original used <= / >= and flagged the
    # boundary values themselves).
    if action < action_low or action > action_high:
        print(f"OOB action: {action}")
        oob_actions.append(action)

    episode_actions.append(action)

print(f"episode actions:\n{episode_actions}")
print(f"\n{len(oob_actions)} actions out of {len(episode_actions)} were out-of-bounds")
