In [1]:
import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.buffers import RolloutBuffer

from forexgym.utils import Query, CurrencyPair
from forexgym.envs import ContinuousActionEnvironment, DiscreteActionEnvironment
import pandas as pd


In [3]:
def select_close(df: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
    """Return a one-column frame holding only the ``Close`` column of ``df``.

    Any extra positional or keyword arguments are accepted and ignored.
    """
    close_column = df["Close"]
    return pd.DataFrame(close_column)

def article_processor(df: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
    """Derive the feature columns ``x1``..``x6`` from OHLC price data.

    The features are built on a copy, so the caller's ``df`` is left untouched
    (it does not gain ``x1``..``x6`` columns as a side effect of this call).

    Features:
        x1, x2, x3: change of ``Close`` / ``High`` / ``Low`` versus the
            previous row, divided by the current row's value, then lagged by
            one row.
        x4: ``(High - Close) / Close`` of the current row.
        x5: ``(Close - Low) / Close`` of the current row.
        x6: exponentially weighted mean of ``Close`` (``span=26``,
            ``adjust=False``).

    Args:
        df: Frame with ``Open``, ``High``, ``Low``, ``Close`` and ``Date``
            columns.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        A new frame containing ``x1``..``x6`` plus any input columns other
        than ``Open``, ``High``, ``Low``, ``Close`` and ``Date``.

    Raises:
        KeyError: If one of the required columns is missing.
    """

    def lagged_relative_change(prices: pd.Series) -> pd.Series:
        # Row-over-row change relative to the current value, pushed down one row.
        return ((prices - prices.shift(1)) / prices).shift(1)

    close, high, low = df["Close"], df["High"], df["Low"]

    # Work on a copy: assigning columns directly on `df` would mutate the
    # caller's frame.
    features = df.copy()
    features["x1"] = lagged_relative_change(close)
    features["x2"] = lagged_relative_change(high)
    features["x3"] = lagged_relative_change(low)
    features["x4"] = (high - close) / close
    features["x5"] = (close - low) / close
    features["x6"] = close.ewm(span=26, adjust=False).mean()

    return features.drop(["Open", "High", "Low", "Close", "Date"], axis=1)

In [None]:
#timeframes = ["1m", "5m", "15m", "30m", "1H", "4H", "1D"]
timeframes = ["15m", "1H"]

query = Query(
    episode_length=256,
    trading_timeframe="15m",
    trading_column="Close",
    lookback=10,
)

# Register one sub-query per (timeframe, window size) pair; every sub-query
# uses the same feature processor.
for timeframe, window_size in (("15m", 16), ("1H", 4)):
    query.add_query(
        timeframe=timeframe,
        window_size=window_size,
        data_processor=article_processor,
    )

print(query.observation_size)

# NOTE(review): the cells below use `env`, but its construction is commented
# out here. Unless `env` is created somewhere not shown, they will fail with a
# NameError on a fresh kernel -- confirm which environment is intended.
# env = ContinuousActionEnvironment(
#     currency_tickers={
#         "EURUSD": timeframes,
#         "GBPUSD": timeframes
#     },
#     query=query,
#     reward_type="continuous",
#     reward_multiplier=1e3,
#     episode_length=256,
# )

7


In [4]:
# Display the `observation_size` attribute of the configured query.
query.observation_size

120

In [15]:
# Fixed seed so that training runs are repeatable.
SEED = 42

# NOTE(review): `env` must already exist in the kernel; confirm that a cell
# above actually constructs it.
model = PPO(
    "MlpPolicy",
    env,
    n_steps=4096,
    max_grad_norm=0.5,
    rollout_buffer_class=RolloutBuffer,
    seed=SEED,
    tensorboard_log="runs/",
    device="cpu",
    verbose=1,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [16]:
# Training budget passed to `learn` as `total_timesteps`.
TOTAL_TIMESTEPS = 1_000_000

model.learn(total_timesteps=TOTAL_TIMESTEPS)

Logging to runs/PPO_5
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 256       |
|    ep_rew_mean     | -0.000443 |
| time/              |           |
|    fps             | 1098      |
|    iterations      | 1         |
|    time_elapsed    | 3         |
|    total_timesteps | 4096      |
----------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 256           |
|    ep_rew_mean          | -0.000877     |
| time/                   |               |
|    fps                  | 830           |
|    iterations           | 2             |
|    time_elapsed         | 9             |
|    total_timesteps      | 8192          |
| train/                  |               |
|    approx_kl            | 0.00037371155 |
|    clip_fraction        | 0.000415      |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explain

<stable_baselines3.ppo.ppo.PPO at 0x1fa403999d0>

In [None]:
# Run the trained policy for a fixed number of steps, starting a new episode
# whenever the current one ends, and accumulate the reward.
#
# `env` is used through the Gymnasium API, as in the single-episode cell
# further down: reset() returns (obs, info) and step() returns
# (obs, reward, terminated, truncated, info).
obs, info = env.reset()
rewards = 0
for i in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    rewards += reward
    # An episode is over when it is either terminated or truncated.
    if terminated or truncated:
        obs, info = env.reset()

# `env` is reset and stepped again by the next evaluation cell, so it is
# deliberately not closed here.

In [7]:
# Play one full episode with the deterministic policy, printing the reward of
# every step and the final `info` once the episode is over.
obs, info = env.reset()
terminated = False
truncated = False
rewards = 0
# Stop on either end-of-episode flag: checking only `terminated` would keep
# stepping an episode that ended by truncation.
while not (terminated or truncated):
    # `action` keeps the whole value returned by predict(); element 0 is what
    # gets passed to the environment, and a later cell inspects `action[0]`.
    action = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action[0])
    rewards += reward
    print(reward)

print(info)

0.0009200000000000319
0.0003799999999998249
0.00024000000000001798
0.00038999999999989043
0.00042000000000008697
0.00031000000000003247
0.00042999999999993044
0.00045999999999990493
0.00019999999999997797
0.00024000000000001798
0.0006199999999998429
0.000140000000000029
0.0006099999999999994
0.001140000000000141
0.000520000000000076
0.00038000000000004697
0.0001900000000001345
7.00000000000145e-05
0.0004899999999998794
0.0001900000000001345
0.0001500000000000945
8.999999999992347e-05
0.0003799999999998249
0.00043999999999999595
0.00017000000000000348
8.999999999992347e-05
0.00031000000000003247
0.00042999999999993044
0.00018999999999991246
2.999999999997449e-05
0.00015999999999993797
0.00036999999999998145
0.0001100000000000545
0.000520000000000076
0.00024999999999986144
0.0001100000000000545
8.999999999992347e-05
0.00012999999999996348
7.999999999985796e-05
1.999999999990898e-05
0.0001500000000000945
4.999999999988347e-05
0.0
7.00000000000145e-05
0.000180000000000069
8.999999999992347

In [18]:
# `action` is the value returned by the last model.predict() call in the
# single-episode evaluation cell; show its first element.
action[0]

array(1, dtype=int64)

In [10]:
# Path the trained model is saved to.
MODEL_PATH = "models/ppo_01"

model.save(MODEL_PATH)