In [1]:
import os

import numpy as np
import gymnasium as gym
import gym_market

from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.callbacks import BaseCallback

ModuleNotFoundError: No module named 'gym_market'

In [None]:
class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Callback that periodically checkpoints the model and keeps the best one
    based on the training reward (in practice, we recommend using ``EvalCallback``).

    Whenever ``n_calls`` is a multiple of ``check_freq`` a numbered checkpoint is
    written to ``<log_dir>/models``. The ``Monitor`` results found in ``log_dir``
    are then loaded, and if the mean reward of the most recent ``reward_window``
    episodes exceeds the best mean seen so far, the model is additionally saved
    to ``<log_dir>/best_model``.

    :param check_freq: (int) The check runs every ``check_freq`` calls.
    :param log_dir: (str) Path to the folder where the model will be saved.
      It must contain the file created by the ``Monitor`` wrapper.
    :param verbose: (int) Progress messages are printed when greater than 0.
    :param reward_window: (int) Number of most recent episodes whose rewards
      are averaged. Defaults to 96.
    :raises ValueError: if ``reward_window`` is smaller than 1.
    """

    def __init__(self, check_freq: int, log_dir: str, verbose=1, reward_window: int = 96):
        super().__init__(verbose)
        if reward_window < 1:
            # A window of 0 would make ``y[-0:]`` silently average every episode.
            raise ValueError(f"reward_window must be >= 1, got {reward_window}")
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.reward_window = reward_window
        self.save_path = os.path.join(log_dir, "models")
        self.best_model_save_path = os.path.join(log_dir, "best_model")
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        # Create the checkpoint folder if needed
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        """Checkpoint and evaluate every ``check_freq`` calls; always returns True."""
        if self.n_calls % self.check_freq != 0:
            return True

        # Build the checkpoint path once so the saved file and the message agree.
        checkpoint_path = os.path.join(self.save_path, f"model_step_{self.n_calls}.zip")
        self.model.save(checkpoint_path)
        print(f"checkpoint: {checkpoint_path}")

        # Retrieve training reward
        x, y = ts2xy(load_results(self.log_dir), "timesteps")
        if len(x) == 0:
            # No episode has been logged yet, nothing to compare.
            return True

        # Mean training reward over the last ``reward_window`` episodes
        mean_reward = np.mean(y[-self.reward_window:])
        if self.verbose > 0:
            print(f"Num timesteps: {self.num_timesteps}")
            print(
                f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}"
            )

        # New best model: remember the score and overwrite the best-model file
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            if self.verbose > 0:
                print(f"Saving new best model to {self.best_model_save_path}.zip")
            self.model.save(self.best_model_save_path)

        return True

In [None]:
# Folder passed to the Monitor wrapper and to the checkpoint callback below;
# it is created up front so both can write into it.
log_dir = "./log/"
os.makedirs(name=log_dir, exist_ok=True)

In [None]:
# Training-set folders: one for the "lobs" files and one for the "tapes" files
# (presumably limit-order-book snapshots and trade tapes; confirm against gym_market).
lobs_trainset_dir, tapes_trainset_dir = "./data/lobs/", "./data/tapes/"

In [None]:
# os.listdir returns entries in arbitrary order, and the training loop pairs
# the two lists positionally with zip(). Sorting both keeps the i-th LOB file
# aligned with the i-th tape file.
# NOTE(review): assumes matching LOB/tape files have names that sort into the
# same relative order; confirm against the dataset naming scheme.
lobs_trainset_files = sorted(os.listdir(lobs_trainset_dir))
tapes_trainset_files = sorted(os.listdir(tapes_trainset_dir))

## PPO

In [2]:
from stable_baselines3 import PPO

In [None]:
# One callback instance is shared by every learn() call.
callback = SaveOnBestTrainingRewardCallback(check_freq=3000, log_dir=log_dir)

# flag stays 0 until the PPO model has been created. It is set to 1 afterwards
# so that later data files reuse the same model through set_env() instead of
# building a fresh, untrained one each time.
flag = 0
for i in range(100):
    for lob_data_dir, tape_data_dir in zip(lobs_trainset_files, tapes_trainset_files):
        lob_data_path = os.path.join(lobs_trainset_dir, lob_data_dir)
        tape_data_path = os.path.join(tapes_trainset_dir, tape_data_dir)

        env = gym.make(
            'Market-v0',
            lob_data_dir=lob_data_path,
            tape_data_dir=tape_data_path,
            render_mode='human',
        )
        # NOTE(review): every Monitor is pointed at the same log_dir; confirm
        # whether the results of earlier environments get overwritten.
        env = Monitor(env, log_dir)

        try:
            if flag == 0:
                model = PPO("MlpPolicy", env, verbose=1, n_steps=1000, batch_size=50)
                flag = 1
            else:
                model.set_env(env)

            model.learn(total_timesteps=3000, callback=callback)
        finally:
            # Release the environment even if training on this file fails.
            env.close()