In [None]:
%load_ext autoreload
%autoreload 2
%load_ext line_profiler
import cantera as ct
import os
import gym 
import numpy as np 
from stable_baselines.common.policies import MlpPolicy, CnnLstmPolicy,MlpLstmPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.bench import Monitor
from stable_baselines.results_plotter import load_results, ts2xy
from stable_baselines.common import set_global_seeds
from stable_baselines import ACKTR
from envs.sim_env import SimEnv

In [None]:
def make_env(rank, log_dir, seed=0):
    """
    Build a factory for one monitored ``SimEnv`` instance (multiprocessed env helper).

    The global seed is set as soon as this function is called; the environment
    itself is only constructed when the returned callable is invoked.

    :param rank: (int) index of the subprocess; added to ``seed`` to seed this env
    :param log_dir: forwarded to ``Monitor`` as its second argument
    :param seed: (int) the initial seed for RNG
    :return: (callable) zero-argument function that creates, seeds and returns the env
    """
    set_global_seeds(seed)

    def _make():
        # Wrap the simulator in a Monitor, then give it a per-rank seed.
        monitored = Monitor(SimEnv(), log_dir)
        monitored.seed(seed + rank)
        return monitored

    return _make

In [None]:
# Module-level training state shared across callback invocations:
# best mean reward seen so far, and the number of callback calls made.
best_mean_reward, n_steps = -np.inf, 0

def callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2).

    On every 1000th invocation the monitor results under the global ``log_dir`` are
    loaded, the mean of the last 100 entries of the reward series is printed, and the
    model is saved to ``<log_dir>/best_model.pkl`` when that mean beats the best value
    seen so far.

    NOTE(review): ``log_dir`` is not a parameter; it is read from the notebook's global
    namespace and must be bound before the first evaluation (the 1000th call),
    otherwise a NameError is raised.

    :param _locals: (dict) local variables of the caller; ``_locals['self']`` is the
        object whose ``save`` method is called
    :param _globals: (dict) global variables of the caller (unused)
    :return: (bool) always True
    """
    global n_steps, best_mean_reward
    # Print stats every 1000 calls
    if (n_steps + 1) % 1000 == 0:
        # Evaluate policy training performance
        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            print(x[-1], 'timesteps')
            print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(best_mean_reward, mean_reward))

            # New best model: remember the score and persist the agent
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                print("Saving new best model")
                # Join with a path separator; plain string concatenation produced
                # e.g. "/tmp/gym/0best_model.pkl" when log_dir had no trailing slash.
                _locals['self'].save(os.path.join(log_dir, 'best_model.pkl'))
    n_steps += 1
    return True

In [None]:
num_cpu = 8  # Number of processes to use

# One log directory per worker; each path is passed to make_env for that worker
log_dirs = [f"/tmp/gym/{i}" for i in range(num_cpu)]
# Create the directories with a plain loop rather than a side-effect-only list
# comprehension. The loop variable is deliberately not named `log_dir`, so this
# cell does not bind a module-level `log_dir`.
for worker_log_dir in log_dirs:
    os.makedirs(worker_log_dir, exist_ok=True)
# Create the vectorized environment: one make_env factory per worker rank
env = SubprocVecEnv([make_env(rank, worker_dir) for rank, worker_dir in enumerate(log_dirs)])
