# Saving and Loading the model

In [12]:
from stable_baselines3 import PPO, A2C
import os
# Directory that receives the saved model; created up front if it is missing.
save_dir = '/tmp/gym/'
os.makedirs(save_dir, exist_ok=True)

# Passing the environment id as a string lets SB3 create the environment itself.
# `learn` returns the model, so the trained instance is bound to `model`.
model = PPO('MlpPolicy', 'Pendulum-v1', verbose=1).learn(8_000)

# os.path.join avoids the doubled slash that f'{save_dir}/...' produces when
# save_dir already ends in '/'. The archive is written as ppo-pendulum.zip.
model.save(os.path.join(save_dir, 'ppo-pendulum'))

Using cpu device
Creating environment from the given name 'Pendulum-v1'
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 200       |
|    ep_rew_mean     | -1.39e+03 |
| time/              |           |
|    fps             | 11507     |
|    iterations      | 1         |
|    time_elapsed    | 0         |
|    total_timesteps | 2048      |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 200          |
|    ep_rew_mean          | -1.23e+03    |
| time/                   |              |
|    fps                  | 7589         |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0024779988 |
|    clip_fraction        | 

In [13]:
!ls -lah /tmp/gym/

total 272
drwxr-xr-x@  3 geraldamasi  wheel    96B Dec 21 01:11 [34m.[m[m
drwxrwxrwt  27 root         wheel   864B Dec 21 01:11 [30m[42m..[m[m
-rw-r--r--@  1 geraldamasi  wheel   135K Dec 21 01:11 ppo-pendulum.zip


In [14]:
# Draw a random observation from the model's observation space and keep it in `obs`
# so the same input can be replayed against the reloaded model later.
obs = model.env.observation_space.sample()

# Deterministic prediction of the in-memory model, shown for later comparison.
pre_saved_prediction = model.predict(obs, deterministic=True)
print(f'pre saved {pre_saved_prediction}')

pre saved (array([0.00488489], dtype=float32), None)


In [15]:
# Drop the in-memory `model` binding; the next cell restores the policy from the saved
# file into `loaded_model` instead of reusing this object.
del model

In [17]:
# Restore the policy from the archive saved earlier and repeat the deterministic
# prediction on the same `obs`; the printed action should match the "pre saved" output.
# os.path.join avoids the doubled slash that f'{save_dir}/...' produces when
# save_dir already ends in '/'.
loaded_model = PPO.load(os.path.join(save_dir, 'ppo-pendulum'))
print(f'loaded {loaded_model.predict(obs, deterministic=True)}')

loaded (array([0.00488489], dtype=float32), None)


## Wrappers

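The `Monitor` wrapper logs episode statistics, and `DummyVecEnv` vectorizes the environment. When a raw environment is passed to the algorithm, Stable-Baselines3 applies both wrappers automatically, as the output of the cell below shows.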

In [25]:
import numpy as np
import gymnasium as gym
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C

# Build a plain Gymnasium environment without wrapping it by hand.
env = gym.make('Pendulum-v1')

# Explicit wrapping is intentionally left disabled: given a raw env, SB3 reports that it
# adds the Monitor and DummyVecEnv wrappers itself (see the output of this cell).
# env = Monitor(env)
# env = DummyVecEnv([lambda: env])

# Construct the agent first, then train it in place for 1,000 requested timesteps.
model = A2C('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=1_000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 200       |
|    ep_rew_mean        | -1.16e+03 |
| time/                 |           |
|    fps                | 4574      |
|    iterations         | 100       |
|    time_elapsed       | 0         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -1.4      |
|    explained_variance | 0.0762    |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | -27.7     |
|    std                | 0.984     |
|    value_loss         | 698       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 200       |
|    ep_rew_mean        | -1.04e+03 |
| time/                 |           |
|    fps                | 4742  

In [None]:
# TODO: Code a custom wrapper.
