In [None]:
import gymnasium as gym

from stable_baselines3 import SAC

# Quick-start demo: train SAC on Pendulum-v1, round-trip the model through
# disk, then watch the reloaded policy act for a bounded number of steps.
env = gym.make("Pendulum-v1", render_mode="human")

try:
    model = SAC("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=10000, log_interval=4)
    model.save("sac_pendulum")

    del model  # remove to demonstrate saving and loading

    model = SAC.load("sac_pendulum")

    # Bounded rollout: an endless `while True` loop would keep this cell
    # running forever, so "Restart & Run All" could never get past it.
    N_DEMO_STEPS = 1000

    obs, info = env.reset()
    for _ in range(N_DEMO_STEPS):
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            # Start a fresh episode so the demo keeps going until the budget is spent.
            obs, info = env.reset()
finally:
    # Always release the render window, even on error or kernel interrupt.
    env.close()

### Train without TensorBoard logging

In [None]:
import gymnasium as gym
from stable_baselines3 import SAC

# Train SAC on Pendulum-v1 without any TensorBoard logging and save the result.
# NOTE(review): render_mode="human" draws every environment step while
# training, which presumably slows learning a lot; consider render_mode=None
# if watching the training run is not required.
env = gym.make("Pendulum-v1", render_mode="human")

try:
    # Define the SAC model
    model = SAC("MlpPolicy", env, verbose=1)

    # Train the model
    model.learn(total_timesteps=10000, log_interval=4)

    # Save the model
    model.save("sac_pendulum")
finally:
    # Release the render window even if training fails or is interrupted.
    env.close()

### Train with TensorBoard logging

In [None]:
import gymnasium as gym
from stable_baselines3 import SAC
import os

# Train SAC on Pendulum-v1 with TensorBoard logging, writing a checkpoint
# after every chunk of TIMESTEPS environment steps.
models_dir = "models/SAC"
# Log directory is relative to the working directory so the notebook runs on
# any machine; set SAC_LOGDIR to send the logs somewhere else.
logdir = os.environ.get("SAC_LOGDIR", "logs")

# exist_ok=True replaces the check-then-create pattern (no race, no branching).
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

# Create the environment
env = gym.make("Pendulum-v1", render_mode="human")

try:
    # Define the SAC model; training metrics are written under `logdir`.
    model = SAC("MlpPolicy", env, verbose=1, tensorboard_log=logdir)

    # Train the model in fixed-size chunks, checkpointing after each one.
    TIMESTEPS = 10000
    N_ITERATIONS = 30
    # Start counting at 1 so each checkpoint is named after the number of
    # steps actually trained (10000, 20000, ...). Counting from 0 labelled
    # the first 10000-step model "0" and shifted every later name by one chunk.
    for i in range(1, N_ITERATIONS + 1):
        # reset_num_timesteps=False keeps one continuous timestep counter
        # (and one TensorBoard run) across the repeated learn() calls.
        model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="SAC")
        model.save(f"{models_dir}/{TIMESTEPS*i}")
finally:
    # Release the render window even if training fails or is interrupted.
    env.close()

In [4]:
import gymnasium as gym
from stable_baselines3 import SAC
from stable_baselines3.common.env_util import make_vec_env
import os

# Replay a single episode with a previously saved SAC checkpoint.
models_dir = "models/SAC"
model_path = models_dir + "/90000"

# Environment used only for watching the policy (rendered on screen).
env = gym.make("Pendulum-v1", render_mode="human")

# Restore the policy from the checkpoint file.
model = SAC.load(model_path)

# Step through the environment until the episode ends.
observation, info = env.reset()
while True:
    # Pick the deterministic action for the current observation.
    action, _ = model.predict(observation, deterministic=True)
    observation, reward, terminated, truncated, info = env.step(action)
    episode_over = terminated or truncated
    if episode_over:
        break

env.close()