In [None]:
from stable_baselines3 import SAC
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.monitor import Monitor
import os
import pandas as pd
import numpy as np
import gymnasium as gym
from tqdm import tqdm
import matplotlib.pyplot as plt

# Function to run Stable-Baselines3 SAC
def run_SB3_SAC(env_type, steps, save_dir):
    """Train a Stable-Baselines3 SAC agent and return its per-episode rewards.

    Args:
        env_type: Gymnasium environment id passed to ``gym.make``.
        steps: Total number of timesteps passed to ``model.learn``.
        save_dir: Directory in which the trained model is saved under the
            name ``SB3_SAC_<env_type>``.

    Returns:
        The value of ``Monitor.get_episode_rewards()`` after training, i.e.
        the rewards recorded by the ``Monitor`` wrapper during ``learn``.
    """
    # Monitor wraps the env to record episode statistics during training.
    env = Monitor(gym.make(env_type))
    try:
        model = SAC(
            "MlpPolicy",
            env,
            learning_rate=3e-4,  # Default learning rate for SAC
            gamma=0.99,
            tau=0.005,
            verbose=1,
        )

        model.learn(total_timesteps=steps)
        model.save(f"{save_dir}/SB3_SAC_{env_type}")

        return env.get_episode_rewards()
    finally:
        # Release the simulator even if training or saving fails; this
        # function is called once per environment, so leaks would pile up.
        env.close()

# Comparison script: train SAC on each environment, then persist and plot the
# per-episode rewards returned by run_SB3_SAC.
environments = ['Hopper-v5', 'HalfCheetah-v5', 'BipedalWalker-v3']
render_mode = ['human', 'human', 'human']  # NOTE(review): not referenced in this cell
steps = 100_000  # Total training steps
save_dir = os.getcwd()

results = {}

for env_type in environments:
    # Run Stable-Baselines3 SAC
    print(f"Running Stable-Baselines3 SAC on {env_type}...")
    sb3_scores = run_SB3_SAC(env_type, steps, save_dir)

    # Keep results in memory for later inspection
    results[env_type] = {"sb3": sb3_scores}

    # Write the CSV right away so a failure on a later environment does not
    # discard the results of the runs that already completed.
    pd.DataFrame({
        "SB3_SAC": sb3_scores
    }).to_csv(f"{save_dir}/results_SAC_{env_type}.csv", index=False)

    # Plot comparison (you can compare SAC with any other algorithm like DDPG here if needed)
    plt.figure()
    plt.plot(np.arange(1, len(sb3_scores) + 1), sb3_scores, label="SB3 SAC")
    plt.ylabel('Cumulative Reward')
    plt.xlabel('Episodes')
    plt.title(f"SAC Performance on {env_type}")
    plt.legend()
    plt.savefig(f"{save_dir}/SAC_{env_type}.png")
    plt.close()


In [None]:
from stable_baselines3 import SAC
import gymnasium as gym
import os

# Define environments and model paths
environments = ['Hopper-v5', 'HalfCheetah-v5', 'BipedalWalker-v3']
save_dir = os.getcwd()

# Roll out one deterministic episode per environment with its saved model.
for env_type in environments:
    # render_mode="human" makes the environment draw itself while stepping,
    # so no explicit env.render() call is needed in the loop below.
    env = gym.make(env_type, render_mode="human")
    try:
        # Load the model
        model_path = f"{save_dir}/SB3_SAC_{env_type}.zip"
        model = SAC.load(model_path, env=env)

        # Start evaluation; reset() returns (observation, info)
        obs, _reset_info = env.reset()
        done = False
        cumulative_reward = 0
        print(f"Evaluating {env_type}...")

        while not done:
            # Predict action and step through the environment
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env.step(action)
            cumulative_reward += reward

            # An episode ends on termination OR truncation (e.g. a time
            # limit). Checking only `terminated` would never exit for
            # environments whose episodes end only by truncation.
            done = terminated or truncated

        print(f"Cumulative reward for {env_type}: {cumulative_reward}")
    finally:
        # Always release the viewer/simulator, even if loading or stepping fails.
        env.close()
