In [2]:
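# Install the necessary packages.
# NOTE: in the captured run below, building gym==0.21.0 failed during
# "python setup.py egg_info", so the first pip command aborted and
# stable_baselines3 was never installed (hence the ModuleNotFoundError).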
!pip install stable-baselines3[extra] gym==0.21.0
!pip install box2d-py
!pip install moviepy

import gym
from stable_baselines3 import A2C, DDPG
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common import results_plotter
import matplotlib.pyplot as plt
from moviepy.editor import VideoFileClip

def create_env():
    """Build the vectorized MountainCarContinuous-v0 training environment.

    Returns:
        A ``DummyVecEnv`` holding exactly one ``Monitor``-wrapped
        ``MountainCarContinuous-v0`` instance.
    """
    # Only the environment is passed to Monitor; no log file name is given.
    monitored = Monitor(gym.make('MountainCarContinuous-v0'))

    def _factory():
        # DummyVecEnv takes a list of callables, each returning an env.
        return monitored

    return DummyVecEnv([_factory])

def train_and_evaluate(model_class, env, total_timesteps, video_folder):
    """
    Train one algorithm on ``env``, save it, and print its mean reward.

    Args:
        model_class: Stable Baselines3 algorithm class (e.g., A2C, DDPG);
            it is instantiated with the 'MlpPolicy' policy.
        env: Environment used for training, for the evaluation callback and
            for the final evaluation (the same object in all three roles).
        total_timesteps: Number of timesteps passed to ``learn``.
        video_folder: Output directory. The callback is pointed at
            ``<video_folder>/best_model`` and ``<video_folder>`` itself, and
            the final model is saved as ``<video_folder>/<algo>_model``.

    Returns:
        The trained model instance.

    Note:
        The callback paths do not include the algorithm name, so two calls
        with the same ``video_folder`` share them.
    """
    agent = model_class('MlpPolicy', env, verbose=1)

    # Evaluation callback settings, kept together for readability.
    callback_options = dict(
        best_model_save_path=f'{video_folder}/best_model',
        log_path=video_folder,
        eval_freq=10000,
        deterministic=True,
        render=False,
    )
    periodic_eval = EvalCallback(env, **callback_options)

    agent.learn(total_timesteps=total_timesteps, callback=periodic_eval)

    # Persist the final weights under a name derived from the algorithm.
    algo_name = model_class.__name__
    agent.save(f'{video_folder}/{algo_name.lower()}_model')

    # Final evaluation over 10 episodes; only the mean is reported.
    mean_reward, _ = evaluate_policy(agent, env, n_eval_episodes=10)
    print(f'{algo_name} Mean Reward: {mean_reward}')

    return agent

def record_videos(env, model, video_path):
    """Roll out a trained model for 1000 steps while recording a video.

    The imported ``Monitor`` is the Stable Baselines3 statistics wrapper; it
    accepts no ``force`` argument and does not write videos, so recording is
    done with ``VecVideoRecorder`` instead.

    Args:
        env: Ignored. Kept only so existing callers keep working; a fresh
            MountainCarContinuous-v0 environment is always created here.
        model: Trained model exposing ``predict(obs, deterministic=...)``.
        video_path: Target such as ``'/content/videos/a2c.mp4'``. The
            directory part is used as the output folder and the file stem
            as the recorder's name prefix; the recorder appends its own
            step-range suffix to the final file name.
    """
    # Local imports keep this fix self-contained within the block.
    import os

    from stable_baselines3.common.vec_env import VecVideoRecorder

    n_steps = 1000
    folder, file_name = os.path.split(video_path)
    prefix = os.path.splitext(file_name)[0] or 'rl-video'

    rec_env = DummyVecEnv([lambda: gym.make('MountainCarContinuous-v0')])
    rec_env = VecVideoRecorder(
        rec_env,
        folder or '.',
        record_video_trigger=lambda step: step == 0,  # record from the start
        video_length=n_steps,
        name_prefix=prefix,
    )

    # The model's own environment is left untouched: predict() only needs
    # the observation, and this recording env is closed before returning.
    try:
        obs = rec_env.reset()
        for _ in range(n_steps):
            action, _states = model.predict(obs, deterministic=True)
            # Vectorized envs reset themselves when an episode ends, so no
            # manual reset is required on `done`.
            obs, _reward, _done, _info = rec_env.step(action)
    finally:
        # Closing the recorder is what finalizes the video file on disk.
        rec_env.close()

def plot_results(video_folder):
    """Plot training results read from ``video_folder`` and show the figure.

    Args:
        video_folder: Directory passed to ``results_plotter.plot_results``
            as the only entry in its list of log directories.
    """
    log_dirs = [video_folder]
    timestep_limit = 10000  # second positional argument of plot_results
    figure_title = 'A2C vs DDPG Training Results'

    results_plotter.plot_results(
        log_dirs,
        timestep_limit,
        results_plotter.X_TIMESTEPS,
        figure_title,
    )
    plt.show()

def main():
    """Train A2C and then DDPG on one shared environment, record a video
    after each, and finally plot the results."""
    video_folder = '/content/videos'  # output directory for models and videos
    total_timesteps = 100  # training budget per algorithm

    # One environment is created up front and handed to both training runs.
    env = create_env()

    for label, algorithm in (("A2C", A2C), ("DDPG", DDPG)):
        print(f"Training {label}...")
        trained = train_and_evaluate(algorithm, env, total_timesteps, video_folder)
        print(f"Recording {label} video...")
        record_videos(env, trained, f'{video_folder}/{label.lower()}.mp4')

    # Plot once both algorithms have finished.
    plot_results(video_folder)


if __name__ == "__main__":
    main()


Collecting gym==0.21.0
  Downloading gym-0.21.0.tar.gz (1.5 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.5/1.5 MB[0m [31m95.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
[?25h  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package men

ModuleNotFoundError: No module named 'stable_baselines3'