This repository contains an implementation of the reinforcement learning algorithm Proximal Policy Optimization (PPO). The implementation is based on the paper "Proximal Policy Optimization Algorithms" by Schulman et al. and is inspired by RSL_RL.
It is meant to be used alongside Isaac Sim Orbit, which allows us to train agents using multiple parallel environments. The default hyperparameters are optimized for Isaac Sim Orbit.
To use this package, make sure Isaac Sim and Orbit have been installed correctly; installation instructions can be found in the Isaac Sim and Isaac Sim Orbit documentation. Make sure you set up the virtual environment for Orbit and activate it before proceeding, then install this package:
pip install .
Or, in development (editable) mode:
pip install -e .
We only support continuous action spaces at the moment.
import gymnasium as gym
from isaac_ppo import PPO, Hyperparameters
from omni.isaac.orbit.app.app_launcher import AppLauncher
from utils.argparser import get_argparser

# Launch Isaac Sim at module scope. The simulator must be running before any
# omni.isaac.orbit_tasks module can be imported.
parser = get_argparser()
# Append the AppLauncher CLI args (headless, livestream, device, ...)
AppLauncher.add_app_launcher_args(parser)
# Parse the arguments
args_cli = parser.parse_args()
# Launch Omniverse
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app

# Import modules that require a running simulator only after the launch above.
from omni.isaac.orbit_tasks.utils import parse_env_cfg

# Environment configuration, built from the CLI arguments
env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
# Create the (vectorized) environment
env = gym.make(args_cli.task, cfg=env_cfg)

# Create the hyperparameters object (modify anything if needed)
hyperparameters = Hyperparameters()
hyperparameters.lr = 1e-4

# Create the agent; trained policies are saved under `policy_path`
agent = PPO(env, hyperparameters)
agent.policy_path = 'policies'
# Learn
agent.learn(max_steps=2000)
Then run the Python script:
python train.py --task Isaac-Velocity-Flat-Unitree-A1-v0 --num_envs 1024
import gymnasium as gym
from isaac_ppo import PPO, Hyperparameters
from omni.isaac.orbit.app.app_launcher import AppLauncher
from utils.argparser import get_argparser

# Launch Isaac Sim at module scope. The simulator must be running before any
# omni.isaac.orbit_tasks module can be imported.
parser = get_argparser()
# Append the AppLauncher CLI args (headless, livestream, device, ...)
AppLauncher.add_app_launcher_args(parser)
# Parse the arguments
args_cli = parser.parse_args()
# Launch Omniverse
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app

# Import modules that require a running simulator only after the launch above.
from omni.isaac.orbit_tasks.utils import parse_env_cfg

# Environment configuration, built from the CLI arguments
env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
# Create the (vectorized) environment
env = gym.make(args_cli.task, cfg=env_cfg)

# Create the hyperparameters object
hyperparameters = Hyperparameters()

# Create the agent and roll out a previously trained policy checkpoint
agent = PPO(env, hyperparameters)
agent.policy_path = 'policies'
agent.simulate('policies/ppo_actor_critic.pth')
Then run the Python script:
python evaluate.py --task Isaac-Velocity-Flat-Unitree-A1-v0 --num_envs 1