<a href="https://colab.research.google.com/github/alexeiplatzer/unitree-go2-mjx-rl/blob/main/notebooks/Universal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Universal Notebook for Quadruped RL Training in MJX

## Hardware Setup

In [1]:
# @title Setup configuration

# Both values below are read by the setup cell that follows: `vision_backend`
# decides whether the Madrona MJX backend gets built, and `hardware` decides
# whether the CUDA toolkit is installed first (done for "Kaggle" only).

# @markdown Choose your hardware option:
hardware = "Colab" # @param ["local","Colab","Kaggle"]

# @markdown Choose whether you want to build the rendering setup for training
# @markdown with vision, and with what backend:
vision_backend = "None" # @param ["None","MJX","Madrona"]

In [2]:
# @title run this cell once each time on a new machine

import time

if vision_backend == "Madrona":
    # Install madrona MJX
    print("Intalling Madrona MJX...")
    start_time = time.perf_counter()
    print("Setting up environment... (Step 1/3)")
    !pip uninstall -y jax
    !pip install jax["cuda12_local"]==0.4.35

    !sudo apt install libx11-dev libxrandr-dev libxinerama-dev libxcursor-dev libxi-dev mesa-common-dev

    !mkdir modules
    !git clone https://github.com/shacklettbp/madrona_mjx.git modules/madrona_mjx

    !git -C modules/madrona_mjx submodule update --init --recursive

    !mkdir modules/madrona_mjx/build

    if hardware == "Kaggle":
        !sudo apt-get install -y nvidia-cuda-toolkit

    print("Building the Madrona backend ... (Step 2/3)")
    !cd modules/madrona_mjx/build && cmake -DLOAD_VULKAN=OFF .. && make -j 8

    print ("Installing Madrona MJX ... (Step 3/3)")
    !pip install -e modules/madrona_mjx

    minutes, seconds = divmod((time.perf_counter() - start_time), 60)
    print(f"Finished installing Madrona MJX in {minutes} m {seconds:.2f} s")

# Clones and installs our Quadruped RL package
!git clone https://github.com/alexeiplatzer/unitree-go2-mjx-rl.git
!pip install -e unitree-go2-mjx-rl/

Cloning into 'unitree-go2-mjx-rl'...
remote: Enumerating objects: 993, done.[K
remote: Counting objects: 100% (282/282), done.[K
remote: Compressing objects: 100% (171/171), done.[K
remote: Total 993 (delta 144), reused 199 (delta 83), pack-reused 711 (from 1)[K
Receiving objects: 100% (993/993), 22.33 MiB | 5.85 MiB/s, done.
Resolving deltas: 100% (508/508), done.
Obtaining file:///home/platzer/TUM/Thesis/unitree-go2-mjx-rl/notebooks/unitree-go2-mjx-rl
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: quadruped_mjx_rl
  Building editable for quadruped_mjx_rl (pyproject.toml) ... [?25ldone
[?25h  Created wheel for quadruped_mjx_rl: filename=quadruped_mjx_rl-0.0.1-0.editable-py3-none-any.whl size=1902 sha256=c6c354bd6f2ebb6a2a736dd

### Now restart the session and continue.
### You can skip setup next time while you are on the same machine.

## Training

In [4]:
# @title Configuration for both local and for Colab instances.

repo_path = "./unitree-go2-mjx-rl"

# Refresh the repo for recent changes
# Important in development
!git -C {repo_path} pull

# On your second reading, load the compiled rendering backend to save time!
import os
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"

# Check if MuJoCo installation was successful
import distutils.util
import os
import subprocess
# if subprocess.run('nvidia-smi').returncode:
#     raise RuntimeError(
#         'Cannot communicate with GPU. '
#         'Make sure you are using a GPU Colab runtime. '
#         'Go to the Runtime menu and select Choose runtime type.'
#     )

# Add an ICD config so that glvnd can pick up the Nvidia EGL driver.
# This is usually installed as part of an Nvidia driver package, but the Colab
# kernel doesn't install its driver via APT, and as a result the ICD is missing.
# (https://github.com/NVIDIA/libglvnd/blob/master/src/EGL/icd_enumeration.md)
NVIDIA_ICD_CONFIG_PATH = '/usr/share/glvnd/egl_vendor.d/10_nvidia.json'
# if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):
#     with open(NVIDIA_ICD_CONFIG_PATH, 'w') as f:
#         f.write("""{
#         "file_format_version" : "1.0.0",
#         "ICD" : {
#             "library_path" : "libEGL_nvidia.so.0"
#         }
#     }
#     """)

# Configure MuJoCo to use the EGL rendering backend (requires GPU)
print('Setting environment variable to use GPU rendering:')
%env MUJOCO_GL=egl

try:
    print('Checking that the installation succeeded:')
    import mujoco

    mujoco.MjModel.from_xml_string('<mujoco/>')
except Exception as e:
    raise e from RuntimeError(
        'Something went wrong during installation. Check the shell output above '
        'for more information.\n'
        'If using a hosted Colab runtime, make sure you enable GPU acceleration '
        'by going to the Runtime menu and selecting "Choose runtime type".'
    )

print('Installation successful.')

# Tell XLA to use Triton GEMM, this improves steps/sec by ~30% on some GPUs
xla_flags = os.environ.get('XLA_FLAGS', '')
xla_flags += ' --xla_gpu_triton_gemm_any=True'
os.environ['XLA_FLAGS'] = xla_flags

Already up to date.
Setting environment variable to use GPU rendering:
env: MUJOCO_GL=egl
Checking that the installation succeeded:
Installation successful.


In [5]:
#@title Prepare experiments directory
from etils.epath import Path

# Output locations, relative to the notebook's working directory.
experiments_dir = Path("experiments")
trained_policy_dir = experiments_dir / "trained_policies"
configs_dir = experiments_dir / "configs"

# Create the folders through the Path API rather than a shell `mkdir -p`, so
# the cell does not depend on a POSIX shell when running locally.
for output_dir in (trained_policy_dir, configs_dir):
    output_dir.mkdir(parents=True, exist_ok=True)

# Maps experiment name -> dict of its config objects; the experiment
# configuration cell adds one entry per run.
experiment_configs = {}

In [6]:
#@title Configure a training procedure

# Builds the robot, environment, model and training configs for one named
# experiment, instantiates the environment, and writes the configs to a YAML
# file. Reads `repo_path`, `configs_dir`, `trained_policy_dir` and
# `experiment_configs` from earlier cells; defines `env`, `model_config`,
# `training_config` and `trained_policy_save_path` for the training cell.
from quadruped_mjx_rl.configs import predefined_robot_configs
from quadruped_mjx_rl import environments
from quadruped_mjx_rl.configs.config_classes import models
from quadruped_mjx_rl.configs import TrainingConfig
from quadruped_mjx_rl.configs import save_configs

#@markdown Fill out a name for the experiment and all configuration parameters.
#@markdown If you want to add another experiment, change the parameters and run
#@markdown this cell again.
experiment_name = "my_experiment" #@param {type:"string"}
# Insertion order matters: the values are later passed positionally to
# `save_configs` in the order they are added here.
config_dict = {}
experiment_configs[experiment_name] = config_dict

#@markdown ---
#@markdown #### Choose the policy approximation method
model_architecture = "Actor-Critic" #@param ["Actor-Critic", "Teacher-Student"]
training_algorithm = "ppo" #@param ["ppo"]
vision_backend = "None" #@param ["None","MJX","Madrona"]

#@markdown ---
#@markdown #### Choose the robot
robot = "unitree_go2" #@param ["unitree_go2", "google_barkour_vb"]
robot_config = predefined_robot_configs[robot]()
config_dict["robot_config"] = robot_config
init_scene_path = f"{repo_path}/resources/{robot}/scene_mjx.xml"
# TODO: add scenes for vision

#@markdown ---
#@markdown #### Configure Environment
if model_architecture == "Actor-Critic" and training_algorithm == "ppo":
    env_class = environments.QuadrupedJoystickEnhancedEnv
    env_config_class = environments.EnhancedEnvironmentConfig
else:
    # Only the Actor-Critic + PPO combination has an environment wired up here.
    raise NotImplementedError(
        f"No environment is configured for model architecture "
        f"{model_architecture!r} with training algorithm {training_algorithm!r}."
    )

simulation_timestep = 0.002 #@param {type:"number"}
control_timestep = 0.002 #@param {type:"number"}

env_config = env_config_class(
    sim=env_config_class.SimConfig(
        sim_dt=simulation_timestep,
        ctrl_dt=control_timestep,
    ),
)
config_dict["env_config"] = env_config

env = env_class(
    environment_config=env_config,
    robot_config=robot_config,
    init_scene_path=init_scene_path,
)

#@markdown ---
#@markdown #### Configure Model
if model_architecture == "Actor-Critic":
    #@markdown ---
    #@markdown Model hyperparameters for the "Actor-Critic" architecture:
    policy_layers = [256, 256] #@param
    value_layers = [256, 256] #@param

    model_config_class = models.ActorCriticConfig
    model_config = model_config_class(
        modules=model_config_class.ActorCriticModulesConfig(
            policy=policy_layers,
            value=value_layers,
        ),
    )
elif model_architecture == "Teacher-Student":
    # Currently unreachable: the environment selection above raises for this
    # architecture. Kept so the form fields exist once it is supported.
    #@markdown ---
    #@markdown Model hyperparameters for the "Teacher-Student" architecture:
    policy_layers = [256, 256] #@param
    value_layers = [256, 256] #@param
    teacher_encoder_layers = [256, 256] #@param
    student_encoder_layers = [256, 256] #@param
    latent_representation_size = 16 # @param {"type":"integer"}

    model_config_class = models.TeacherStudentConfig
    model_config = model_config_class(
        modules=model_config_class.TeacherStudentModulesConfig(
            policy=policy_layers,
            value=value_layers,
            encoder=teacher_encoder_layers,
            adapter=student_encoder_layers,
        ),
        latent_size=latent_representation_size,
    )
config_dict["model_config"] = model_config

#@markdown ---
#@markdown #### Configure training procedure
if vision_backend == "None":
    #@markdown ---
    #@markdown #### Training without vision:
    # Leading commas keep each `#@param` annotation at the end of its own line.
    training_config = TrainingConfig(
        num_timesteps=1_000_000 #@param {"type":"integer"}
        ,num_evals=5 #@param {"type":"integer"}
        ,reward_scaling=1 #@param {"type":"integer"}
        ,episode_length=1000 #@param {"type":"integer"}
        ,normalize_observations=True #@param {"type":"boolean"}
        ,action_repeat=1 #@param {"type":"integer"}
        ,unroll_length=10 #@param {"type":"integer"}
        ,num_minibatches=8 #@param {"type":"integer"}
        ,num_updates_per_batch=8 #@param {"type":"integer"}
        ,discounting=0.97 #@param {"type":"number"}
        ,learning_rate=0.0005  #@param {"type":"number"}
        ,entropy_cost=0.005  #@param {"type":"number"}
        ,num_envs=512 #@param {"type":"integer"}
        ,batch_size=256 #@param {"type":"integer"}
    )
else:
    # TODO: build a vision training config for the "Madrona" (and "MJX") backend.
    # Fail with a clear message instead of continuing without `training_config`.
    raise NotImplementedError(
        f"Training with the {vision_backend!r} vision backend is not implemented yet."
    )
config_dict["training_config"] = training_config

config_file_path = configs_dir / f"{experiment_name}.yaml"
save_configs(config_file_path, *config_dict.values())
print(f"Experiment configs saved to {config_file_path}")

# Nothing has been trained at this point; this only announces the destination.
trained_policy_save_path = trained_policy_dir / f"{experiment_name}"
print(f"Trained policy will be saved to {trained_policy_save_path}")

Error saving config: EnhancedEnvironmentConfig(observation_noise=EnvironmentConfig.ObservationNoiseConfig(general_noise=0.05), control=EnvironmentConfig.ControlConfig(action_scale=0.3), command=EnvironmentConfig.CommandConfig(resampling_time=500, ranges=EnvironmentConfig.CommandConfig.RangesConfig(lin_vel_x=(-0.6, 1.5), lin_vel_y=(-0.8, 0.8), ang_vel_yaw=(-0.7, 0.7))), domain_rand=EnhancedEnvironmentConfig.DomainRandConfig(kick_vel=0.05, kick_interval=10), sim=EnhancedEnvironmentConfig.SimConfig(ctrl_dt=0.002, sim_dt=0.002, override=EnhancedEnvironmentConfig.SimConfig.OverrideConfig(Kp=35.0, Kd=0.5)), rewards=EnhancedEnvironmentConfig.RewardConfig(tracking_sigma=0.25, termination_body_height=0.18, scales=EnhancedEnvironmentConfig.RewardConfig.ScalesConfig(tracking_lin_vel=1.5, tracking_ang_vel=0.8, lin_vel_z=-2.0, ang_vel_xy=-0.05, orientation=-5.0, torques=-0.0002, action_rate=-0.01, feet_air_time=0.2, stand_still=-0.5, termination=-1.0, foot_slip=-0.1)), environment_class='Enhanced',

KeyError: "Could not find config key for <class 'quadruped_mjx_rl.environments.ppo_enhanced.EnhancedEnvironmentConfig'>"

In [None]:
from quadruped_mjx_rl.training import train
from quadruped_mjx_rl.models.agents.ppo.raw_ppo import train as raw_train

# Gather everything the trainer needs in one place; `env`, `model_config`,
# `training_config` and `trained_policy_save_path` come from the experiment
# configuration cell.
training_arguments = dict(
    env=env,
    model_config=model_config,
    training_config=training_config,
    train_fn=raw_train,
    model_save_path=trained_policy_save_path,
    checkpoints_save_path=None,  # TODO: enable
    vision=False,  # TODO: enable
)

train(**training_arguments)

## Results