In [1]:
import gymnasium as gym
# Fixed seed so that "Restart Kernel -> Run All" prints the same samples every time.
SEED = 0

env = gym.make('Humanoid-v4')
try:
    # required before you can step the environment; passing a seed here also
    # seeds the environment's own random number generator
    env.reset(seed=SEED)

    # The spaces draw from their own random generators, so they are seeded
    # explicitly to make the samples below reproducible.
    env.action_space.seed(SEED)
    env.observation_space.seed(SEED)

    # sample action:
    print("sample action:", env.action_space.sample())

    # observation space shape:
    print("observation space shape:", env.observation_space.shape)

    # sample observation: a random draw from the observation space, not a
    # state produced by the simulator
    print("sample observation:", env.observation_space.sample())
finally:
    # release the simulator even if one of the calls above fails
    env.close()

sample action: [-0.11018831  0.09457526 -0.28614342  0.2794108   0.36133528 -0.19552514
  0.30025086  0.20448248  0.31246755 -0.1565927   0.23305222  0.00726664
 -0.1862242   0.24485491  0.10894941  0.07770261  0.1208683 ]
observation space shape: (376,)
sample observation: [ 1.21733944  0.60394594 -0.07357566  1.10293267  1.18723622 -1.70769241
  1.2004401  -0.19067725 -1.04928795  1.78402248  0.67644498  0.42606554
  0.81762542 -1.08009781  0.20219481  1.60562563 -0.10394895 -1.51046782
  0.58062628 -0.37438945 -2.12491592  1.02258037  1.23373753  0.98835966
 -0.60922387 -0.92949304 -0.43229081 -1.30217823 -2.34723294  0.27594536
  0.58376492 -1.47727247  0.80260465  0.84557671 -0.25601444  0.06962598
 -0.73833631 -0.37128311 -1.44544674 -1.02412388 -1.26879718 -0.51507769
 -1.60145627  0.36950438 -0.02680477 -0.46450176  0.40989873 -0.93453295
  0.77591484  0.25996619  0.78915781  1.33222138 -0.72468206 -0.95304336
  1.02023716  1.54929846  1.76269681 -0.10941759  0.0128211   0.3896

## Environment

In [2]:
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box


def mass_center(model, sim):
    """Return the first coordinate of the mass-weighted mean body position.

    Args:
        model: object exposing ``body_mass``, one mass per body.
        sim: object exposing ``data.xipos``, one position row per body.

    Returns:
        Component 0 of ``sum(mass * position) / sum(mass)``.
    """
    # Turn the per-body masses into a column so they broadcast across each
    # position row.
    body_masses = np.expand_dims(model.body_mass, 1)
    body_positions = sim.data.xipos

    weighted_positions = np.sum(body_masses * body_positions, 0)
    total_mass = np.sum(body_masses)

    centre = weighted_positions / total_mass
    return centre[0]


class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
    """Humanoid task built on the mujoco-py environment base class.

    Loads ``humanoid.xml`` with a frame skip of 5 and declares an unbounded
    376-dimensional float64 observation space. The per-step reward rewards
    progress of ``mass_center`` between consecutive steps, penalises control
    effort and (capped) external contact forces, and adds a constant alive
    bonus. The episode terminates once ``qpos[2]`` leaves the range [1.0, 2.0].
    """

    metadata = {
        "render_modes": ["human", "rgb_array", "depth_array"],
        "render_fps": 67,
    }

    def __init__(self, **kwargs):
        """Create the environment; ``kwargs`` are forwarded to both base classes."""
        obs_space = Box(low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self,
            "humanoid.xml",
            5,
            observation_space=obs_space,
            **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def _get_obs(self):
        """Concatenate the simulator arrays that make up one flat observation."""
        sim_data = self.sim.data
        pieces = (
            # the first two qpos entries are left out of the observation
            sim_data.qpos.flat[2:],
            sim_data.qvel.flat,
            sim_data.cinert.flat,
            sim_data.cvel.flat,
            sim_data.qfrc_actuator.flat,
            sim_data.cfrc_ext.flat,
        )
        return np.concatenate(pieces)

    def step(self, a):
        """Apply action ``a`` for ``frame_skip`` simulator steps.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``truncated`` is always ``False`` and ``info`` holds the individual
            reward terms.
        """
        com_start = mass_center(self.model, self.sim)
        self.do_simulation(a, self.frame_skip)
        com_end = mass_center(self.model, self.sim)

        sim_data = self.sim.data
        survive_reward = 5.0

        # Progress of the centre of mass per unit of environment time.
        velocity_reward = 1.25 * (com_end - com_start) / self.dt
        control_penalty = 0.1 * np.square(sim_data.ctrl).sum()
        # The contact-force penalty is capped at 10.
        impact_penalty = min(0.5e-6 * np.square(sim_data.cfrc_ext).sum(), 10)

        reward = velocity_reward - control_penalty - impact_penalty + survive_reward

        # Terminate when qpos[2] is below 1.0 or above 2.0.
        height = self.sim.data.qpos[2]
        terminated = bool((height < 1.0) or (height > 2.0))

        if self.render_mode == "human":
            self.render()

        info = {
            "reward_linvel": velocity_reward,
            "reward_quadctrl": -control_penalty,
            "reward_alive": survive_reward,
            "reward_impact": -impact_penalty,
        }
        return self._get_obs(), reward, terminated, False, info

    def reset_model(self):
        """Reset to the initial state plus uniform noise in [-0.01, 0.01]."""
        noise = 0.01
        rng = self.np_random
        start_qpos = self.init_qpos + rng.uniform(
            low=-noise, high=noise, size=self.model.nq
        )
        start_qvel = self.init_qvel + rng.uniform(
            low=-noise, high=noise, size=self.model.nv
        )
        self.set_state(start_qpos, start_qvel)
        return self._get_obs()

    def viewer_setup(self):
        """Configure the viewer camera (tracked body, distance, look-at, elevation)."""
        assert self.viewer is not None
        camera = self.viewer.cam
        camera.trackbodyid = 1
        camera.distance = self.model.stat.extent * 1.0
        camera.lookat[2] = 2.0
        camera.elevation = -20

## Humanoid Standup

In [3]:
import numpy as np
import gymnasium as gym
from gymnasium import utils
from gymnasium.envs.mujoco import MuJocoPyEnv
from gymnasium.spaces import Box


class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
    """Humanoid stand-up task built on the mujoco-py environment base class.

    Loads ``humanoidstandup.xml`` with a frame skip of 5 and declares an
    unbounded 376-dimensional float64 observation space. The per-step reward
    grows with ``qpos[2]``, penalises control effort and (capped) external
    contact forces, and adds a constant of 1. ``step`` never reports
    termination or truncation.
    """

    metadata = {
        "render_modes": ["human", "rgb_array", "depth_array"],
        "render_fps": 67,
    }

    def __init__(self, **kwargs):
        """Create the environment; ``kwargs`` are forwarded to both base classes."""
        obs_space = Box(low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self, "humanoidstandup.xml", 5, observation_space=obs_space, **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def _get_obs(self):
        """Concatenate the simulator arrays that make up one flat observation."""
        sim_data = self.sim.data
        pieces = (
            # the first two qpos entries are left out of the observation
            sim_data.qpos.flat[2:],
            sim_data.qvel.flat,
            sim_data.cinert.flat,
            sim_data.cvel.flat,
            sim_data.qfrc_actuator.flat,
            sim_data.cfrc_ext.flat,
        )
        return np.concatenate(pieces)

    def step(self, a):
        """Apply action ``a`` for ``frame_skip`` simulator steps.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where both
            ``terminated`` and ``truncated`` are always ``False`` and ``info``
            holds the individual reward terms.
        """
        self.do_simulation(a, self.frame_skip)

        height = self.sim.data.qpos[2]
        sim_data = self.sim.data

        # qpos[2] measured from 0, divided by the model's simulation timestep.
        upward_reward = (height - 0) / self.model.opt.timestep
        control_penalty = 0.1 * np.square(sim_data.ctrl).sum()
        # The contact-force penalty is capped at 10.
        impact_penalty = min(0.5e-6 * np.square(sim_data.cfrc_ext).sum(), 10)

        reward = upward_reward - control_penalty - impact_penalty + 1

        if self.render_mode == "human":
            self.render()

        info = {
            "reward_linup": upward_reward,
            "reward_quadctrl": -control_penalty,
            "reward_impact": -impact_penalty,
        }
        return self._get_obs(), reward, False, False, info

    def reset_model(self):
        """Reset to the initial state plus uniform noise in [-0.01, 0.01]."""
        noise = 0.01
        rng = self.np_random
        start_qpos = self.init_qpos + rng.uniform(
            low=-noise, high=noise, size=self.model.nq
        )
        start_qvel = self.init_qvel + rng.uniform(
            low=-noise, high=noise, size=self.model.nv
        )
        self.set_state(start_qpos, start_qvel)
        return self._get_obs()

    def viewer_setup(self):
        """Configure the viewer camera (tracked body, distance, look-at, elevation)."""
        assert self.viewer is not None
        camera = self.viewer.cam
        camera.trackbodyid = 1
        camera.distance = self.model.stat.extent * 1.0
        camera.lookat[2] = 0.8925
        camera.elevation = -20

In [4]:
# Instantiate the custom stand-up environment defined in this notebook.
# NOTE(review): the class derives from MuJocoPyEnv, so this call needs the
# mujoco-py package; the recorded output for this cell shows
# DependencyNotInstalled when it is missing.
standup = HumanoidStandupEnv()

DependencyNotInstalled: No module named 'mujoco_py'. (HINT: you need to install mujoco-py, and also perform the setup instructions here: https://github.com/openai/mujoco-py.)