In [1]:
pip show mujoco

Name: mujoco
Version: 3.3.7
Summary: MuJoCo Physics Simulator
Home-page: https://github.com/google-deepmind/mujoco
Author: 
Author-email: Google DeepMind <mujoco@deepmind.com>
License-Expression: Apache-2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: absl-py, etils, glfw, numpy, pyopengl
Required-by: metaworld, robosuite
Note: you may need to restart the kernel to use updated packages.


In [2]:
import mujoco
import numpy as np

In [3]:

class CustomMujocoRenderer:
    """This is the MuJoCo renderer manager class for every MuJoCo environment.

    The class has two main public methods available:
    - :meth:`render` - Renders the environment in three possible modes: "human", "rgb_array", or "depth_array"
    - :meth:`close` - Closes all contexts initialized with the renderer

    """

    def __init__(
        self,
        model: "mujoco.MjModel",
        data: "mujoco.MjData",
        default_cam_config: dict | None = None,
        width: int | None = None,
        height: int | None = None,
        max_geom: int = 1000,
        camera_id: int | None = None,
        camera_name: str | None = None,
        visual_options: dict[int, bool] = {},
    ):
        """A wrapper for clipping continuous actions within the valid bound.

        Args:
            model: MjModel data structure of the MuJoCo simulation
            data: MjData data structure of the MuJoCo simulation
            default_cam_config: dictionary with attribute values of the viewer's default camera, https://mujoco.readthedocs.io/en/latest/XMLreference.html?highlight=camera#visual-global
            width: width of the OpenGL rendering context
            height: height of the OpenGL rendering context
            max_geom: maximum number of geometries to render
            camera_id: The integer camera id from which to render the frame in the MuJoCo simulation
            camera_name: The string name of the camera from which to render the frame in the MuJoCo simulation. This argument should not be passed if using cameara_id instead and vice versa
        """
        self.model = model
        self.data = data
        self._viewers = {}
        self.viewer = None
        self.default_cam_config = default_cam_config
        self.width = width
        self.height = height
        self.max_geom = max_geom
        self._vopt = visual_options

        # set self.camera_id using `camera_id` or `camera_name`
        if camera_id is not None and camera_name is not None:
            raise ValueError(
                "Both `camera_id` and `camera_name` cannot be"
                " specified at the same time."
            )

        no_camera_specified = camera_name is None and camera_id is None
        if no_camera_specified:
            camera_name = "track"

        if camera_id is None:
            self.camera_id = mujoco.mj_name2id(
                self.model,
                mujoco.mjtObj.mjOBJ_CAMERA,
                camera_name,
            )
        else:
            self.camera_id = camera_id

    def render(
        self,
        render_mode: str | None,
    ):
        """Renders a frame of the simulation in a specific format and camera view.

        Args:
            render_mode: The format to render the frame, it can be: "human", "rgb_array", "depth_array", or "rgbd_tuple"

        Returns:
            If render_mode is "rgb_array" or "depth_array" it returns a numpy array in the specified format. "rgbd_tuple" returns a tuple of numpy arrays of the form (rgb, depth). "human" render mode does not return anything.
        """
        if render_mode != "human":
            assert (
                self.width is not None and self.height is not None
            ), f"The width: {self.width} and height: {self.height} cannot be `None` when the render_mode is not `human`."

        viewer = self._get_viewer(render_mode=render_mode)

        if render_mode in ["rgb_array", "depth_array", "rgbd_tuple"]:
            return viewer.render(render_mode=render_mode, camera_id=self.camera_id)
        elif render_mode == "human":
            return viewer.render()
        elif render_mode == "segmentation":
            return viewer.render(render_mode="rgb_array", camera_id=self.camera_id, segmentation=True)

    def _get_viewer(self, render_mode: str | None):
        """Initializes and returns a viewer class depending on the render_mode
        - `WindowViewer` class for "human" render mode
        - `OffScreenViewer` class for "rgb_array", "depth_array", or "rgbd_tuple" render mode
        """
        self.viewer = self._viewers.get(render_mode)
        if self.viewer is None:
            if render_mode == "human":
                self.viewer = WindowViewer(
                    self.model,
                    self.data,
                    self.width,
                    self.height,
                    self.max_geom,
                    self._vopt,
                )
            elif render_mode in {"rgb_array", "depth_array", "rgbd_tuple", "segmentation"}:
                self.viewer = OffScreenViewer(
                    self.model,
                    self.data,
                    self.width,
                    self.height,
                    self.max_geom,
                    self._vopt,
                )
            else:
                raise AttributeError(
                    f"Unexpected mode: {render_mode}, expected modes: human, rgb_array, depth_array, or rgbd_tuple"
                )
            # Add default camera parameters
            self._set_cam_config()
            self._viewers[render_mode] = self.viewer

        if len(self._viewers.keys()) > 1:
            # Only one context can be current at a time
            self.viewer.make_context_current()

        return self.viewer

    def _set_cam_config(self):
        """Set the default camera parameters"""
        assert self.viewer is not None
        if self.default_cam_config is not None:
            for key, value in self.default_cam_config.items():
                if isinstance(value, np.ndarray):
                    getattr(self.viewer.cam, key)[:] = value
                else:
                    setattr(self.viewer.cam, key, value)

    def close(self):
        """Close the OpenGL rendering contexts of all viewer modes"""
        for _, viewer in self._viewers.items():
            viewer.close()

In [None]:
def render_metaworld_env(env, camera_name: str, width: int, height: int, mode: str):
    """Render one off-screen frame of a Meta-World environment from a named camera.

    A new `CustomMujocoRenderer` is built on every call from the unwrapped
    environment's `model` and `data`, with no default camera configuration.

    Args:
        env: environment whose `unwrapped` object must be a `SawyerXYZEnv`.
        camera_name: name of the camera to render from.
        width: width passed to the renderer.
        height: height passed to the renderer.
        mode: one of "rgb_array", "depth_array", "rgbd_tuple", or "segmentation".

    Returns:
        The value returned by `CustomMujocoRenderer.render` for `mode`. For the
        "corner2" camera the result is flipped along axis 0 (both elements when
        `mode` is "rgbd_tuple").

    Raises:
        AssertionError: if the environment type or `mode` is not accepted.
    """
    base_env = env.unwrapped
    assert isinstance(base_env, SawyerXYZEnv)
    assert mode in ["rgb_array", "depth_array", "rgbd_tuple", "segmentation"]

    # NOTE(review): this renderer is never explicitly closed here - confirm the
    # viewer releases its rendering context when it is garbage collected.
    frame_renderer = CustomMujocoRenderer(
        base_env.model,
        base_env.data,
        default_cam_config=None,
        width=width,
        height=height,
        camera_name=camera_name,
    )
    frame = frame_renderer.render(render_mode=mode)

    # Frames from every camera except "corner2" are returned untouched.
    if camera_name != "corner2":
        return frame

    # "corner2" frames were observed to come out flipped along the y axis (cause
    # unknown), so they are flipped back along axis 0 before being returned.
    if mode == "rgbd_tuple":
        return (np.flip(frame[0], axis=0), np.flip(frame[1], axis=0))
    return np.flip(frame, axis=0)



if __name__ == "__main__":
    # Demo: render a segmentation frame of the hammer task and save one channel.
    # `gym` and `render_metaworld_env` must already be defined in the kernel.
    env = gym.make(
        "Meta-World/MT1",
        env_name="hammer-v3",
        render_mode="rgb_array",
    )
    env.reset()
    import matplotlib.pyplot as plt

    img = render_metaworld_env(
        env,
        camera_name="corner2",
        width=480,
        height=480,
        mode="segmentation",
    )
    # Channel 0 is bound to obj_type and channel 1 to obj_id; only obj_id is saved.
    obj_type = img[:, :, 0]
    obj_id = img[:, :, 1]
    plt.imsave("debug.png", obj_id)
