forked from google-deepmind/mujoco
/
renderer.py
288 lines (243 loc) · 10 KB
/
renderer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# Copyright 2022 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines a renderer class for the MuJoCo Python native bindings."""
from typing import Optional, Union
from mujoco import _enums
from mujoco import _functions
from mujoco import _render
from mujoco import _structs
from mujoco import gl_context
import numpy as np
class Renderer:
"""Renders MuJoCo scenes."""
def __init__(
self,
model: _structs.MjModel,
height: int = 240,
width: int = 320,
max_geom: int = 10000
) -> None:
"""Initializes a new `Renderer`.
Args:
model: an MjModel instance.
height: image height in pixels.
width: image width in pixels.
max_geom: Optional integer specifying the maximum number of geoms that can
be rendered in the same scene. If None this will be chosen automatically
based on the estimated maximum number of renderable geoms in the model.
Raises:
ValueError: If `camera_id` is outside the valid range, or if `width` or
`height` exceed the dimensions of MuJoCo's offscreen framebuffer.
"""
buffer_width = model.vis.global_.offwidth
buffer_height = model.vis.global_.offheight
if width > buffer_width:
raise ValueError(f"""
Image width {width} > framebuffer width {buffer_width}. Either reduce the image
width or specify a larger offscreen framebuffer in the model XML using the
clause:
<visual>
<global offwidth="my_width"/>
</visual>""".lstrip())
if height > buffer_height:
raise ValueError(f"""
Image height {height} > framebuffer height {buffer_height}. Either reduce the
image height or specify a larger offscreen framebuffer in the model XML using
the clause:
<visual>
<global offheight="my_height"/>
</visual>""".lstrip())
self._width = width
self._height = height
self._model = model
self._scene = _structs.MjvScene(model=model, maxgeom=max_geom)
self._scene_option = _structs.MjvOption()
self._rect = _render.MjrRect(0, 0, self._width, self._height)
# Create render contexts.
# TODO(nimrod): Figure out why pytype doesn't like gl_context.GLContext
self._gl_context = gl_context.GLContext(width, height) # type: ignore
self._gl_context.make_current()
self._mjr_context = _render.MjrContext(
model, _enums.mjtFontScale.mjFONTSCALE_150.value
)
_render.mjr_setBuffer(
_enums.mjtFramebuffer.mjFB_OFFSCREEN.value, self._mjr_context
)
self._mjr_context.readDepthMap = _enums.mjtDepthMap.mjDEPTH_ZEROFAR
# Default render flags.
self._depth_rendering = False
self._segmentation_rendering = False
@property
def model(self):
return self._model
@property
def scene(self) -> _structs.MjvScene:
return self._scene
@property
def height(self):
return self._height
@property
def width(self):
return self._width
def enable_depth_rendering(self):
self._segmentation_rendering = False
self._depth_rendering = True
def disable_depth_rendering(self):
self._depth_rendering = False
def enable_segmentation_rendering(self):
self._segmentation_rendering = True
self._depth_rendering = False
def disable_segmentation_rendering(self):
self._segmentation_rendering = False
def render(self, *, out: Optional[np.ndarray] = None) -> np.ndarray:
"""Renders the scene as a numpy array of pixel values.
Args:
out: Alternative output array in which to place the resulting pixels. It
must have the same shape as the expected output but the type will be
cast if necessary. The expted shape depends on the value of
`self._depth_rendering`: when `True`, we expect `out.shape == (width,
height)`, and `out.shape == (width, height, 3)` when `False`.
Returns:
A new numpy array holding the pixels with shape `(H, W)` or `(H, W, 3)`,
depending on the value of `self._depth_rendering` unless
`out is None`, in which case a reference to `out` is returned.
"""
original_flags = self._scene.flags.copy()
# Using segmented rendering for depth makes the calculated depth more accurate at far distances
if self._depth_rendering or self._segmentation_rendering:
self._scene.flags[_enums.mjtRndFlag.mjRND_SEGMENT] = True
self._scene.flags[_enums.mjtRndFlag.mjRND_IDCOLOR] = True
self._gl_context.make_current()
if self._depth_rendering:
out_shape = (self._height, self._width)
out_dtype = np.float32
else:
out_shape = (self._height, self._width, 3)
out_dtype = np.uint8
if out is None:
out = np.empty(out_shape, dtype=out_dtype)
else:
if out.shape != out_shape:
raise ValueError(
f'Expected `out.shape == {out_shape}`. Got `out.shape={out.shape}`'
' instead. When using depth rendering, the out array should be of'
' shape `(width, height)` and otherwise (width, height, 3).'
f' Got `(self.height, self.width)={(self.height, self.width)}` and'
f' `self._depth_rendering={self._depth_rendering}`.'
)
# Render scene and read contents of RGB and depth buffers.
_render.mjr_render(self._rect, self._scene, self._mjr_context)
if self._depth_rendering:
_render.mjr_readPixels(None, out, self._rect, self._mjr_context)
# Get the distances to the near and far clipping planes.
extent = self._model.stat.extent
near = self._model.vis.map.znear * extent
far = self._model.vis.map.zfar * extent
# Calculate OpenGL perspective matrix values in float32 precision
# so they are close to what glFrustum returns
# https://registry.khronos.org/OpenGL-Refpages/gl2.1/xhtml/glFrustum.xml
zfar = np.float32(far)
znear = np.float32(near)
C = -(zfar + znear)/(zfar - znear)
D = -(np.float32(2)*zfar*znear)/(zfar - znear)
# In reverse Z mode the perspective matrix is transformed by the following
C = np.float32(-0.5)*C - np.float32(0.5)
D = np.float32(-0.5)*D
# We need 64 bits to convert Z from ndc to metric depth without noticeable losses in precision
out_64 = out.astype(np.float64)
# Undo OpenGL projection
# Note: We do not need to take action to convert from window coordinates to
# normalized device coordinates because in reversed Z mode the mapping
# is identity
out_64 = D / (out_64 + C)
# Cast result back to float32 for backwards compatibility
# This has a small accuracy cost
out[:] = out_64.astype(np.float32)
# Reset scene flags.
np.copyto(self._scene.flags, original_flags)
elif self._segmentation_rendering:
_render.mjr_readPixels(out, None, self._rect, self._mjr_context)
# Convert 3-channel uint8 to 1-channel uint32.
image3 = out.astype(np.uint32)
segimage = (
image3[:, :, 0]
+ image3[:, :, 1] * (2**8)
+ image3[:, :, 2] * (2**16)
)
# Remap segid to 2-channel (object ID, object type) pair.
# Seg ID 0 is background -- will be remapped to (-1, -1).
ngeoms = self._scene.ngeom
segid2output = np.full(
(ngeoms + 1, 2), fill_value=-1, dtype=np.int32
) # Seg id cannot be > ngeom + 1.
visible_geoms = [g for g in self._scene.geoms[:ngeoms] if g.segid != -1]
visible_segids = np.array([g.segid + 1 for g in visible_geoms], np.int32)
visible_objid = np.array([g.objid for g in visible_geoms], np.int32)
visible_objtype = np.array([g.objtype for g in visible_geoms], np.int32)
segid2output[visible_segids, 0] = visible_objid
segid2output[visible_segids, 1] = visible_objtype
out = segid2output[segimage]
# Reset scene flags.
np.copyto(self._scene.flags, original_flags)
else:
_render.mjr_readPixels(out, None, self._rect, self._mjr_context)
out[:] = np.flipud(out)
return out
def update_scene(
self,
data: _structs.MjData,
camera: Union[int, str, _structs.MjvCamera] = -1,
scene_option: Optional[_structs.MjvOption] = None
):
"""Updates geometry used for rendering.
Args:
data: An instance of `MjData`.
camera: An instance of `MjvCamera`, a string or an integer
scene_option: A custom `MjvOption` instance to use to render
the scene instead of the default.
Raises:
ValueError: If `camera_id` is outside the valid range, or if camera does
not exist.
"""
if not isinstance(camera, _structs.MjvCamera):
camera_id = camera
if isinstance(camera_id, str):
camera_id = _functions.mj_name2id(
self._model, _enums.mjtObj.mjOBJ_CAMERA.value, camera_id
)
if camera_id == -1:
raise ValueError(f'The camera "{camera}" does not exist.')
if camera_id < -1 or camera_id >= self._model.ncam:
raise ValueError(f'The camera id {camera_id} is out of'
f' range [-1, {self._model.ncam}).')
# Render camera.
camera = _structs.MjvCamera()
camera.fixedcamid = camera_id
# Defaults to mjCAMERA_FREE, otherwise mjCAMERA_FIXED refers to a
# camera explicitly defined in the model.
if camera_id == -1:
camera.type = _enums.mjtCamera.mjCAMERA_FREE
_functions.mjv_defaultFreeCamera(self._model, camera)
else:
camera.type = _enums.mjtCamera.mjCAMERA_FIXED
scene_option = scene_option or self._scene_option
_functions.mjv_updateScene(
self._model,
data,
scene_option,
None,
camera, _enums.mjtCatBit.mjCAT_ALL.value,
self._scene,
)