In [1]:
import mujoco

In [2]:
import torchrl

In [16]:
# Reference environment: dm_control's humanoid "stand" task wrapped by TorchRL, on CPU.
dm_control_env = torchrl.envs.DMControlEnv('humanoid', 'stand', device='cpu')

In [17]:
# Baseline timing: one rollout of up to 100 steps on the single dm_control environment.
%time dm_control_env.rollout(100)

CPU times: user 143 ms, sys: 8.38 ms, total: 151 ms
Wall time: 148 ms


TensorDict(
    fields={
        action: Tensor(shape=torch.Size([100, 21]), device=cpu, dtype=torch.float64, is_shared=False),
        com_velocity: Tensor(shape=torch.Size([100, 3]), device=cpu, dtype=torch.float64, is_shared=False),
        done: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
        extremities: Tensor(shape=torch.Size([100, 12]), device=cpu, dtype=torch.float64, is_shared=False),
        head_height: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.float64, is_shared=False),
        joint_angles: Tensor(shape=torch.Size([100, 21]), device=cpu, dtype=torch.float64, is_shared=False),
        next: TensorDict(
            fields={
                com_velocity: Tensor(shape=torch.Size([100, 3]), device=cpu, dtype=torch.float64, is_shared=False),
                done: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                extremities: Tensor(shape=torch.Size([100, 12]), device=cpu

In [18]:
# Baseline throughput in steps/second: 100 steps divided by the 0.148 s wall time recorded above.
100/0.148

675.6756756756757

In [25]:
# Inspect the action spec of the dm_control baseline environment.
dm_control_env.action_spec

BoundedTensorSpec(
    shape=torch.Size([21]),
    space=ContinuousBox(
        low=Tensor(shape=torch.Size([21]), device=cpu, dtype=torch.float64, contiguous=True),
        high=Tensor(shape=torch.Size([21]), device=cpu, dtype=torch.float64, contiguous=True)),
    device=cpu,
    dtype=torch.float64,
    domain=continuous)

In [9]:
# Inspect the observation spec of the dm_control baseline environment.
# NOTE(review): the saved output reports device=cuda although the environment is created
# with device='cpu' — the output looks like it comes from an earlier execution; re-run to confirm.
dm_control_env.observation_spec

CompositeSpec(
    joint_angles: UnboundedContinuousTensorSpec(
        shape=torch.Size([21]),
        space=None,
        device=cuda,
        dtype=torch.float64,
        domain=continuous),
    head_height: UnboundedContinuousTensorSpec(
        shape=torch.Size([1]),
        space=ContinuousBox(
            low=Tensor(shape=torch.Size([1]), device=cuda:0, dtype=torch.float32, contiguous=True),
            high=Tensor(shape=torch.Size([1]), device=cuda:0, dtype=torch.float32, contiguous=True)),
        device=cuda,
        dtype=torch.float64,
        domain=continuous),
    extremities: UnboundedContinuousTensorSpec(
        shape=torch.Size([12]),
        space=None,
        device=cuda,
        dtype=torch.float64,
        domain=continuous),
    torso_vertical: UnboundedContinuousTensorSpec(
        shape=torch.Size([3]),
        space=None,
        device=cuda,
        dtype=torch.float64,
        domain=continuous),
    com_velocity: UnboundedContinuousTensorSpec(
        sha

In [3]:
import mujoco._simulation_pool

In [4]:
# Load the humanoid model from its XML file.
# NOTE(review): the relative path points outside this notebook's directory, so the cell only
# works when the notebook is run from its original location.
model = mujoco.MjModel.from_xml_path("../test_mjx/quick-mjx-performance-test/humanoid_mjx.xml")

56

In [6]:
import torch
import torchrl.data
import numpy as np
import tensordict

In [33]:
# Scratch check: torch.Size can be built from a plain tuple.
torch.Size((15,))

torch.Size([15])

In [7]:
import os

In [8]:
# CPU count reported by the OS; CustomMujocoEnv uses this as its default worker thread count.
os.cpu_count()

20

In [10]:
class CustomMujocoEnv(torchrl.envs.EnvBase):
    """Batched MuJoCo environment driven by a ``SimulationPool``.

    The only observation is ``"fullphysics"``: the pool's state cast to float32
    and reshaped to ``batch_size + (state_size,)``. Reward and termination are
    placeholders (all zeros / all ``False``).

    Args:
        mj_model: the ``mujoco.MjModel`` handed to the simulation pool.
        seed: seed forwarded to ``set_seed``; drawn at random when ``None``.
        batch_size: batch shape of the environment. Its total element count is
            passed to ``SimulationPool`` as the second argument (presumably the
            number of parallel simulations — confirm against the pool's API).
        device: device used for the action spec and for returned tensors.
        worker_thread_count: passed to ``SimulationPool`` as the third
            argument. Falls back to 1 when it is ``None``, which is what
            ``os.cpu_count()`` returns when the count cannot be determined.
    """

    # An immutable default for ``batch_size`` avoids sharing one list object
    # between every instance created without an explicit batch size.
    def __init__(self, mj_model, seed=None, batch_size=(), device="cpu",
                 worker_thread_count=os.cpu_count()):
        super().__init__(device=device, batch_size=batch_size)
        self._mj_model = mj_model
        self._make_spec()
        if seed is None:
            seed = torch.empty((), dtype=torch.int64).random_().item()
        self.set_seed(seed)
        if worker_thread_count is None:
            worker_thread_count = 1
        flat_batch_size = self.batch_size.numel()
        self.simulation_pool = mujoco._simulation_pool.SimulationPool(
            mj_model, flat_batch_size, worker_thread_count
        )

    def _make_spec(self):
        """Declare observation, state, action and reward specs from the model's sizes."""
        state_size = mujoco.mj_stateSize(self._mj_model, mujoco.mjtState.mjSTATE_FULLPHYSICS)
        action_size = mujoco.mj_stateSize(self._mj_model, mujoco.mjtState.mjSTATE_CTRL)
        self.observation_spec = torchrl.data.CompositeSpec(
            fullphysics=torchrl.data.UnboundedContinuousTensorSpec(
                shape=self.batch_size + (state_size,),
                dtype=torch.float32
            ),
            shape=self.batch_size
        )
        # NOTE(review): the original author was unsure about this. It declares the
        # observation entries as state inputs as well — confirm this is intended.
        self.state_spec = self.observation_spec.clone()

        # Actions are declared as bounded to [-1, 1] on every control dimension.
        action_shape = self.batch_size + (action_size,)
        self.action_spec = torchrl.data.BoundedTensorSpec(
            low=-torch.ones(action_shape, dtype=torch.float32, device=self.device),
            high=torch.ones(action_shape, dtype=torch.float32, device=self.device),
            device=self.device,
            dtype=torch.float32)

        self.reward_spec = torchrl.data.UnboundedContinuousTensorSpec(shape=self.batch_size + (1,))

    def _reset(self, td):
        """Reset every simulation in the pool and return the resulting state.

        The incoming ``td`` is not inspected: all simulations are flagged for
        reset, the pool is stepped once, and the reset flags are cleared again.
        """
        flat_batch_size = self.batch_size.numel()
        self.simulation_pool.setReset(np.ones(flat_batch_size, dtype=np.bool_))
        self.simulation_pool.step()
        self.simulation_pool.setReset(np.zeros(flat_batch_size, dtype=np.bool_))
        out = tensordict.TensorDict({
            "fullphysics": self._getPhysicsState(),
        }, batch_size=self.batch_size)
        return out

    def _step(self, action):
        """Advance the pool by one step and return state, reward and done.

        NOTE(review): ``action`` is not forwarded to the simulation pool, so the
        pool steps with whatever controls it already holds. Wiring it in needs
        the pool's control-setting API, which is not visible here — TODO.
        """
        self.simulation_pool.step()
        out = tensordict.TensorDict({
            "fullphysics": self._getPhysicsState(),
            "reward": self._getReward(),
            "done": self._getDone()
        }, batch_size=self.batch_size)
        return out

    def _set_seed(self, seed):
        """Seed torch's global generator and keep a handle to it on ``self.rng``."""
        rng = torch.manual_seed(seed)
        self.rng = rng

    def _getPhysicsState(self):
        """Return the pool state as a float32 tensor of shape ``batch_size + (state_size,)``."""
        state_size = mujoco.mj_stateSize(self._mj_model, mujoco.mjtState.mjSTATE_FULLPHYSICS)
        # getState() returns a memoryview; np.array copies it before the tensor conversion.
        fullphysics = torch.from_numpy(np.array(self.simulation_pool.getState()))
        return fullphysics.to(device=self.device, dtype=torch.float32).reshape(self.batch_size + (state_size,))

    def _getReward(self):
        """Placeholder reward: zeros shaped like ``reward_spec`` (``batch_size + (1,)``)."""
        #TODO: proper reward function
        return torch.zeros(self.batch_size + (1,), dtype=torch.float32, device=self.device)

    def _getDone(self):
        """Placeholder termination flag: all ``False``, with a trailing singleton dimension."""
        #TODO: proper termination function
        return torch.zeros(self.batch_size + (1,), dtype=torch.bool, device=self.device)

In [297]:
# Look up the docstring of the pool's getState method (IPython `?` help syntax).
env.simulation_pool.getState?

Docstring:
getState(self: mujoco._simulation_pool.SimulationPool) -> memoryview

Get a memory view of the state (nroll x nstate).
Type:      method

In [11]:
# Build the custom environment with a batch of 4000 and device='cuda' for its tensors.
env = CustomMujocoEnv(model, batch_size=torch.Size((4000,)), device='cuda')

In [12]:
# Time a single batched step using a randomly sampled action.
%time env.step(env.rand_action())

CPU times: user 259 ms, sys: 20.3 ms, total: 280 ms
Wall time: 97.9 ms


TensorDict(
    fields={
        action: Tensor(shape=torch.Size([4000, 21]), device=cuda:0, dtype=torch.float32, is_shared=True),
        next: TensorDict(
            fields={
                done: Tensor(shape=torch.Size([4000, 1]), device=cuda:0, dtype=torch.bool, is_shared=True),
                fullphysics: Tensor(shape=torch.Size([4000, 56]), device=cuda:0, dtype=torch.float32, is_shared=True),
                reward: Tensor(shape=torch.Size([4000, 1]), device=cuda:0, dtype=torch.float32, is_shared=True),
                terminated: Tensor(shape=torch.Size([4000, 1]), device=cuda:0, dtype=torch.bool, is_shared=True)},
            batch_size=torch.Size([4000]),
            device=cuda,
            is_shared=True)},
    batch_size=torch.Size([4000]),
    device=cuda,
    is_shared=True)

In [282]:
# Time a rollout of up to 100 steps through the TorchRL interface (trailing `;` hides the result).
%time env.rollout(100);

CPU times: user 27.7 s, sys: 73.8 ms, total: 27.8 s
Wall time: 1.52 s


In [283]:
# Throughput in environment-steps/second: 4000 environments x 100 steps over the rollout time.
# NOTE(review): the divisor is 1.53 s but the wall time recorded above is 1.52 s — presumably
# taken from a different run; confirm.
4000*100/1.53

261437.908496732

In [179]:
import tqdm

In [284]:
def dumb_rollout(env, n_steps=100):
    """Step the environment's simulation pool directly, bypassing the TorchRL wrapper.

    Used to measure raw simulation cost without tensordict construction or
    state copies.

    Args:
        env: object exposing a ``simulation_pool`` attribute with a ``step()`` method.
        n_steps: number of pool steps to run (defaults to 100).
    """
    pool = env.simulation_pool
    for _ in range(n_steps):
        pool.step()

In [285]:
# Time the raw pool stepping loop, for comparison with the TorchRL rollout timing.
%time dumb_rollout(env)

CPU times: user 20.7 s, sys: 58.2 ms, total: 20.7 s
Wall time: 1.1 s


In [178]:
# NOTE(review): this uses 2048 environments and a 0.880 s wall time that appear nowhere else
# in the notebook — presumably left over from an earlier run with a different batch size; confirm.
2048*100/0.880

232727.27272727274

In [149]:
# NOTE(review): this uses 2048 environments and a 0.899 s wall time that appear nowhere else
# in the notebook — presumably left over from an earlier run with a different batch size; confirm.
2048*100/0.899

227808.6763070078

In [238]:
# Run TorchRL's spec check on the custom environment.
torchrl.envs.utils.check_env_specs(env)

2024-03-22 15:30:06,228 [torchrl][INFO] check_env_specs succeeded!


In [15]:
# Display the simulation pool object (the trailing dot in the original made this a syntax error).
env.simulation_pool

<mujoco._simulation_pool.SimulationPool at 0x7e49b3cc9b30>

In [17]:
# NOTE(review): the recorded output below is an AttributeError about 'os.glob', which this
# expression cannot raise — the cell content or its output appears stale; re-run or remove.
os

AttributeError: module 'os' has no attribute 'glob'