In [1]:
from rlbench.environment import Environment
from rlbench.action_modes import ArmActionMode, ActionMode
from rlbench.observation_config import ObservationConfig
from rlbench.tasks import ReachTarget
import numpy as np

# Observation attribute names requested from the environment; the list is
# passed as `state_type_list` when the SimulationEnvironment is created.
state_types = [
    'gripper_open',
    'joint_velocities',
    'joint_positions',
    'joint_forces',
    'gripper_pose',
    'gripper_joint_positions',
    'gripper_touch_forces',
    'task_low_dim_state',
]

In [2]:
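# Customized interface: SimulationEnvironment from the RLBenchEnv module (the original RLBench interface is used further below)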
from RLBenchEnv import SimulationEnvironment

In [3]:
# Create the customized environment for the ReachTarget task, handing it the
# `state_types` list as `state_type_list`.
# NOTE(review): headless=True presumably runs the simulator without a GUI
# window — confirm in RLBenchEnv.
env = SimulationEnvironment(task_name=ReachTarget, state_type_list=state_types, headless=True)

In [4]:
# Reset the customized environment and keep the observation it returns.
obs = env.reset()

In [5]:
# Display the observation: the output is a flat array of 40 values.
# NOTE(review): presumably the requested state types concatenated in the
# order of `state_types` (1+7+7+7+7+2+6+3 = 40 would match) — confirm
# against RLBenchEnv.
obs

array([ 1.00000000e+00, -3.81469727e-05,  1.00135803e-04, -1.66893005e-04,
       -5.10215759e-04, -1.28746033e-04,  8.48770142e-04, -9.53674316e-06,
       -7.14897942e-06,  1.76037103e-01,  6.23193409e-06, -8.73283327e-01,
        1.03763214e-05,  1.22119141e+00,  7.85384059e-01,  1.40346866e-02,
        2.37096653e+01, -2.50278115e-02, -1.61151009e+01, -1.63688511e-01,
       -4.32816982e+00,  1.95555724e-02,  2.78910279e-01, -8.15735944e-03,
        1.47112620e+00, -8.00509497e-06,  9.92764175e-01, -1.23509535e-05,
        1.20081015e-01,  4.00007367e-02,  3.99990082e-02,  1.78048911e-03,
       -4.35605645e-04, -4.87267982e-07,  1.78270938e-03,  4.35239082e-04,
       -4.32133689e-08,  3.88525039e-01,  1.48332551e-01,  9.24807906e-01])

In [2]:
# The original RLBench interface (rlbench's Environment used directly, instead of the customized SimulationEnvironment above)

In [2]:
class Agent(object):
    """Random-action baseline agent.

    Every action is a vector of length ``action_size``: the first
    ``action_size - 1`` entries are drawn from a normal distribution
    (mean 0.0, standard deviation 0.1) and the last entry is fixed to 1.0
    (gripper always open).
    """

    def __init__(self, action_size, rng=None):
        """Store the action dimensionality and the random source.

        Args:
            action_size: Total length of the action vector; must be >= 1
                because the last entry is always the gripper command.
            rng: Optional random source exposing ``normal(loc, scale, size)``
                (e.g. ``np.random.RandomState(seed)`` or
                ``np.random.default_rng(seed)``). When ``None``, the global
                ``np.random`` state is used, as before.

        Raises:
            ValueError: If ``action_size`` is smaller than 1.
        """
        # Fail early: a smaller size would otherwise only surface inside
        # act() as a "negative dimensions" error from NumPy.
        if action_size < 1:
            raise ValueError(
                'action_size must be >= 1, got {}'.format(action_size))
        self.action_size = action_size
        self._rng = np.random if rng is None else rng

    def act(self, obs):
        """Return a random action; ``obs`` is accepted but not used.

        Returns:
            A 1-D array of length ``action_size`` whose last element is 1.0.
        """
        arm = self._rng.normal(0.0, 0.1, size=(self.action_size - 1,))
        gripper = [1.0]  # Always open
        return np.concatenate([arm, gripper], axis=-1)

In [3]:
# Build the observation configuration handed to Environment below.
# NOTE(review): set_all(True) presumably enables every observation channel —
# confirm against the rlbench ObservationConfig documentation.
obs_config = ObservationConfig()
obs_config.set_all(True)

In [4]:
# Set up the original RLBench environment with the ABS_JOINT_VELOCITY arm
# action mode and the observation config defined above.
# NOTE(review): this rebinds `env`, which earlier held the
# SimulationEnvironment instance; that instance is not shut down here.
action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
env = Environment(
    action_mode, obs_config=obs_config, headless=True)
# launch() is called before the task is requested from the environment.
env.launch()

task = env.get_task(ReachTarget)

# The random agent is sized from the environment's reported action size.
agent = Agent(env.action_size)

In [5]:
# Total action length reported by the environment (8 in the output below).
# Agent.act fills the last entry with the gripper command, so the remaining
# entries are the arm part.
act_dim = env.action_size
act_dim

8

In [6]:
# Hard-coded observation size.
# NOTE(review): 7 matches the (7,) shape of `gripper_pose` inspected further
# down; presumably that is the intended state — confirm, or derive it from
# the observation instead of hard-coding.
obs_dim = 7

In [7]:
# rlbench's `Environment` has no gym-style `action_space` (reading it raised
# AttributeError); it reports `action_size` instead. Use `action_space` when
# the object provides one, otherwise fall back to `action_size`.
action_space = getattr(env, 'action_space', None)
if action_space is not None:
    act_dim = action_space.shape[0] - 1
    max_action = float(action_space.high[0])
else:
    # Same "- 1" as the original expression; presumably it drops the gripper
    # entry that Agent.act appends as the last action element.
    act_dim = env.action_size - 1
    # No action bounds are available from this object.
    # TODO(review): choose an explicit limit before using max_action.
    max_action = None

In [10]:
# Reset the task; the result unpacks into (descriptions, observation).
# Note that `obs` is now the RLBench observation object, not the flat array
# returned by the customized environment earlier.
descriptions, obs = task.reset()

In [11]:
# Read the gripper-open field straight off the observation and display it.
state = obs.gripper_open
state

1.0

In [12]:
# Read the gripper pose field straight off the observation and display it.
state = obs.gripper_pose
state

array([ 2.78803885e-01, -8.15328024e-03,  1.47128594e+00,  8.08814002e-06,
        9.92747426e-01, -1.67505041e-05,  1.20219149e-01])

In [13]:
# Shape of the gripper_pose array currently held in `state`.
state.shape

(7,)

In [13]:
# Wrap the task's low-dimensional state in a list to get a leading axis.
np.array([obs.task_low_dim_state])

array([[0.05291733, 0.08702243, 0.9220044 ]])

In [None]:
# Wrap the joint positions in a list to get a leading axis.
np.array([obs.joint_positions])

In [None]:
# Sample an action from the random agent (Agent.act does not use `obs`).
action = agent.act(obs)

In [None]:
# Apply the action to the task; the result unpacks into
# (observation, reward, terminate).
obs, reward, terminate = task.step(action)