In [3]:

import logging

# Quieten logging before robosuite is imported further down: globally drop
# every record at INFO severity or below, and cap the "robosuite" logger at
# WARNING.
logging.disable(logging.INFO)
logging.getLogger("robosuite").setLevel(logging.WARNING)

import robosuite as suite
import numpy as np
import imageio
import os

In [4]:
# Display the value of the MUJOCO_GL environment variable, or "not set" when
# the variable is absent from the process environment.
os.getenv("MUJOCO_GL", "not set")

'egl'

In [2]:
# Roll out a random policy in a robosuite environment and save the camera
# frames as a video.

# Rollout settings, gathered here so they can be tuned in one place.
N_STEPS = 1000       # number of environment steps to record
ACTION_SCALE = 0.1   # scale applied to the standard-normal action noise
VIDEO_FPS = 20       # frame rate of the saved video
SEED = 0             # seeds the action noise only, not the simulator itself

# create environment instance (offscreen rendering only; the "agentview"
# camera image is returned as part of each observation)
env = suite.make(
    env_name="Lift",  # try with other tasks like "Stack" and "Door"
    robots="Panda",   # try with other robots like "Sawyer" and "Jaco"
    has_renderer=False,
    has_offscreen_renderer=True,
    use_camera_obs=True,
    camera_names="agentview",
    camera_heights=256,
    camera_widths=256,
)
frames = []
rng = np.random.default_rng(SEED)

# reset the environment
env.reset()

# The action shape does not change between steps, so look it up once.
action_shape = env.action_spec[0].shape

done = False
for _ in range(N_STEPS):
    if done:
        # Stepping an episode that has already terminated is an error in
        # robosuite, so start a fresh episode first. Resetting lazily (only
        # when another step follows) keeps the final `obs` and the env state
        # in sync for the cells below.
        env.reset()
    action = rng.standard_normal(action_shape) * ACTION_SCALE
    obs, reward, done, info = env.step(action)  # take action in the environment
    # NOTE(review): robosuite camera images may be vertically flipped depending
    # on its image-convention setting -- check the video and flip if needed.
    frames.append(obs["agentview_image"])

imageio.mimsave("rollout.mp4", frames, fps=VIDEO_FPS)

In [3]:
# Ask the environment whether it currently reports the task as solved.
# `_check_success` is underscore-prefixed (not public API); it is called here
# only for interactive inspection.
env._check_success()

np.False_

In [4]:
# Summarise the most recent observation: one "name: shape" line per entry,
# followed by the raw contents of the "object-state" entry.
for name, entry in obs.items():
    print(f"{name}: {entry.shape}")

print(obs["object-state"])

robot0_joint_pos: (7,)
robot0_joint_pos_cos: (7,)
robot0_joint_pos_sin: (7,)
robot0_joint_vel: (7,)
robot0_eef_pos: (3,)
robot0_eef_quat: (4,)
robot0_eef_quat_site: (4,)
robot0_gripper_qpos: (2,)
robot0_gripper_qvel: (2,)
agentview_image: (256, 256, 3)
cube_pos: (3,)
cube_quat: (4,)
gripper_to_cube_pos: (3,)
robot0_proprio-state: (43,)
object-state: (10,)
[ 7.80728054e-04 -2.54182700e-02  8.20942662e-01 -1.22487623e-17
  5.47286568e-18  9.91054743e-01  1.33455973e-01  1.36488349e-01
  1.54813090e-02 -2.00228054e-01]


In [5]:
%load_ext autoreload
%autoreload 2

from envs import RobosuiteGymWrapper

# Smoke-test the Gym-style wrapper: reset once, then take a handful of random
# actions and print what each step returns.
N_SMOKE_STEPS = 20

env = RobosuiteGymWrapper()
obs, info = env.reset()

print(f"obs: {obs.shape}")
print(f"info: {info}")

for _ in range(N_SMOKE_STEPS):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    print(f"Reward: {reward:.4f}, Terminated: {terminated}, Truncated: {truncated}")
    if terminated or truncated:
        # Start a new episode instead of stepping one that has already ended.
        obs, info = env.reset()

obs: (11,)
info: {}
Reward: 0.0146, Terminated: False, Truncated: False
Reward: 0.0154, Terminated: False, Truncated: False
Reward: 0.0152, Terminated: False, Truncated: False
Reward: 0.0148, Terminated: False, Truncated: False
Reward: 0.0141, Terminated: False, Truncated: False
Reward: 0.0139, Terminated: False, Truncated: False
Reward: 0.0139, Terminated: False, Truncated: False
Reward: 0.0141, Terminated: False, Truncated: False
Reward: 0.0136, Terminated: False, Truncated: False
Reward: 0.0126, Terminated: False, Truncated: False
Reward: 0.0116, Terminated: False, Truncated: False
Reward: 0.0099, Terminated: False, Truncated: False
Reward: 0.0104, Terminated: False, Truncated: False
Reward: 0.0133, Terminated: False, Truncated: False
Reward: 0.0157, Terminated: False, Truncated: False
Reward: 0.0159, Terminated: False, Truncated: False
Reward: 0.0180, Terminated: False, Truncated: False
Reward: 0.0184, Terminated: False, Truncated: False
Reward: 0.0167, Terminated: False, Truncated

In [6]:
%load_ext autoreload
%autoreload 2

import os
os.environ["WANDB_MODE"] = "disabled"

from envs.robosuite_wrapper import RobosuiteGymWrapper
from training.ppo import PPOTrainer
from configs.ppo_config import PPOConfig

import logging
logging.getLogger("robosuite").setLevel(logging.WARNING)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Create env and trainer
import torch  # NOTE(review): assumes PPOTrainer is PyTorch-based -- confirm

# Use the GPU when one is available, otherwise fall back to CPU so this cell
# also runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

env = RobosuiteGymWrapper(env_name="Lift")
config = PPOConfig(rollout_steps=512)  # smaller for quick test
trainer = PPOTrainer(config, env, device=device)

In [8]:
# Test just 1-2 iterations: 1024 timesteps is two rollouts of the 512 steps
# configured for the trainer above (the training log reports "Iter 0/2").
trainer.train(total_timesteps=1024)

Iter 0/2 | Timesteps: 512 | Avg reward: 0.16 | Policy loss: -0.0141 | Value loss: 0.0001
