In [1]:
# eagerx imports
import eagerx
import eagerx_interbotix.demo as demo

In [2]:
# Set parameters
rate = 10

# Create graph
graph = eagerx.Graph.create()

In [3]:
# Create box
box = demo.Box.make(
    "box",
    rate=rate,
    sensors=["position", "yaw"],
)
graph.add(box)

graph.connect(source=box.sensors.position, observation="pos")
graph.connect(source=box.sensors.yaw, observation="yaw")

pybullet build time: May 20 2022 19:44:17


In [4]:
# Create goal
goal = demo.BoxGoal.make(
    "goal",
    rate=rate,
    sensors=["position", "yaw"],
)
graph.add(goal)

graph.connect(source=goal.sensors.position, observation="pos_desired")
graph.connect(source=goal.sensors.yaw, observation="yaw_desired")

In [5]:
# Create arm
arm = demo.PandaArm.make(
    name="panda",
    rate=rate,
    sensors=["position", "velocity", "force_torque", "ee_pos", "ee_orn"],
    actuators=["vel_control"],
)
graph.add(arm)

# Connecting observations
graph.connect(source=arm.sensors.position, observation="joints")
graph.connect(source=arm.sensors.velocity, observation="velocity")
graph.connect(source=arm.sensors.force_torque, observation="force_torque")
graph.connect(source=arm.sensors.ee_pos, observation="ee_position")

In [6]:
# Create IK node
ik = demo.PandaIK.make(
    name="inverse_kinematics",
    rate=rate,
)
graph.add(ik)

# Create safety node
safe = demo.VelocityControl.make(
    name="safety",
    rate=rate,
    joint_names=arm.config.joint_names,
    joint_upper=arm.config.joint_upper,
    joint_lower=arm.config.joint_lower,
    vel_limit=[0.2 * vl for vl in arm.config.vel_limit],
)
graph.add(safe)

# Connecting goal
graph.connect(source=ik.outputs.dtarget, target=safe.inputs.goal)

# Connecting safety filter to arm
graph.connect(source=arm.sensors.position, target=safe.inputs.position)
graph.connect(source=arm.sensors.velocity, target=safe.inputs.velocity)
graph.connect(source=safe.outputs.filtered, target=arm.actuators.vel_control)

# Connect IK
graph.connect(source=arm.sensors.position, target=ik.inputs.current)
graph.connect(source=arm.sensors.ee_pos, target=ik.inputs.xyz)
graph.connect(source=arm.sensors.ee_orn, target=ik.inputs.orn)

# Connecting actions
graph.connect(action="dxyz", target=ik.inputs.dxyz)
graph.connect(action="dyaw", target=ik.inputs.dyaw)

In [7]:
from eagerx.backends.single_process import SingleProcess
backend = SingleProcess.make()

from eagerx_pybullet.engine import PybulletEngine
engine = PybulletEngine.make(rate=rate)

env = demo.BoxPushEnv.make(
    name="PandaEnv",
    rate=rate,
    graph=graph,
    engine=engine,
    backend=backend,
)
obs = env.reset()

In [8]:
graph.gui()

QStandardPaths: XDG_RUNTIME_DIR not set, defaulting to '/tmp/runtime-jelle'


In [9]:
for i in range(5):
    env.reset()
    rewards = 0
    done = False
    while not done:
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        rewards += reward
    print(f"Sum of rewards: {rewards}")

Sum of rewards: -227.62448208739917
Sum of rewards: -253.7293158246853
Sum of rewards: -152.2758541580414
Sum of rewards: -142.9140844319438
Sum of rewards: -239.72212593079902


In [10]:
env.shutdown()

In [11]:
from pathlib import Path
import eagerx_interbotix


# Load graph
graph = eagerx.Graph.load(f"{demo.LOG_DIR}/graph.yaml")

# Initialize env
env = demo.BoxPushEnv.make(
    name="ViperEnv",
    rate=rate,
    graph=graph,
    engine=engine,
    backend=backend,
)
obs = env.reset()

In [12]:
import stable_baselines3 as sb
import numpy as np


steps = 25_000
model = sb.SAC.load(f"{demo.LOG_DIR}/rl_model_{steps}_steps", env, verbose=0)
print("Evaluating policy for different numbers of steps.")
episodic_reward = []
while steps <= 1_600_000:
    i += 1
    print(f"--------------\nSteps: {steps}")
    # Load model parameters
    model.set_parameters(f"{demo.LOG_DIR}/rl_model_{steps}_steps")

    # Evaluate
    obs, done = env.reset(), False
    rewards = 0
    while not done:
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        rewards += reward
    episodic_reward.append(rewards)
    print(f"Mean episodic reward: {np.mean(episodic_reward)}")
    steps += 75_000

  from .autonotebook import tqdm as notebook_tqdm


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Evaluating policy for different numbers of steps.
--------------
Steps: 25000
Mean episodic reward: -268.65456541419775
--------------
Steps: 100000
Mean episodic reward: -228.4136743320675
--------------
Steps: 175000
Mean episodic reward: -211.628618395691
--------------
Steps: 250000
Mean episodic reward: -191.074027099569
--------------
Steps: 325000
Mean episodic reward: -182.00825886863706
--------------
Steps: 400000
Mean episodic reward: -168.05234024038901
--------------
Steps: 475000
Mean episodic reward: -154.17232534074785
--------------
Steps: 550000
Mean episodic reward: -145.55054082054636
--------------
Steps: 625000
Mean episodic reward: -136.67174250447943
--------------
Steps: 700000
Mean episodic reward: -131.67257891216605
--------------
Steps: 775000
Mean episodic reward: -127.50370307199788
--------------
Steps: 850000
Mean episodic reward: -123.5412142526158
--------------
Steps: 925000

The X11 connection broke: I/O error (code 1)
X connection to :1 broken (explicit kill or server shutdown).


argv[0]=
startThreads creating 1 threads.
starting thread 0
started thread 0 
argc=3
argv[0] = --unused
argv[1] = 
argv[2] = --start_demo_name=Physics Server
ExampleBrowserThreadFunc started
X11 functions dynamically loaded using dlopen/dlsym OK!
X11 functions dynamically loaded using dlopen/dlsym OK!
Creating context
Created GL 3.3 context
Direct GLX rendering context obtained
Making context current
GL_VENDOR=Intel
GL_RENDERER=Mesa Intel(R) UHD Graphics (CML GT2)
GL_VERSION=4.6 (Core Profile) Mesa 21.2.6
GL_SHADING_LANGUAGE_VERSION=4.60
pthread_getconcurrency()=0
Version = 4.6 (Core Profile) Mesa 21.2.6
Vendor = Intel
Renderer = Mesa Intel(R) UHD Graphics (CML GT2)
b3Printf: Selected demo: Physics Server
startThreads creating 1 threads.
starting thread 0
started thread 0 
MotionThreadFunc thread started
ven = Intel
Workaround for some crash in the Intel OpenGL driver on Linux/Ubuntu
ven = Intel
Workaround for some crash in the Intel OpenGL driver on Linux/Ubuntu

b3Printf: No inertial

In [None]:
env.shutdown()

<p align="center">
  <img src="gif/box_pushing_pybullet.gif" width="40%"  alt="box_sim"/>
  <img src="gif/box_pushing_real.gif" width="40%"  alt="box_real"/>
</p>

<p align="center">
  <img src="gif/pendulum_sim.gif" width="40%"  alt="pendulum_sim"/> 
  <img src="gif/pendulum_real.gif" width="40%"  alt="pendulum_real"/>
</p>
<img src="gif/all.gif" width="80%" />