In [1]:
import eagerx

In [2]:
# Initialize EAGERx for this kernel. Per the cell output below, this call
# auto-starts a ROS master and returns the roslaunch parent handle.
eagerx.initialize("eagerx_core", anonymous=True, log_level=eagerx.log.INFO)

... logging to /home/jelle/.ros/log/22f836fe-bb47-11ec-9faa-3f010fe2a31f/roslaunch-jelle-Alienware-m15-R4-161367.log
[1mstarted roslaunch server http://145.94.60.89:36135/[0m
ros_comm version 1.15.14


SUMMARY

PARAMETERS
 * /rosdistro: noetic
 * /rosversion: 1.15.14

NODES

auto-starting new master
[1mprocess[master]: started with pid [161401][0m
[1mROS_MASTER_URI=http://localhost:11311[0m
[1msetting /run_id to 22f836fe-bb47-11ec-9faa-3f010fe2a31f[0m
[1mprocess[rosout-1]: started with pid [161427][0m
started core service [/rosout]


<roslaunch.parent.ROSLaunchParent at 0x7f015c4b0580>

In [3]:
from eagerx.core.env import EagerxEnv
from eagerx.core.graph import Graph
from eagerx.wrappers import Flatten

# Implementation specific
import eagerx.nodes  # Registers butterworth_filter # noqa # pylint: disable=unused-import
import eagerx_ode  # Registers OdeBridge # noqa # pylint: disable=unused-import
import eagerx_tutorials  # Registers Pendulum # noqa # pylint: disable=unused-import

In [4]:
# Third-party: NumPy for the reward computation, Stable-Baselines3 for the SAC learner
import numpy as np
import stable_baselines3 as sb

In [None]:
# Build the graph: action -> Butterworth filter -> pendulum -> observations.

# Rate shared by the filter node and the bridge below.
# Original note: depends on the rate of the ODE (presumably in Hz -- TODO confirm).
rate = 30.0

# Initialize empty graph
graph = Graph.create()

# Create the pendulum object, exposing its "pendulum_output" and "action_applied" sensors
pendulum = eagerx.Object.make("Pendulum", "pendulum", sensors=["pendulum_output", "action_applied"])

graph.add(pendulum)

# Create Butterworth filter running at the same rate
# NOTE(review): Wn=13 is presumably the filter's critical frequency -- confirm against the node's docs.
bf = eagerx.Node.make("ButterworthFilter", name="bf", rate=rate, Wn=13)
graph.add(bf)

# Connect the nodes
# The environment action "action" is fed into the filter input ...
graph.connect(action="action", target=bf.inputs.signal)
# ... the filtered signal drives the pendulum actuator ...
graph.connect(source=bf.outputs.filtered, target=pendulum.actuators.pendulum_input)
# ... and both pendulum sensors are exposed as observations.
# window=1 presumably keeps only the latest message per observation -- TODO confirm.
graph.connect(source=pendulum.sensors.pendulum_output, observation="observation", window=1)
graph.connect(source=pendulum.sensors.action_applied, observation="action_applied", window=1)

# Define the bridge (a single OdeBridge, running at the same rate)
bridge = eagerx.Bridge.make("OdeBridge", rate=rate, is_reactive=True)

# Define step function
def step_fn(prev_obs, obs, action, steps):
    """Compute the reward and termination flag for one environment step.

    Args:
        prev_obs: Previous observation; not used by this function.
        obs: Mapping whose ``"observation"`` entry holds, at index 0, the
            triple ``(sin(theta), cos(theta), theta_dot)``.
        action: Mapping whose ``"action"`` entry holds, at index 0, the
            applied input ``u``.
        steps: Step counter used for the termination check.

    Returns:
        A tuple ``(obs, reward, done, info)`` where ``obs`` is passed through
        unchanged, ``reward`` is the negated quadratic cost, ``done`` is true
        once ``steps`` exceeds 500, and ``info`` is an empty dict.
    """
    sin_th, cos_th, thdot = obs["observation"][0]
    u = action["action"][0]

    # Recover the angle from its sine/cosine encoding.
    th = np.arctan2(sin_th, cos_th)

    # Quadratic cost on angle, angular velocity and control effort
    # (summed left to right, in this order).
    angle_cost = th**2
    velocity_cost = 0.1 * thdot**2
    effort_cost = 0.001 * (u**2)
    cost = angle_cost + velocity_cost + effort_cost

    # The episode ends once the step counter exceeds 500.
    done = steps > 500

    # No extra diagnostics are reported.
    info = {}

    reward = -cost
    return obs, reward, done, info

# Initialize Environment
# EagerxEnv combines the graph, bridge and step function at the shared rate.
# Flatten presumably flattens the observation space for the MLP policy -- TODO confirm.
env = Flatten(EagerxEnv(name="rx", rate=rate, graph=graph, bridge=bridge, step_fn=step_fn))

# Initialize learner: Soft Actor-Critic with an MLP policy, running on the CPU
model = sb.SAC("MlpPolicy", env, verbose=1, device="cpu")

# Train for 1800 * rate environment steps (54000 steps at rate = 30.0).
# NOTE(review): this comment previously read "Train for 3 minutes"; if one step
# advances 1/rate seconds, 1800 * rate steps is 30 minutes of simulated time.
# Confirm whether 180 * rate was intended.
model.learn(total_timesteps=int(1800 * rate))

[INFO] [1649867476.421185]: Node "/rx/env/supervisor" initialized.
[INFO] [1649867476.552694]: Waiting for nodes "['bridge']" to be initialized.
[INFO] [1649867477.188952]: Node "/rx/environment" initialized.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
[INFO] [1649867478.324800]: Nodes initialized.
[INFO] [1649867478.585955]: Pipelines initialized.
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 501       |
|    ep_rew_mean     | -4.02e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 76        |
|    time_elapsed    | 26        |
|    total_timesteps | 2004      |
| train/             |           |
|    actor_loss      | 69.7      |
|    critic_loss     | 1.06      |
|    ent_coef        | 0.642     |
|    ent_coef_loss   | -0.0887   |
|    learning_rate   | 0.0003    |
|    n_updates       | 1903      |
----------------------------------
----