# Running LSPI on the UR5

In [1]:
from __future__ import print_function
import rosgym
import gym
import numpy as np
import os
import rospy
import imp
import random
import requests
import time
import yaml
from os import path

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import sys
sys.path.append(os.path.dirname(os.getcwd()) + "/rl_agents/python")

In [4]:
import basic_agents
from geometry_msgs.msg import PointStamped
from visualization_msgs.msg import Marker

In [5]:
def make_environment(robot_conf_file, planning_group, num_joints, z_limit,
                     x_limits, y_limits, **kwargs):
    """
    Build the random-goal rosgym environment for a robot.

    Every argument is forwarded positionally to
    ``rosgym.make_randomgoal_robot_env`` after the fixed environment name
    ``"ur5_config_random_goal"``.

    :param robot_conf_file: robot configuration file handed to rosgym
    :param planning_group: planning group name handed to rosgym
    :param num_joints: joint count handed to rosgym
    :param z_limit: z limit handed to rosgym
    :param x_limits: indexable pair; elements 0 and 1 are forwarded in order
    :param y_limits: indexable pair; elements 0 and 1 are forwarded in order
    :param kwargs: accepted but ignored
    :return: rosgym environment
    """
    x_first, x_second = x_limits[0], x_limits[1]
    y_first, y_second = y_limits[0], y_limits[1]
    return rosgym.make_randomgoal_robot_env(
        "ur5_config_random_goal", robot_conf_file, planning_group, num_joints,
        z_limit, x_first, x_second, y_first, y_second)

class SimpleConfig(object):
    """Key lookup over a config mapping, backed by a mapping of defaults."""

    def __init__(self, config, defaults):
        # Both mappings are stored by reference, not copied.
        self._config = config
        self._defaults = defaults

    def __getitem__(self, key):
        """Return the configured value, else the default, else None."""
        fallback = self._defaults.get(key)
        return self._config.get(key, fallback)

In [6]:
# Fallback settings read through SimpleConfig when a key is not overridden.
defaults = {
    # Robot configuration file handed to rosgym. The path is machine specific,
    # so it can be overridden with the ROSGYM_UR5_CONF_FILE environment
    # variable instead of editing the notebook.
    "robot_conf_file": os.environ.get(
        "ROSGYM_UR5_CONF_FILE",
        "/home/cannon/rl_wksp/src/rosgym/src/rosgym/ur5_config_random_goal.py"),
    "planning_group": "manipulator",
    "num_joints": 3,
    # Limits forwarded to the random-goal environment.
    "z_limit": 2.0,
    "x_min": -2.0,
    "x_max": 2.0,
    "y_min": -2.0,
    "y_max": 2.0,
    # NOTE(review): not read by any cell visible in this notebook section.
    "num_tests": 10,
    "num_episodes": 100,
}

In [7]:
# No overrides are supplied, so every lookup falls through to `defaults`.
sc = SimpleConfig({}, defaults)

# Create the rosgym environment for the UR5.
print("Attempting to make the rosgym environment...")
ur5env = make_environment(
    robot_conf_file=sc["robot_conf_file"],
    planning_group=sc["planning_group"],
    num_joints=sc["num_joints"],
    z_limit=sc["z_limit"],
    x_limits=[sc["x_min"], sc["x_max"]],
    y_limits=[sc["y_min"], sc["y_max"]],
)
print("Made the rosgym environment...")

Attempting to make the rosgym environment...


Connecting to control group manipulator
Found move groups: ['endeffector', 'forearm', 'manipulator', 'shoulder', 'upper_arm', 'wrist_1', 'wrist_2']
Found planning scene: <moveit_commander.planning_scene_interface.PlanningSceneInterface object at 0x7f8cdac18350>


Connecting reset handler
Finished connecting wt_commander
Set up MoveGroupInterface
[INFO] [1554974447.056666, 46.088000]: Waiting for get_planning_scene
Set up PlanningSceneInterface
Initialized SimpleActionClient


Finished connecting reset handler
Waiting for sensor /joint_states
Got initial data for sensor /joint_states
RobotAgent_UR5 has been initialized.
Finished initializing robot
Initialized sensors
Connecting to control group manipulator


Finished waiting


Initialized move groups and joints
Initialized service proxies
Getting new goal
Sampling new goal
Goal is: [-0.3054623372679364, 0.18661042552733695, 0.5351967249896288]


Finding new goal pose
Finding new goal pose
Finding new goal pose
Found goal pose: x: -0.305462337268
y: 0.186610425527
z: 0.53519672499
Made the rosgym environment...


In [8]:
# Lower bounds of the first component of the tuple action space.
ur5env.action_space.spaces[0].low

array([-6.2831853, -6.2831853, -6.2831853])

In [9]:
# Upper bounds of the first component of the tuple action space.
ur5env.action_space.spaces[0].high

array([ 6.2831853,  6.2831853,  6.2831853])

In [10]:
# Inspect the structure of the observation space.
ur5env.observation_space

Tuple(Box(6,), Box(6,))

So the low and high positions of our action space are $-2\pi$ and $2\pi$.
A good start for working on the discretization would probably just be to 
uniformly chunk our action space.

In [11]:
# this discretizer does that
from lspi.util import ur5_discretizer

In [12]:
# Discretize the continuous action space into a list of candidate actions.
# NOTE(review): 4 is presumably the number of chunks per joint - confirm
# against lspi.util.ur5_discretizer.
actions = ur5_discretizer(ur5env.action_space, 4)

In [13]:
# Peek at the first discretized action.
actions[0]

(array([-6.2831853, -6.2831853, -6.2831853]),)

## Generating sample data

We need to generate our sample data only after wrapping the environment
in a discrete-action wrapper, so that the samples use the discretized actions.

In [14]:
from lspi.util import DiscreteEnvWrapper

# Wrap the UR5 environment together with the discretized action list.
# NOTE(review): presumably the wrapper exposes a discrete action space whose
# indices map into `actions` - confirm against lspi.util.DiscreteEnvWrapper.
env = DiscreteEnvWrapper(ur5env, actions)

In [16]:
from lspi.policy import RandomPolicy
from lspi.util import SampleGenerator

# Gather the data later passed to lspi.fit by sampling the wrapped environment
# with a random policy.
# NOTE(review): sample(10, 1000) is presumably (number of episodes, max steps
# per episode) - confirm against lspi.util.SampleGenerator.
rp = RandomPolicy(env)
data = SampleGenerator(env, rp).sample(10, 1000)

Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Failed to plan to trajectory!
Retrying Reset...
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Found goal pose: x: 0.0163792327277
y: -0.38737962413
z: 0.761430410219
New target pose: [0.016379232727700317, -0.38737962413025107, 0.7614304102187317]
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Finding new goal pose
Finding new goal pose
Finding new goal pose
Finding new goal pose
Found goal pose: x: 0.0335958566342
y: -0.112661960754
z: 0.583514364331
New target pose: [0.033595856634212815, -0.11266196075403795, 0.583514364330558]
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Found goal pose: x: -0.425785244933
y: -0.420562651298
z: 0.446672185948
New target pose: [-0.4257852449334898, -0.42056265129801607, 0.44667218594751246]
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Found goal pose: x: 0.319217499873
y: -0.121927116518
z: 0.359578950707
New target pose: [0.3192174998734818, -0.12192711651821492, 0.35957895070727114]
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Finding new goal pose
Finding new goal pose
Found goal pose: x: -0.377163084887
y: -0.148690006359
z: 0.112576089776
New target pose: [-0.37716308488732375, -0.14869000635861454, 0.11257608977569226]
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Found goal pose: x: -0.418809923266
y: -0.380992647353
z: 0.537357612937
New target pose: [-0.418809923265931, -0.3809926473532162, 0.5373576129368961]
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Found goal pose: x: -0.376251700707
y: 0.0623826156482
z: 0.603599747422
New target pose: [-0.37625170070735303, 0.06238261564816817, 0.6035997474219215]
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Failed to plan to trajectory!
Retrying Reset...
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Finding new goal pose
Finding new goal pose
Found goal pose: x: -0.582374285935
y: -0.0427582856051
z: 0.568624442565
New target pose: [-0.582374285934934, -0.042758285605123104, 0.5686244425651003]
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Failed to plan to trajectory!
Retrying Reset...
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Failed to plan to trajectory!
Retrying Reset...
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Found goal pose: x: 0.373511634873
y: -0.389739641422
z: 0.203779540179
New target pose: [0.3735116348734456, -0.38973964142174666, 0.2037795401792401]
Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Finding new goal pose
Finding new goal pose
Finding new goal pose
Found goal pose: x: 0.278668013844
y: -0.122037503803
z: 0.775028383044
New target pose: [0.27866801384387907, -0.12203750380337779, 0.7750283830438948]


#### Basis

For the first try we'll just use a quadratic basis built from randomly generated coefficient matrices.

In [35]:
# 12 matches the two Box(6,) components reported for ur5env.observation_space.
statesize = 12
# 3 matches the length of the action-space bound arrays shown earlier.
actionsize = 3
# Side length of the square coefficient matrices generated for the basis.
size = statesize + actionsize

In [36]:
def randomMatrix(shape, low=-1, high=1, rng=None):
    """
    Draw an array of the given shape with entries uniform in [low, high).

    :param shape: shape of the array to generate
    :param low: inclusive lower bound of the entries
    :param high: exclusive upper bound of the entries
    :param rng: optional ``np.random.RandomState`` to draw from, so callers
        can get reproducible matrices; defaults to NumPy's global generator
    :return: ``np.ndarray`` of floats with the requested shape
    """
    source = np.random if rng is None else rng
    # random_sample yields values in [0, 1); rescale and shift into [low, high).
    return source.random_sample(shape) * (high - low) + low

# Number of random coefficient matrices to draw.
numMat = 30
# Each matrix is (size x size) with entries drawn by randomMatrix's default range.
coefs = [randomMatrix((size, size)) for _ in range(numMat)]

In [37]:
from lspi.basis import DiscreteQuadraticTupleBasis

# Build the basis from the coefficient matrices, the discretized actions and
# the wrapped environment's observation space.
# NOTE(review): coefs is passed wrapped in a one-element list - confirm this is
# the structure DiscreteQuadraticTupleBasis expects.
basis = DiscreteQuadraticTupleBasis([coefs], actions, env.observation_space)

In [38]:
# Smoke test: evaluate the basis on a randomly sampled observation/action pair.
basis(env.observation_space.sample(), env.action_space.sample())

array([   1.        ,  -26.06159623,  -32.68497302,  -60.64137087,
        145.20785957, -122.40088118,  -65.96842271,   70.09445487,
         12.65181081,  -37.24441037,  -70.31080766,    3.45705329,
         21.06265736,  154.11020743,   31.90463303,   74.71831233,
        -88.51430001,  -21.77724507,  -21.55606833,   42.20956175,
        -16.85830779,  106.45596578,   10.99745785,   -9.62722788,
         21.48098453,  -11.63941566,   76.69937712,   13.03554931,
         14.22401432,  -41.08521   ,  -36.97305414])

In [40]:
from lspi.optim import LSTDQ, LSPI
from lspi.policy import DiscreteActionBasisPolicy

# Assemble the LSTDQ solver, the policy and the LSPI driver on the same basis.
lstdq = LSTDQ(basis, discount=0.95)
# The third argument is an all-zero vector of length basis.rank
# (presumably the initial policy weights - confirm).
dabp = DiscreteActionBasisPolicy(env.action_space, basis, np.zeros(basis.rank))
# NOTE(review): epsilon is presumably the stopping threshold on the
# per-iteration "Diff" printed by fit - confirm against lspi.optim.LSPI.
lspi = LSPI(lstdq, dabp, max_iter=50, epsilon=1e-3, verbose=True)

In [41]:
# Fit on the sampled data; the output below shows a "Diff" value per iteration.
lspi.fit(data)

Iteration 0: Diff: 2193.32924933
Iteration 1: Diff: 1912.37397313
Iteration 2: Diff: 17.6683018571
Iteration 3: Diff: 0.0616077814138


<lspi.optim.LSPI at 0x7f13940d5090>

In [49]:
# Roll out the fitted LSPI policy and record the cumulative reward per episode.
NUM_EVAL_EPISODES = 10
# Hard cap so that an episode which never reports `done` still terminates.
MAX_STEPS_PER_EPISODE = 500

rewards = []
# Stored under its own name: rebinding `actions` here would clobber the
# discretized action list that `env` and `basis` were constructed from.
actions_taken = []

for episode in range(NUM_EVAL_EPISODES):
    state = env.reset()
    done = False
    cumr = 0
    itr = 0
    episode_actions = []
    while not done and itr < MAX_STEPS_PER_EPISODE:
        action = lspi.policy(state)
        state, reward, done, info = env.step(action)
        cumr += reward
        # Without this increment the step cap above is never reached.
        itr += 1
        episode_actions.append(action)
        print("Action: {}, State: {}, Reward: {}".format(action, state, reward))
    rewards.append(cumr)
    actions_taken.append(episode_actions)
rewards = np.array(rewards)

Planning to [1.5707963267948966, 0.7853981633974483, 0, 0, 0, 0]  on joints  ['elbow_joint', 'shoulder_lift_joint', 'shoulder_pan_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint']
Successfully planned to position. Executing trajectory.


Sampling new goal


Finding new goal pose
Found goal pose: x: -0.210415708346
y: -0.0938964763398
z: 0.42216868985
New target pose: [-0.21041570834617976, -0.09389647633980201, 0.4221686898498746]
Action: 25, State: ([1.5534821478771024, 0.8000265052334683, 0.018917359273430634, 0.10491482655739465, -0.7318237526005614, -0.6031945969838799, -0.7244898863215887, -0.21251038048432008, 0.37959324202988953, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0045389439
Action: 25, State: ([1.556683485934367, 0.7975937785182774, 0.014294236350371747, 0.1375606027745576, -0.8305710842286973, -0.9490648352190111, -0.7239212923997657, -0.2103982984904198, 0.3799237733573196, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0036712699
Action: 25, State: ([1.5453462552691342, 0.7974148014485252, -0.012026742525288903, -0.8110748659460737, -0.6062371614397102, -1.8996954760165852, -0.7313729982282571, -0.1967886493903625, 0.378067985302112, -0.2104157083461797

Action: 25, State: ([1.4131098207173451, 0.8588990846597566, -0.45216046121762243, -0.491681747867196, -0.3733846598163907, -1.2037778058328579, -0.7621117566327291, 0.05273019508689043, 0.3806600431975968, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0129927866
Action: 25, State: ([1.4083581955278905, 0.8618391439681448, -0.4590738834546082, -0.48894579106536923, -0.37549692491044917, -1.1366529855912562, -0.7621117566327291, 0.05273019508689043, 0.3806600431975968, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0129927866
Action: 25, State: ([1.3983967976501654, 0.8655170551663414, -0.470621386091878, -0.505866953011621, -0.3571566882205671, -0.9068842858817442, -0.7629125038734283, 0.06104151677651552, 0.3813159886165669, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0145534591
Action: 25, State: ([1.3890337617325583, 0.8705031902344471, -0.48465313374109975, -0.49622622586294296, -0.3

Action: 25, State: ([1.1927511175011771, 0.9561859020724537, -0.6952629106310386, -0.28422551093757215, -0.45689866803589874, -0.7073550982567401, -0.7601215233613235, 0.21907065301240836, 0.3817125997337067, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0378125243
Action: 25, State: ([1.1892031826019016, 0.9578237496348105, -0.6975356018168171, -0.2583688910656301, -0.4794862197062648, -0.4200161566113296, -0.7596520252300873, 0.22399587893081757, 0.38171223044610575, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0386480162
Action: 25, State: ([1.1810176372196581, 0.9615274161799743, -0.7066090823517346, -0.33051299526605027, -0.4539603928731446, -0.7066913952609936, -0.7596520252300873, 0.22399587893081757, 0.38171223044610575, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0386480162
Action: 25, State: ([1.1734466539750112, 0.9658485394543952, -0.7139152441203702, -0.25800376855384555, 

Action: 25, State: ([1.0708337151010507, 1.0129824295432313, -0.8481978749081893, 0.0595491928219061, -0.611881552141114, -0.6205155308323062, -0.7258881926629722, 0.325709700109034, 0.3828977954673561, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0424284486
Action: 25, State: ([1.0685244599408454, 1.0140751080508084, -0.8534202899010097, 0.09727069334547056, -0.6461689299906338, -0.516130186681361, -0.7258881926629722, 0.325709700109034, 0.3828977954673561, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0424284486
Action: 25, State: ([1.066655509070415, 1.0139098492132659, -0.8578093521567167, 0.1270918314706337, -0.6444064067933399, -0.3049932166244545, -0.724074811316052, 0.32883726511591405, 0.38293606279600784, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0421153019
Action: 25, State: ([1.065662315913145, 1.0144351526662039, -0.8617427624282152, 0.12328722632999212, -0.6663177676657

Action: 25, State: ([-0.7562096536601954, 1.6437861612609064, -1.1314955151197275, -2.239813256208654, -1.3541348918708904, -0.37399088930272695, -0.6512369435382747, 0.5797352192023766, 0.21051511075193957, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.0564223879
Action: 25, State: ([-0.7798578370459932, 1.6445744896974235, -1.1344459605667048, -2.17069117236084, -1.3418659249359042, -0.32952556332542765, -0.644477521068748, 0.573145151758037, 0.1893837460127848, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.042486006
Action: 25, State: ([-0.835734407342013, 1.6433607391912295, -1.1417981437905773, -2.17764183172121, -1.2700788007728079, -0.5922362500834113, -0.6364774667624978, 0.5671432843363018, 0.17108571249468352, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -14.028969711
Action: 25, State: ([-0.8837562105780616, 1.6446586184253897, -1.1489985144532397, -2.1023125104989697, -1.26053435

Action: 25, State: ([-2.074766600323235, 1.645625668348563, -1.5684374626847797, 0.16696378058851413, -0.4242889448058435, -0.9893790267485767, -0.32038348726122456, 0.25265749691908646, -0.030963074179125638, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.5686669592
Action: 25, State: ([-2.092837209029538, 1.6459392923922795, -1.5889228062710448, -0.19741743442074655, -0.40682295325442003, -0.9824745869024535, -0.31664930272243913, 0.25059833106662066, -0.030150098904728695, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.5644111827
Action: 25, State: ([-2.109223495363274, 1.6455354355029632, -1.6106071508864304, -0.1127464349462078, -0.4057457317215324, -1.167191960478353, -0.3099899300108394, 0.2456999746684418, -0.028481805389307113, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.5560496693
Action: 25, State: ([-2.1166197926747774, 1.6472500104595058, -1.6327980399031734, 0.34371073919055

Action: 25, State: ([-2.095692528096828, 1.6489996860218952, -1.8839387950493816, 0.7212541336461263, -0.7633315012324677, -0.6559242011620412, -0.21006815640386814, 0.2629694359526116, -0.02823946070884925, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4972283658
Action: 25, State: ([-2.096703289820047, 1.6499685937408826, -1.891315080552058, 0.6800264248607808, -0.7279359628878121, -0.7345126504738682, -0.20783169418520311, 0.2628145019054307, -0.028103714345326047, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4957094662
Action: 25, State: ([-2.097488695944362, 1.648890875667223, -1.8943996901095925, 0.640221185375087, -0.595754569601861, -0.7500742794951107, -0.206113547962649, 0.26218597230915175, -0.02811786535505556, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4941587654
Action: 25, State: ([-2.0990249934061413, 1.6508517732677808, -1.9007399293805882, 0.656889250683341, -0.64877

Action: 25, State: ([-2.0974339663758927, 1.6513592201130338, -2.0720796057560484, 0.7452519805454831, -0.8161797821710772, -0.5537452763298774, -0.14289007537959994, 0.25646719047525407, -0.027647636024265954, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4543578811
Action: 25, State: ([-2.0974909892917957, 1.650783696519591, -2.076108710402287, 0.7302402219186008, -0.755374958537699, -0.6306321225381843, -0.1415164039715334, 0.2561246237592078, -0.02751923413051116, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4533840964
Action: 25, State: ([-2.096900844980051, 1.6508659759063802, -2.0796760862741577, 0.7604825341982304, -0.7694092293406078, -0.6135407225744931, -0.1415164039715334, 0.2561246237592078, -0.02751923413051116, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4533840964
Action: 25, State: ([-2.096742683800441, 1.6511118752640277, -2.0833928908498134, 0.7417773354776296, -0.78

Action: 25, State: ([-2.09722644894794, 1.6512414157763464, -2.084723753426996, 0.7183727668386531, -0.805127048164176, -0.44175972658779694, -0.13877038773311737, 0.2558436395294161, -0.02759823528991734, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.451833328
Action: 25, State: ([-2.0972597163778666, 1.6511686979996796, -2.083976619351354, 0.714963476576944, -0.8391373824506447, -0.3806140753985261, -0.1384452515895337, 0.2556213434005407, -0.027701274537967002, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4514942839
Action: 25, State: ([-2.097029030651461, 1.651225881076531, -2.0836723857858974, 0.740610787405377, -0.8429069785269522, -0.3854753832175154, -0.13877912919995727, 0.2557565430555832, -0.027731032268633937, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517738324
Action: 25, State: ([-2.0973353124368854, 1.6517118902157266, -2.084809195383979, 0.7299091648329111, -0.795504

Action: 25, State: ([-2.0971011157069226, 1.6513556324110032, -2.0841982298458497, 0.7494458641209503, -0.8216571181596783, -0.38879591991737716, -0.13844421328573117, 0.2555545517766527, -0.02739515373114948, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4514064417
Action: 25, State: ([-2.0969744918973703, 1.6511970239747207, -2.083685147235993, 0.7521789947756011, -0.8438632055453409, -0.3873982450114592, -0.13870739448983155, 0.2557274346286615, -0.027382432997689443, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516814364
Action: 25, State: ([-2.097462359764375, 1.65174825515148, -2.0832605392069707, 0.7116274833282382, -0.8291462858260805, -0.3286206768827926, -0.13870739448983155, 0.2557274346286615, -0.027382432997689443, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516814364
Action: 25, State: ([-2.0974410913735912, 1.6512127530593395, -2.0847821847897947, 0.7325359343217875, -

Action: 25, State: ([-2.0972064497452525, 1.6512114719141424, -2.084267390822177, 0.7343716343001867, -0.8297450322779971, -0.38755791955559316, -0.13860614351872325, 0.25565170878848986, -0.027712917571960205, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515982739
Action: 25, State: ([-2.0972064497452525, 1.6512114719141424, -2.084267390822177, 0.7343716343001867, -0.8297450322779971, -0.38755791955559316, -0.13860614351872325, 0.25565170878848986, -0.027712917571960205, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515982739
Action: 25, State: ([-2.0973401424616425, 1.6515811295157183, -2.0834797845852293, 0.7121714522026765, -0.8243016217444061, -0.33211464131360297, -0.13878832838573993, 0.25570421235325896, -0.027683011726545304, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517278613
Action: 25, State: ([-2.097807250131071, 1.6511045402239084, -2.0847776738748784, 0.690608188510

Action: 25, State: ([-2.097337452678696, 1.6512088111793704, -2.08443717337686, 0.719363854376195, -0.8245188967427888, -0.3932073925069105, -0.13842284403906113, 0.25569139035067323, -0.027363241665451177, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515131146
Action: 25, State: ([-2.0970756102388286, 1.6513103333553305, -2.0835510732658404, 0.7398719706160927, -0.8444239026078212, -0.3826246948146095, -0.13869540354627025, 0.2557034267567443, -0.027717784092327558, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516863527
Action: 25, State: ([-2.0973943610934462, 1.651632226824316, -2.0834371607892237, 0.7065340684834062, -0.8250992834990334, -0.32688515575577165, -0.13869540354627025, 0.2557034267567443, -0.027717784092327558, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516863527
Action: 25, State: ([-2.097559275657974, 1.651203982302631, -2.0847984464789278, 0.7205256725799472, -0

Action: 25, State: ([-2.0974843363643982, 1.65111016758841, -2.0849473882073637, 0.7203811758940213, -0.7963774165625085, -0.5071702209953184, -0.13836791468951373, 0.2556037777903154, -0.02741602409980054, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4514153235
Action: 25, State: ([-2.0971343795125774, 1.6512339093259705, -2.0843295681072727, 0.7354961078238909, -0.8291127634784284, -0.39126307860973153, -0.13836791468951373, 0.2556037777903154, -0.02741602409980054, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4514153235
Action: 25, State: ([-2.097224546136573, 1.6512067135783264, -2.083646871527707, 0.7295046445577983, -0.843473164296638, -0.3879314247643185, -0.13856815486410923, 0.255676063480885, -0.027718549820272442, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516021009
Action: 25, State: ([-2.097236478418795, 1.6517256127104911, -2.083342111434739, 0.7405767802120651, -0.820

Action: 25, State: ([-2.0976071443209108, 1.6516367108792158, -2.083446601456381, 0.6943203681053842, -0.82630414531754, -0.32858743977399285, -0.13882067752699395, 0.2558391562994939, -0.02721588350833426, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518174358
Action: 25, State: ([-2.0974203889466185, 1.6512135755836699, -2.084740431384784, 0.7323613221226402, -0.7993818751939885, -0.503092199156534, -0.13848252352084098, 0.2555437967527865, -0.027696645848982182, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.451443643
Action: 25, State: ([-2.096882204194536, 1.6512012998538514, -2.0843678138129054, 0.7601472633171436, -0.831584430053198, -0.39702652981814823, -0.13851777302405271, 0.25573893327223957, -0.02777675206060065, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516387646
Action: 25, State: ([-2.0972484535319484, 1.6513789734906377, -2.083658162770199, 0.7273391870537071, -0.84

Action: 25, State: ([-2.097104229238097, 1.6516617960435598, -2.0848712343579745, 0.7479621568406363, -0.8001456898207229, -0.5227993389741277, -0.1388211454734922, 0.2557528382546586, -0.027739401806449238, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.451791336
Action: 25, State: ([-2.0970345626743914, 1.6512146377829264, -2.0846652585320715, 0.7464088007818347, -0.8172523659116502, -0.4529155814753276, -0.13829989858461006, 0.2557768367381248, -0.027587618308717643, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.451550797
Action: 25, State: ([-2.097300084141766, 1.6511978654743942, -2.0837829930789997, 0.707199888546584, -0.8438771445296899, -0.38307134261236886, -0.1386452749525022, 0.2557613766610988, -0.027671625527588617, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517089037
Action: 25, State: ([-2.096994382327938, 1.6512240013247235, -2.083602533821998, 0.7429425365116356, -0.84

Action: 25, State: ([-2.0973640659077626, 1.651706999212359, -2.084834770092023, 0.7247062304846538, -0.7937503013027117, -0.5161092044812955, -0.13834827340868758, 0.2557163779413053, -0.027521618542026005, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515145397
Action: 25, State: ([-2.0973899995254817, 1.6512400443814084, -2.0845860460572734, 0.7072341806872845, -0.8053911570947038, -0.43811433214250095, -0.13845320322686383, 0.25570244895951355, -0.027381053000180233, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515388567
Action: 25, State: ([-2.0971715944405194, 1.6513211929101033, -2.083684395808042, 0.7346559339484123, -0.8440859189545528, -0.38270929501915524, -0.13845320322686383, 0.25570244895951355, -0.027381053000180233, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515388567
Action: 25, State: ([-2.0972989714143893, 1.6512570487912202, -2.0835102960574687, 0.71975120025384

Action: 25, State: ([-2.0976664932839384, 1.65110453517282, -2.084822334904046, 0.702911323114681, -0.7987707744389039, -0.5031216650953005, -0.138751747565272, 0.2557864375377087, -0.027404847449654934, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517562277
Action: 25, State: ([-2.0972474522860125, 1.651237702821625, -2.0847009099617653, 0.726161167803403, -0.8000713816955327, -0.44153205127894546, -0.13844604699103766, 0.25555181756502704, -0.02739451222322531, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4514048576
Action: 25, State: ([-2.0974336803741114, 1.6511991904262837, -2.0838980448003888, 0.6998982792747455, -0.838895015281611, -0.3784024582782721, -0.13870737562746777, 0.25572768416116276, -0.02738247667729171, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516816499
Action: 25, State: ([-2.097359884355879, 1.6517542762670514, -2.0832830672159304, 0.7175674023651274, -0.827

Action: 25, State: ([-2.097459107951117, 1.6511047653706, -2.0849286289277043, 0.7222472448218876, -0.7950404529050628, -0.5083047368979035, -0.13886991004378946, 0.2558622098061833, -0.02753106276644568, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518904962
Action: 25, State: ([-2.097168817409308, 1.651211732688619, -2.0842679376412576, 0.7356882299799832, -0.8296763136001887, -0.38772016846135554, -0.1385986391319311, 0.2556646379868466, -0.02771612131829221, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516063324
Action: 25, State: ([-2.097270519583251, 1.651257971262421, -2.083523411502231, 0.7228282262557079, -0.8423787547176853, -0.3852621732242489, -0.1385986391319311, 0.2556646379868466, -0.02771612131829221, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516063324
Action: 25, State: ([-2.097198212774331, 1.6517725635190414, -2.0847835681258715, 0.7410053671816655, -0.79097327

Action: 25, State: ([-2.0973384021951365, 1.6512087970163574, -2.0844370629867512, 0.7194197468126085, -0.8245248444160134, -0.39320802858862025, -0.1384231884353499, 0.255690837929129, -0.02736310592072483, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515127815
Action: 25, State: ([-2.0971264990425382, 1.6512205175104695, -2.0839637631963024, 0.7351741212074466, -0.8405892232896791, -0.37804459226767523, -0.13869546700017288, 0.25570333025675657, -0.027717772690483644, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516862973
Action: 25, State: ([-2.0973937681364365, 1.6516322332879785, -2.083437238981354, 0.7065747349465674, -0.8251011835583688, -0.326895263671545, -0.1387859416461123, 0.2559073606661224, -0.027245941927138972, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518634351
Action: 25, State: ([-2.0975580671640977, 1.6512039868483273, -2.084798695188349, 0.7205521226846121, -

Action: 25, State: ([-2.097570761458326, 1.6516988727457589, -2.08482654452299, 0.7000139373058231, -0.793266915431724, -0.5112583555848518, -0.138827419664296, 0.25568838876893407, -0.02771315814394304, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517354407
Action: 25, State: ([-2.097348597120847, 1.651074375228446, -2.084952211353979, 0.7374712936791831, -0.7990557872914563, -0.512130417630264, -0.13832345206695867, 0.2557660215089949, -0.027212290273243844, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515172682
Action: 25, State: ([-2.0973030664436845, 1.6513965126353165, -2.0845100712616587, 0.7299924062298665, -0.80126024471354, -0.4387744907091044, -0.13850736360732657, 0.25565900127703967, -0.027642354935588365, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515511148
Action: 25, State: ([-2.097069069436987, 1.6511689096753601, -2.084012253447889, 0.7359494366101301, -0.8452075

Action: 25, State: ([-2.0976071435754218, 1.6516367116859998, -2.083446639875799, 0.6943201718844833, -0.8263045164095074, -0.3285884496904936, -0.13868930556038908, 0.25568150564784836, -0.027738567853412832, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516662486
Action: 25, State: ([-2.097420389445909, 1.6512135763308287, -2.0847404787789596, 0.732361089533745, -0.79938217891759, -0.5030934219382479, -0.1388206638292426, 0.25583915401277, -0.027215883406680796, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518174273
Action: 25, State: ([-2.096882209608818, 1.6512012994657974, -2.084367859356691, 0.7601469358381894, -0.8315845825404535, -0.3970270466117396, -0.13845082462844957, 0.2556661480934738, -0.027368413752553555, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515047635
Action: 25, State: ([-2.097289871476157, 1.6511954994376845, -2.0836994307460177, 0.7142965770658833, -0.8484

Action: 25, State: ([-2.097093649517399, 1.6513919747947474, -2.0840085972874984, 0.7515251587991909, -0.8357104310382446, -0.3962876911514511, -0.13843688913706415, 0.2556765744541671, -0.02774212201057774, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515425448
Action: 25, State: ([-2.0969943818607764, 1.6512240013089947, -2.0836025514093572, 0.7429424246535646, -0.8424054539886603, -0.39075498100675454, -0.13879783814961338, 0.25577168438588854, -0.027737423614092427, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517965699
Action: 25, State: ([-2.0973679105075895, 1.6517529163388733, -2.0848374761052284, 0.7176171182676869, -0.7892319588494205, -0.5140023820827919, -0.13886990944818867, 0.2558622096984885, -0.027531062746965984, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518904958
Action: 25, State: ([-2.09737745855342, 1.651238307727679, -2.084678155105083, 0.7103026580172618, -

Action: 25, State: ([-2.0974305145572973, 1.651631196048375, -2.083394247035529, 0.7172790246389871, -0.8220097093357044, -0.3284875834914081, -0.13888040462216977, 0.2556889163260735, -0.02768859474388391, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517587463
Action: 25, State: ([-2.097346284996288, 1.6511736524494198, -2.0848130546799233, 0.7385889478743305, -0.7910868903103269, -0.5043665630381257, -0.1388806606903196, 0.2557688551837207, -0.027552069746169805, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518159017
Action: 25, State: ([-2.0973384021694814, 1.651208797003715, -2.084437063585585, 0.7194197460735355, -0.8245248487565331, -0.3932080371054864, -0.13842318820734997, 0.25569083789584957, -0.027363105921265674, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515127814
Action: 25, State: ([-2.0970753609993356, 1.6513103209521933, -2.083551110816394, 0.7399252496712522, -0.8

Action: 25, State: ([-2.097339420822694, 1.6512077240477971, -2.0847578381770617, 0.7063604789760526, -0.8033919086525831, -0.43976417306665383, -0.13826240505602008, 0.255820591263366, -0.027230737797375337, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515378752
Action: 25, State: ([-2.0971344452489866, 1.6512339076398375, -2.084329648708332, 0.7354950807600293, -0.8291133688666897, -0.3912648012487237, -0.13838933408692478, 0.2556968329904336, -0.027397437819726644, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515052046
Action: 25, State: ([-2.0972246071017864, 1.6512067106207713, -2.0836469693560478, 0.729504945632454, -0.8434737518021389, -0.3879333148526536, -0.13874649517209586, 0.25575048752708057, -0.027355078749053685, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.451717611
Action: 25, State: ([-2.0975707614614763, 1.6516988727396633, -2.084826544730859, 0.7000139383295607, -

Action: 25, State: ([-2.0971058418950994, 1.651321709991394, -2.0836047645892926, 0.7401849894993988, -0.8414132604936686, -0.38723929371465704, -0.13880079403163698, 0.25576490169333876, -0.027692450283957226, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517877749
Action: 25, State: ([-2.0972329808997774, 1.6516787673404432, -2.0849343006438037, 0.7364219272068718, -0.7944784505869553, -0.5147079140977309, -0.13880079403163698, 0.25576490169333876, -0.027692450283957226, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517877749
Action: 25, State: ([-2.0974203894407264, 1.6512135763331885, -2.084740478926266, 0.7323610888104963, -0.7993821798330808, -0.5030934257030089, -0.13848250692309677, 0.2555437933061152, -0.027696645773672757, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4514436321
Action: 25, State: ([-2.096882209621537, 1.6512012994646375, -2.084367859497976, 0.7601469347024787

Action: 25, State: ([-2.097104227640922, 1.6516617960205293, -2.0848712550187987, 0.7479621884563263, -0.8001457901044571, -0.5227997881203339, -0.1389066771296106, 0.255820905401154, -0.027193780172031945, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518402565
Action: 25, State: ([-2.098009420856842, 1.6512302498759635, -2.084599410002877, 0.6869809316432531, -0.8076941100997118, -0.5073295467259031, -0.13855949660406072, 0.2555048664105334, -0.027285962500781358, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4514074197
Action: 25, State: ([-2.0970936495102066, 1.6513919747950352, -2.084008597353015, 0.7515251590125702, -0.8357104313060251, -0.3962876922355508, -0.13855949660406072, 0.2555048664105334, -0.027285962500781358, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4514074197
Action: 25, State: ([-2.097277655622671, 1.6517412827027487, -2.08328711086245, 0.7205310906542555, -0.825

Action: 25, State: ([-2.097380429990346, 1.651119371860208, -2.084847236304615, 0.7324289704478671, -0.7997443844988167, -0.5085563777955265, -0.1388371830843255, 0.25587632206423194, -0.027231115987163268, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518592166
Action: 25, State: ([-2.0971030408305396, 1.6511938080120014, -2.084286419925501, 0.7356551099821531, -0.8311828816060691, -0.3956992016073112, -0.13845410194543384, 0.2555203511395411, -0.027731781723284166, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4514129779
Action: 25, State: ([-2.0971715937267934, 1.6513211929524756, -2.0836844047152994, 0.7346559474456001, -0.8440859835560997, -0.3827094523839308, -0.1385839970730692, 0.25567781787414684, -0.02773366527175064, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516125858
Action: 25, State: ([-2.097430514555784, 1.651631196048542, -2.083394247062084, 0.7172790246420409, -0.82

Action: 25, State: ([-2.0974336802583498, 1.6511991903940277, -2.0838980484315988, 0.6998982803442426, -0.8388950357603976, -0.3784025378083895, -0.13870737434367883, 0.25572768392603057, -0.02738247670857824, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516816491
Action: 25, State: ([-2.0973598840363135, 1.651754276296602, -2.0832830710137316, 0.7175674096156057, -0.8279163872507267, -0.32896564689690844, -0.13888411762483696, 0.2558397568416837, -0.027516234125347427, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518762024
Action: 25, State: ([-2.0974843874854674, 1.6511101663178458, -2.0849474950569338, 0.720376941211844, -0.7963777546220427, -0.5071722783776469, -0.13888411762483696, 0.2558397568416837, -0.027516234125347427, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518762024
Action: 25, State: ([-2.0971344452485905, 1.6512339076397087, -2.084329648716526, 0.7354950807767933,

Action: 25, State: ([-2.0978017696302587, 1.6511045431608418, -2.084780609412464, 0.6898272472060772, -0.8012715187720791, -0.5034977016314763, -0.13883657326534055, 0.25578583601276494, -0.027578316818000048, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518122925
Action: 25, State: ([-2.097444068091151, 1.6513876134599341, -2.084085900961659, 0.7198610057487012, -0.8298326053963454, -0.39431665120616805, -0.13861202095267125, 0.25576685509166747, -0.027324645577650186, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516652712
Action: 25, State: ([-2.097125366689699, 1.6511614957401815, -2.0840110977061217, 0.7250673456003986, -0.8391546034409614, -0.38963922784624505, -0.13861202095267125, 0.25576685509166747, -0.027324645577650186, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4516652712
Action: 25, State: ([-2.097607143567762, 1.6516367116884156, -2.0834466399992895, 0.694320171657395

Action: 25, State: ([-2.097723484091164, 1.6516525009757386, -2.0832524181500363, 0.68619780811511, -0.8353805147137313, -0.3313397430509473, -0.13876373498183156, 0.2557211923148547, -0.02736991358811075, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517015493
Action: 25, State: ([-2.09800942085678, 1.6512302498760034, -2.084599410005019, 0.6869809316357625, -0.807694110113076, -0.507329546776792, -0.13829989105833487, 0.2557768357810696, -0.02758761847173319, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515507926
Action: 25, State: ([-2.097034562299405, 1.6512146376597832, -2.084665280124919, 0.7464087420836135, -0.8172524101516401, -0.4529160514803473, -0.13829989105833487, 0.2557768357810696, -0.02758761847173319, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515507926
Action: 25, State: ([-2.0973000826147405, 1.6511978654380304, -2.083783013818806, 0.7071998836484832, -0.84387730

Action: 25, State: ([-2.097171593726724, 1.65132119295248, -2.0836844047161662, 0.7346559474470366, -0.8440859835624153, -0.38270945239926396, -0.13845319996646854, 0.2557024482263365, -0.02738105302410493, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515388545
Action: 25, State: ([-2.0972989707337604, 1.6512570487692768, -2.083510305027181, 0.7197512035548816, -0.8429620137390977, -0.386805881381842, -0.1387846513646557, 0.25573841398702546, -0.02768232841237256, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517559752
Action: 25, State: ([-2.097410584069486, 1.6517555005023583, -2.0847565511153903, 0.7273920748889018, -0.7896901171628294, -0.5103624287849016, -0.1387846513646557, 0.25573841398702546, -0.02768232841237256, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517559752
Action: 25, State: ([-2.0974398667467486, 1.651277972798117, -2.084679758843702, 0.7052639895253977, -0.8004

Action: 25, State: ([-2.0971344452485763, 1.6512339076397042, -2.0843296487167837, 0.7354950807774043, -0.8291133689102226, -0.3912648014406761, -0.13838933408410817, 0.2556968329898747, -0.027397437819737414, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4515052046
Action: 25, State: ([-2.0972246071019693, 1.6512067106205066, -2.0836469693664803, 0.7295049456096377, -0.8434737518477595, -0.38793331504527445, -0.13882741955160735, 0.25568838874100075, -0.027713158145649675, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4517354406
Action: 25, State: ([-2.097236527028821, 1.6517256076048428, -2.083342207558137, 0.7405785442293661, -0.8207822916327874, -0.32711716916442857, -0.13884779014563517, 0.2558633299858605, -0.027549796474929, -0.21041570834617976, -0.09389647633980201, 0.4221686898498746],), Reward: -13.4518827367
Action: 25, State: ([-2.0973485971400656, 1.6510743752259325, -2.0849522116143557, 0.7374712942579873,

KeyboardInterrupt: 

In [50]:
# Inspect the entry of the environment's action table at index 25, the action
# index printed on every step of the interrupted run logged above.
# NOTE(review): the displayed value is a 1-tuple holding a 3-element array,
# presumably one target per controlled joint (num_joints = 3) -- confirm
# against the rosgym environment definition.
env._actions[25]

(array([-2.0943951,  2.0943951, -2.0943951]),)