In [1]:
import numpy as np
import sys
import robosuite as suite
from robosuite.environments.manipulation.empty import Empty
from scipy import interpolate
from robosuite.utils.mjmod import DynamicsModder

In [2]:
# Episode horizon; passed as `horizon=` to every suite.make(...) call in this notebook.
horizon = 2000
# Number of cloth gripper geoms ("gripper0_geom0", "gripper0_geom1", ...) whose positions are tracked.
num_cloth_joints = 6

In [3]:
# Register the custom Empty environment class with robosuite; the suite.make("Empty", ...) calls below refer to it by name.
suite.environments.base.register_env(Empty)
# Controller settings shared by every environment created below ("JOINT_POSITION" defaults).
controller_config = suite.load_controller_config(default_controller="JOINT_POSITION")

In [4]:
# Disabled on-screen variant of the environment below, kept for reference:
# env = suite.make(
#     "Empty",
#     robots="IIWA",                          # a single IIWA arm
#     gripper_types="ClothGripper",           # cloth gripper attached to the arm
#     controller_configs=controller_config,
#     has_renderer=True,                      # on-screen rendering
#     render_camera="sideview",               # camera shown by the on-screen viewer
#     has_offscreen_renderer=False,           # no off-screen rendering
#     render_collision_mesh=True,
#     control_freq=20,                        # 20 hz control for applied actions
#     horizon=horizon,                        # episode horizon, see `horizon` above
#     use_object_obs=False,                   # no object observations requested
#     use_camera_obs=False,                   # no camera observations requested
# )
# Single environment used by the warm-up cell below.
env = suite.make(
    "Empty",
    robots="IIWA",             # a single IIWA arm
    gripper_types="ClothGripper",                # cloth gripper attached to the arm
    controller_configs=controller_config, 
    has_offscreen_renderer=True,           # off-screen renderer enabled. NOTE(review): camera observations are off below; confirm this is needed
    render_collision_mesh=False,
    control_freq=20,                        # 20 hz control for applied actions
    horizon=horizon,                            # episode horizon, see `horizon` above
    use_object_obs=False,                   # no object observations requested
    use_camera_obs=False,                   # no camera observations requested
)

In [5]:
# Target arm configuration (seven joint angles) that the warm-up drives towards.
intermediate_pose = np.array([0.0, -0.628319, 0.0, 0.523599, 0.0, -0.628319, 0.0])

# Simulator names of the seven arm joints and of the tracked cloth geoms.
arm_joint_names = ["robot0_joint_" + str(joint) for joint in range(1, 8)]
cloth_geom_names = ["gripper0_geom{}".format(j) for j in range(num_cloth_joints)]

obs = env.reset()
done = False
for i in range(90):
    # Read the current joint angles and command 50x the remaining error.
    jpos = np.array([env.sim.data.get_joint_qpos(name) for name in arm_joint_names])
    obs, reward, done, info = env.step(50 * (intermediate_pose - jpos))

    # Collect components 0 and 2 of every cloth geom position after this step.
    gpos = []
    for geom_name in cloth_geom_names:
        pos = env.sim.data.geom_xpos[env.sim.model.geom_name2id(geom_name)]
        gpos.extend((pos[0], pos[2]))
    print(gpos)
    # env.render()

[-0.9787647314386667, 1.160885229828662, -1.0166721930421052, 1.173638061209647, -1.0550000631337495, 1.1850623957506592, -1.0933445209757042, 1.1964307991946606, -1.1314180056758478, 1.208678010971323, -1.1693226218910087, 1.2214393138967716]
[-0.9837667467810888, 1.158966067311188, -1.0218985960955151, 1.1710387097001864, -1.060902885538644, 1.1798904084693633, -1.100165174161825, 1.1875139716282999, -1.139280148606749, 1.1958619810990463, -1.1782495036256297, 1.20486650997374]
[-0.9901395583703778, 1.156286467750572, -1.0284546104396872, 1.1677704317825257, -1.0679213161788912, 1.174269725356539, -1.1077288360019357, 1.1781665576883782, -1.1475378700411136, 1.1820478605624523, -1.1873036563069144, 1.1863504468224249]
[-0.9970012393162573, 1.1532672439302103, -1.0354478600290413, 1.1643055473226105, -1.075148540116724, 1.1691865881816224, -1.1151246006833455, 1.1705541748096087, -1.1551152127412845, 1.1713935805690239, -1.195100143088216, 1.172470580522475]
[-1.0038412000469297, 1.15

[-1.2868537061006498, 0.9216463628104252, -1.3267203047925729, 0.9183823087238366, -1.3654349185603718, 0.9083235748354458, -1.4026685673541746, 0.893706646360917, -1.4389519641453283, 0.8768687668655956, -1.4748823667042665, 0.8592902232677416]
[-1.2915275766272105, 0.9157564392994109, -1.3313668179281541, 0.9121738986441943, -1.370003519007082, 0.9018199242335962, -1.4071283573839417, 0.8869287996984634, -1.4432880443268916, 0.869826866334831, -1.4790895115097165, 0.8519871918065376]
[-1.2961537359865487, 0.9098289453883921, -1.33596308971055, 0.90592829087848, -1.3745197853632176, 0.8952802541451998, -1.4115341521587441, 0.8801166174165356, -1.4475687236121297, 0.8627526098000613, -1.4832399459131915, 0.8446539094136551]
[-1.3007318822387688, 0.9038642671460979, -1.3405088232808235, 0.8996458888648917, -1.3789834346446332, 0.8887049677091132, -1.415885683625561, 0.87327048483042, -1.4517937469525661, 0.8556463506970317, -1.4873334265034923, 0.8372906908146399]
[-1.3052617165458864, 

In [6]:
import numpy as np
from gym import spaces
from gym.core import Env
from robosuite.wrappers import Wrapper


class GymWrapper(Wrapper, Env):
    """Gym-style adapter around a robosuite environment fitted with the cloth gripper.

    Observations returned by ``reset`` and ``step`` are a flat array made of the
    seven arm joint positions ("robot0_joint_1" .. "robot0_joint_7") followed by
    components 0 and 2 of the world position of each "gripper0_geom{j}" geom,
    for j in range(num_cloth_joints).

    Args:
        env: robosuite environment to wrap.
        keys: unused; accepted so existing call sites keep working.
    """

    # Number of arm joints read by name when building an observation.
    _NUM_ARM_JOINTS = 7

    def __init__(self, env, keys=None):
        # Run super method
        super().__init__(env=env)
        # Create name for gym
        robots = "".join([type(robot.robot_model).__name__ for robot in self.env.robots])
        self.name = robots + "_" + type(self.env).__name__

        # Get reward range
        self.reward_range = (0, self.env.reward_scale)

        # Gym specific attributes
        self.env.spec = None
        self.metadata = None

        # set up observation and action spaces
        # Observations are unbounded; their length follows the layout built in
        # _cloth_observation (arm joints + two coordinates per cloth geom).
        obs_dim = self._NUM_ARM_JOINTS + 2 * num_cloth_joints
        obs_high = np.inf * np.ones(obs_dim)
        self.observation_space = spaces.Box(low=-obs_high, high=obs_high)
        # Action bounds come straight from the wrapped environment.
        act_low, act_high = self.env.action_spec
        self.action_space = spaces.Box(low=act_low, high=act_high)

    def _cloth_observation(self):
        """Read joint angles and cloth geom coordinates from the current simulator state."""
        # Fetch the simulator on every call instead of caching it.
        # NOTE(review): robosuite may rebuild `sim` on reset; confirm before caching.
        sim = self.env.sim
        jpos = [
            sim.data.get_joint_qpos("robot0_joint_" + str(joint))
            for joint in range(1, self._NUM_ARM_JOINTS + 1)
        ]
        gpos = []
        for j in range(num_cloth_joints):
            pos = sim.data.geom_xpos[sim.model.geom_name2id("gripper0_geom{}".format(j))]
            gpos.extend((pos[0], pos[2]))
        return np.array(jpos + gpos)

    def reset(self):
        """Reset the wrapped environment and return the flat observation array."""
        self.env.reset()
        return self._cloth_observation()

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        The observation dictionary produced by the wrapped environment is
        discarded; the flat array described in the class docstring is returned
        in its place.
        """
        _, reward, done, info = self.env.step(action)
        return self._cloth_observation(), reward, done, info

In [7]:
from stable_baselines3.common.vec_env import SubprocVecEnv

def _make_wrapped_env():
    """Build one "Empty" IIWA / ClothGripper environment wrapped in GymWrapper."""
    return GymWrapper(suite.make(
        "Empty",
        robots="IIWA",
        gripper_types="ClothGripper",
        controller_configs=controller_config,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        control_freq=20,                # 20 hz control for applied actions
        horizon=horizon,
        use_object_obs=False,
        use_camera_obs=False,
    ))


# Pool of ten identically configured environments stepped together.
multi_env = SubprocVecEnv([_make_wrapped_env for _ in range(10)])

In [8]:
def simulate(actions):
    """Replay an action sequence from a fresh reset and report the mean cloth state.

    All environments in ``multi_env`` are reset, driven for 90 steps towards
    ``intermediate_pose`` with a proportional command (gain 50), and then every
    action in ``actions`` is applied to all environments simultaneously.

    Args:
        actions: iterable of per-step actions; each one is broadcast to every
            environment in the pool. May be empty or a generator.

    Returns:
        list of ``2 * num_cloth_joints`` floats: the trailing (cloth) part of
        the final observation, averaged over the environments.

    NOTE(review): presumably the 90 warm-up steps plus ``len(actions)`` must
    stay below ``horizon``, otherwise the vectorised env would reset
    mid-rollout -- confirm against the SubprocVecEnv behaviour.
    """
    obs = multi_env.reset()
    for _ in range(90):
        # The leading columns of the observation are the arm joint angles.
        arm_joints = obs[:, :-num_cloth_joints * 2]
        obs, _, _, _ = multi_env.step(50 * (intermediate_pose - arm_joints))

    # Ask the pool for its size rather than repeating the constant used when
    # it was built; a mismatch would leave some workers without an action.
    n_envs = multi_env.num_envs
    for action in actions:
        obs, _, _, _ = multi_env.step([action] * n_envs)
    return np.mean(obs[:, -num_cloth_joints * 2:], axis=0).tolist()

In [9]:
# Cloth state after the warm-up followed by 100 all-zero actions.
simulate([[0] * 7 for _ in range(100)])

[-1.3808792908360648,
 0.7790591848598533,
 -1.419514556278937,
 0.7686999198632518,
 -1.4560280648654125,
 0.752367067421787,
 -1.4905612446034584,
 0.7321814770130839,
 -1.523974138051289,
 0.7101910881308039,
 -1.556994410700236,
 0.6876154314407401]

In [10]:
# Creating window glfw
# 0.3947247381978785 1.4747500155867088
# 0.4325377648709574 1.4618376387611147
# 0.4628294377820218 1.4357794320709718
# 0.48462112128783885 1.4022774211105264
# 0.5000900615573327 1.3654188162897296
# 0.5112945770153463 1.327043015878611
# 0.5197121021754062 1.2879579491907713
# 0.5263528085181368 1.248530098043205
# 0.5318898955244334 1.208930975736482
# 0.5367795093650977 1.1692460094641723
# 0.5413225304158094 1.1295195069109842
# 0.5457894399151935 1.0897842940255975

In [11]:
import itertools

# Goal layout: first coordinates start at -1.4 and decrease by 0.04 per geom,
# second coordinates are all 0.6.
goal_state_x = [-1.4 - 0.04 * i for i in range(num_cloth_joints)]
goal_state_y = [0.6] * num_cloth_joints
# Interleave into the same flat [x0, y0, x1, y1, ...] order the observations use.
goal_state = [coord for pair in zip(goal_state_x, goal_state_y) for coord in pair]
print(goal_state)

def goal_dist(positions):
    """Euclidean distance between ``positions`` and the module-level ``goal_state``.

    ``positions`` is a flat coordinate sequence laid out like ``goal_state``.
    """
    offset = np.array(positions) - np.array(goal_state)
    squared_error = np.square(offset)
    return np.sqrt(np.sum(squared_error))

def in_goal(positions):
    """Tell whether every coordinate pair lies inside the goal region.

    ``positions`` is a flat sequence ``[a0, b0, a1, b1, ...]``. A pair is
    accepted when ``a <= -1.4`` and ``0.6 <= b <= 0.7``. An empty sequence
    is considered to be in the goal.
    """
    return all(
        positions[i] <= -1.4 and 0.6 <= positions[i + 1] <= 0.7
        for i in range(0, len(positions), 2)
    )
# goal_dist(simulate([[0, 0.8, 0, -0.5, 0, -0.1, 0] for _ in range(200)]))

[-1.4, 0.6, -1.44, 0.6, -1.48, 0.6, -1.52, 0.6, -1.5599999999999998, 0.6, -1.5999999999999999, 0.6]


In [12]:
import collections
import heapq

class PriorityQueue:
    """Min-priority queue built on ``heapq``.

    Entries with equal priority are returned in insertion order. An insertion
    counter is stored between the priority and the item so the heap never has
    to compare two items directly; items therefore do not need to be orderable.
    """

    def __init__(self):
        # Heap of (priority, insertion_index, item) triples.
        self.elements = []
        # Monotonically increasing tie-breaker for equal priorities.
        self._next_index = 0

    def empty(self):
        """Return True when no items are queued."""
        return len(self.elements) == 0

    def put(self, item, priority):
        """Queue ``item`` with the given ``priority`` (lower values come out first)."""
        heapq.heappush(self.elements, (priority, self._next_index, item))
        self._next_index += 1

    def get(self):
        """Remove and return the item with the lowest priority.

        Raises:
            IndexError: if the queue is empty.
        """
        return heapq.heappop(self.elements)[-1]

In [13]:
# Search root: cloth state after the warm-up followed by 100 all-zero actions.
start_position = simulate([[0] * 7 for _ in range(100)])

def neighbors(positions, actions):
    """Yield every one-step extension of ``actions`` with its simulated outcome.

    The eight candidate actions move joints 2, 4 and 6 (action indices 1, 3
    and 5) by +/-0.1 scaled by 4, covering all sign combinations; the other
    action entries stay 0. ``positions`` is not used.

    Yields:
        ``(result, extended_actions)`` where ``extended_actions`` is
        ``actions`` plus one candidate and ``result`` is what ``simulate``
        returns for that sequence.
    """
    step = 0.1
    # The sign of index 1 varies fastest and the sign of index 5 slowest.
    sign_rows = [
        [0, s1 * step, 0, s3 * step, 0, s5 * step, 0]
        for s5 in (1, -1)
        for s3 in (1, -1)
        for s1 in (1, -1)
    ]
    possible_actions = np.array(sign_rows) * 4

    for candidate in possible_actions:
        extended = actions + [candidate]
        yield simulate(extended), extended

def discretize(positions, size=0.0002):
    """Snap each value in ``positions`` to the nearest multiple of ``size``.

    Returns a tuple, so the result can be used as a dictionary key.
    """
    return tuple(round(value / size) * size for value in positions)

In [None]:
# Best-first search over action sequences. States are discretized cloth
# observations; each expansion re-simulates the parent's action list extended
# by one candidate action (see `neighbors`).
start_key = discretize(start_position)

queue = PriorityQueue()
queue.put(start_key, 0)
actions_so_far = {start_key: []}   # state -> action sequence that reached it
cost_so_far = {start_key: 0}       # state -> accumulated step cost
solution_actions = None            # set to the winning action sequence when a goal state is dequeued

while not queue.empty():
    curr_pose = queue.get()

    if in_goal(curr_pose):
        # Keep the plan in a variable so it does not have to be recovered from the printed log.
        solution_actions = actions_so_far[curr_pose]
        print("done", curr_pose)
        break

    # Every successor of curr_pose costs one fixed step more than curr_pose.
    new_cost = cost_so_far[curr_pose] + 0.001
    for next_pose, actions in neighbors(curr_pose, actions_so_far[curr_pose]):
        next_pose = discretize(next_pose)
        if next_pose not in cost_so_far or new_cost < cost_so_far[next_pose]:
            cost_so_far[next_pose] = new_cost
            actions_so_far[next_pose] = actions
            # Distance to the goal is the heuristic part of the priority.
            remaining = goal_dist(next_pose)
            queue.put(next_pose, new_cost + remaining)
            print(remaining, actions)


0.355376195038441 [array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ])]
0.3532672076488278 [array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ])]
0.35787919749546776 [array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ])]
0.354628876432814 [array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ])]
0.355523332567639 [array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ])]
0.35045798606965717 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ])]
0.3572782669013049 [array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ])]
0.3537315931606902 [array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ])]
0.3469090370687971 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ])]
0.342550434242901 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ])]
0.34897810819591535 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ])]
0.3456626100694144 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0.

0.28962700150365817 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ])]
0.2951304118521167 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ])]
0.2916486242038526 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ])]
0.28143063088441533 [

0.2586799567032592 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ])]
0.2548306104062069 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ])]
0.25564138945014364 [array([ 0. , -0.4,  0

0.2352812784732351 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ])]
0.23196103120998587 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4

0.21064387007458826 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ])]
0.2068182777222556 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]

0.1977838213808198 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ])]
0.19538710295206294 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4

0.1924999740259723 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ])]
0.18926278028180823 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4

0.18623426107996358 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ])]
0.18902243253116816 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.

0.18518056053484663 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ])]
0.18411898326897208 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.

0.1827224124184004 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ])]
0.181785257

0.1804397960539748 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. 

0.1784380004371267 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. 

0.17619738931096576 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0.

0.17553051016846055 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0.

0.17437522759841784 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0.

0.17394987783841645 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0.

0.16949654863742808 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0.

0.16922245713852538 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0.

0.17029762182719996 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0.

0.16683416916207547 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0.

0.16761205207263594 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0.

0.16378766742340534 [array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0.

In [None]:
found_actions = "[array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. 
]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. 
,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. 
,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. 
, -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ])]"

In [None]:
def simulate_render(actions, num_trials=10, render=True):
    """Replay an action sequence in the global ``env`` and average the final cloth geom positions.

    Each trial resets ``env``, applies every action in order (optionally drawing
    each step), and then reads the world position of the geoms named
    ``gripper0_geom0`` .. ``gripper0_geom{num_cloth_joints - 1}``.

    Args:
        actions: Iterable of actions, each passed unchanged to ``env.step``.
        num_trials: Number of independent rollouts to average over. Must be >= 1.
        render: When True, call ``env.render()`` after every step.

    Returns:
        A flat list of ``2 * num_cloth_joints`` floats laid out as
        ``[p0[0], p0[2], p1[0], p1[2], ...]`` (components 0 and 2 of each geom
        position, i.e. x and z), averaged element-wise over the trials.

    Raises:
        ValueError: If ``num_trials`` is smaller than 1.
    """
    if num_trials < 1:
        raise ValueError("num_trials must be at least 1")

    # Materialize once so that an iterator/generator can be replayed on every trial.
    actions = list(actions)

    all_geom_positions = []
    for _ in range(num_trials):
        env.reset()
        for action in actions:
            _, _, done, _ = env.step(action)
            if render:
                env.render()
            if done:
                # The episode hit its horizon; robosuite raises if a terminated
                # episode is stepped again, so stop this rollout here.
                break

        # Look the geoms up after the rollout, on the sim that env currently holds.
        geom_xpos = env.sim.data.geom_xpos
        geom_positions = []
        for j in range(num_cloth_joints):
            geom_id = env.sim.model.geom_name2id("gripper0_geom{}".format(j))
            x, _, z = geom_xpos[geom_id]
            geom_positions.extend((x, z))
        all_geom_positions.append(geom_positions)

    return np.mean(np.array(all_geom_positions), axis=0).tolist()

# Local import: only this cell needs `ast`, to parse the printed action list safely.
import ast

# Rebuild the environment with an on-screen viewer so the found trajectory can be watched.
env = suite.make(
    "Empty",
    robots="IIWA",                          # single IIWA arm
    gripper_types="ClothGripper",           # attach the ClothGripper to the arm
    controller_configs=controller_config,
    has_renderer=True,                      # on-screen rendering
    render_camera="sideview",               # view the scene from the "sideview" camera
    has_offscreen_renderer=False,           # no off-screen rendering
    render_collision_mesh=True,
    control_freq=20,                        # 20 hz control for applied actions
    horizon=horizon,                        # episode length comes from the `horizon` constant
    use_object_obs=False,                   # no observations needed
    use_camera_obs=False,                   # no observations needed
)

# `found_actions` starts out as the printed repr "[array([...]), ...]". Stripping the
# `array(` / `)` wrappers leaves a plain nested-list literal, which `ast.literal_eval`
# parses without executing arbitrary code (unlike `eval`).
# The isinstance guard keeps the cell re-runnable: after the first run the name already
# holds the parsed list, and calling `.replace` on it would fail.
if isinstance(found_actions, str):
    found_actions = ast.literal_eval(
        found_actions.replace("array", "").replace("(", "").replace(")", "")
    )
simulate_render(found_actions)