In [1]:
import ast
import sys

import numpy as np
from scipy import interpolate

import robosuite as suite
from robosuite.environments.manipulation.empty import Empty
from robosuite.utils.mjmod import DynamicsModder

In [2]:
# Episode length in environment steps; passed as `horizon` to every suite.make(...) call in this notebook.
horizon = 2000
# Number of gripper geoms ("gripper0_geom0", "gripper0_geom1", ...) whose positions are read back as the cloth state.
num_cloth_joints = 6

In [3]:
# Register the Empty environment class so it can be created by name via suite.make("Empty", ...).
suite.environments.base.register_env(Empty)
# Controller configuration (JOINT_POSITION) shared by every environment created in this notebook.
controller_config = suite.load_controller_config(default_controller="JOINT_POSITION")

In [4]:
# Single on-screen environment used for interactive inspection and rendering.
env = suite.make(
    "Empty",
    robots="IIWA",             # load a single IIWA robot
    gripper_types="ClothGripper",                # use the ClothGripper as the gripper
    controller_configs=controller_config, 
    has_renderer=True,                      # on-screen rendering
    render_camera="sideview",              # visualize the "sideview" camera
    has_offscreen_renderer=False,           # no off-screen rendering
    render_collision_mesh=True,
    control_freq=20,                        # 20 hz control for applied actions
    horizon=horizon,                            # each episode terminates after `horizon` steps
    use_object_obs=False,                   # no object observations needed
    use_camera_obs=False,                   # no camera observations needed
)
# Variant without the on-screen renderer (off-screen renderer enabled instead), kept for reference:
# env = suite.make(
#     "Empty",
#     robots="IIWA",             # load a single IIWA robot
#     gripper_types="ClothGripper",                # use the ClothGripper as the gripper
#     controller_configs=controller_config, 
#     has_offscreen_renderer=True,           # off-screen rendering enabled
#     render_collision_mesh=False,
#     control_freq=20,                        # 20 hz control for applied actions
#     horizon=horizon,                            # each episode terminates after `horizon` steps
#     use_object_obs=False,                   # no object observations needed
#     use_camera_obs=False,                   # no camera observations needed
# )

Creating window glfw


In [5]:
# Joint-space target (one value per arm joint) that the warm-up loop drives the robot towards.
intermediate_pose = np.array([0.0, -0.628319, 0.0, 0.523599, 0.0, -0.628319, 0.0])

obs = env.reset()
done = False
for i in range(90):
    # Read the current value of robot0_joint_1 .. robot0_joint_7.
    joint_names = ["robot0_joint_" + str(joint) for joint in range(1, 8)]
    jpos = np.array([env.sim.data.get_joint_qpos(name) for name in joint_names])
    # Proportional command: 50 times the remaining joint error.
    obs, reward, done, info = env.step(50 * (intermediate_pose - jpos))
    # Collect components 0 and 2 of every cloth geom position, interleaved per geom.
    gpos = []
    for j in range(num_cloth_joints):
        geom_id = env.sim.model.geom_name2id("gripper0_geom{}".format(j))
        pos = env.sim.data.geom_xpos[geom_id]
        gpos.extend((pos[0], pos[2]))
    print(gpos)
    env.render()

Creating window glfw
[-0.9503374169197197, 1.1684455607299662, -0.988120779317216, 1.1815719809868999, -1.0264147300629334, 1.193124933675707, -1.0647541504915528, 1.2045261333225348, -1.1028111377641128, 1.21683699751637, -1.1406785346908888, 1.2297190596667957]
[-0.9527697907574432, 1.1691593051855325, -0.9907214853031358, 1.1817919280058742, -1.0297138275229396, 1.1907098061355497, -1.069039081878894, 1.1980216262582657, -1.1082612842557324, 1.2058673173330665, -1.147358917119503, 1.2143117536737569]
[-0.9565256078758991, 1.1692217076826095, -0.9945869819084606, 1.1815215391088563, -1.0339995360417642, 1.1883490850946805, -1.0738179170456494, 1.1921526510172598, -1.1136599015997422, 1.1957005129577118, -1.1534710258897047, 1.199579278397525]
[-0.9613386120971691, 1.1684626334036796, -0.9994544768806896, 1.1805938716787865, -1.0390307783254846, 1.186399529150165, -1.078975471755046, 1.188500159214905, -1.118949156093978, 1.1899479396151131, -1.1589155967031892, 1.1915834941491614]
[-

[-1.186199692845006, 1.0289193745166825, -1.226085450211737, 1.0319402764802172, -1.26587877967227, 1.0278794521303727, -1.3048703874034706, 1.0189545532561586, -1.3432451647215027, 1.0076684675336436, -1.3813835903958698, 0.9956077982343741]
[-1.1917753636010369, 1.0238085426313541, -1.2316843372418171, 1.0265054523795156, -1.2714437929995621, 1.022125377710027, -1.31036293658856, 1.0128896011139403, -1.348645600118677, 1.0012949061058107, -1.386684894757203, 0.9889251310656065]
[-1.1973090780125082, 1.0186528331681277, -1.237238622441494, 1.021025804012419, -1.2769617609879, 1.016327756292455, -1.3158063655616743, 1.0067833330013716, -1.3539951043820655, 0.9948829329869655, -1.391933565342487, 0.9822072686995392]
[-1.2028004820647096, 1.0134525872872309, -1.2427479564406263, 1.015501686950666, -1.2824323555360353, 1.0104869218839705, -1.3212003715417746, 1.0006360199651243, -1.3592933989071434, 0.9884327182398801, -1.3971293466026609, 0.9754542606680932]
[-1.2082492244311718, 1.00820

[-1.3657921023306645, 0.8062408679709473, -1.404740020488505, 0.7971271733006716, -1.4416736841716908, 0.7817680005155417, -1.476622238225077, 0.7623103994920125, -1.5103827514796226, 0.7408574883072436, -1.5437072244881884, 0.7187333126686757]
[-1.3695478255735372, 0.7996874757571659, -1.4084218911084938, 0.7902637401455936, -1.4452403984328621, 0.7746305310694268, -1.4800510409596348, 0.7549272612534913, -1.5136632688791125, 0.7332427553886971, -1.5468358652757734, 0.7108915025343759]
[-1.3732501517508597, 0.7931040233762627, -1.412047926640471, 0.7833709646416336, -1.448749417130958, 0.7674649812021356, -1.4834207396576384, 0.7475175734698716, -1.5168835060735315, 0.7256031277600558, -1.5499031287584795, 0.7030264986864457]
[-1.3766917097324798, 0.7868700500103817, -1.4154097084365507, 0.7768243370256233, -1.4519750649410508, 0.760607850899774, -1.4864850967199845, 0.7403826835708704, -1.519793486225672, 0.7182343007436427, -1.5526629958425051, 0.6954396740384292]
[-1.37941427747280

In [6]:
import numpy as np
from gym import spaces
from gym.core import Env
from robosuite.wrappers import Wrapper


class GymWrapper(Wrapper, Env):
    """Gym-style wrapper around a robosuite environment with a flat cloth-state observation.

    The observation returned by ``reset`` and ``step`` is a 1-D array made of:

    * the ``qpos`` of ``robot0_joint_1`` .. ``robot0_joint_7`` (7 values), followed by
    * components 0 and 2 of the position of each ``gripper0_geom{j}`` for
      ``j`` in ``range(num_cloth_joints)``, interleaved per geom.

    ``num_cloth_joints`` is read from the enclosing module at call time.

    Args:
        env: the robosuite environment to wrap.
        keys: accepted for signature compatibility; not used by this wrapper.
    """

    def __init__(self, env, keys=None):
        # Run super method
        super().__init__(env=env)
        # Create name for gym
        robots = "".join([type(robot.robot_model).__name__ for robot in self.env.robots])
        self.name = robots + "_" + type(self.env).__name__

        # Get reward range
        self.reward_range = (0, self.env.reward_scale)

        # Gym specific attributes
        self.env.spec = None
        self.metadata = None

        # Set up observation and action spaces. Vectorized wrappers query both
        # attributes from every worker, so they must exist on the wrapper.
        obs_dim = 7 + 2 * num_cloth_joints
        obs_high = np.inf * np.ones(obs_dim)
        self.observation_space = spaces.Box(low=-obs_high, high=obs_high)
        act_low, act_high = self.env.action_spec
        self.action_space = spaces.Box(low=act_low, high=act_high)

    def _get_obs(self):
        """Build the flat observation (joint positions followed by cloth geom coordinates)."""
        sim = self.env.sim
        jpos = [sim.data.get_joint_qpos("robot0_joint_" + str(joint)) for joint in range(1, 8)]
        gpos = []
        for j in range(num_cloth_joints):
            pos = sim.data.geom_xpos[sim.model.geom_name2id("gripper0_geom{}".format(j))]
            gpos.append(pos[0])
            gpos.append(pos[2])
        return np.array(jpos + gpos)

    def reset(self):
        """Reset the wrapped environment and return the flat observation."""
        self.env.reset()
        return self._get_obs()

    def step(self, action):
        """Step the wrapped environment.

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is the flat
            array described in the class docstring; the wrapped environment's own
            observation dictionary is discarded.
        """
        _, reward, done, info = self.env.step(action)
        return self._get_obs(), reward, done, info

In [7]:
from stable_baselines3.common.vec_env import SubprocVecEnv

# Ten worker processes, each building its own GymWrapper-wrapped "Empty" environment
# without an on-screen renderer.
# NOTE(review): the recorded output of this cell is a ConnectionResetError, i.e. the
# connection to a worker was reset while the vectorized env was being set up. The cause is
# not visible from this notebook -- TODO confirm (candidates: the off-screen renderer
# enabled below, or the wrapper's missing gym spaces).
multi_env = SubprocVecEnv([lambda: GymWrapper(suite.make(
    "Empty",
    robots="IIWA",             # load a single IIWA robot
    gripper_types="ClothGripper",                # use the ClothGripper as the gripper
    controller_configs=controller_config, 
    has_offscreen_renderer=True,           # off-screen renderer enabled (camera observations are disabled below)
    render_collision_mesh=False,
    control_freq=20,                        # 20 hz control for applied actions
    horizon=horizon,                            # each episode terminates after `horizon` steps
    use_object_obs=False,                   # no object observations needed
    use_camera_obs=False,                   # no camera observations needed
)) for _ in range(10)])

ConnectionResetError: [Errno 104] Connection reset by peer

In [None]:
def simulate(actions, warmup_steps=90, gain=50):
    """Roll out an action sequence in every worker of ``multi_env`` and average the cloth state.

    All workers are reset, driven towards ``intermediate_pose`` with a proportional
    joint command for ``warmup_steps`` steps, and then fed the same ``actions`` one
    step at a time.

    Args:
        actions: iterable of per-step actions; each action is sent unchanged to
            every worker.
        warmup_steps: number of proportional-control steps applied before ``actions``.
        gain: proportional gain applied to the joint error during the warm-up.

    Returns:
        list of ``2 * num_cloth_joints`` floats: the cloth part of the final
        observation, averaged over all workers.
    """
    obs = multi_env.reset()
    # Split point between the joint part and the cloth part of an observation.
    # An explicit index (rather than a negative slice) stays correct when the
    # cloth part is empty.
    n_joint = obs.shape[1] - num_cloth_joints * 2
    for _ in range(warmup_steps):
        obs, _, _, _ = multi_env.step(gain * (intermediate_pose - obs[:, :n_joint]))
    for action in actions:
        # One copy of the action per worker, whatever the size of the vectorized env.
        obs, _, _, _ = multi_env.step([action] * multi_env.num_envs)
    return np.mean(obs[:, n_joint:], axis=0).tolist()

In [None]:
# Smoke test: feed 100 all-zero actions of length 7 to simulate() and display the averaged result.
simulate([0] * 7 for _ in range(100))

In [None]:
# Creating window glfw
# 0.3947247381978785 1.4747500155867088
# 0.4325377648709574 1.4618376387611147
# 0.4628294377820218 1.4357794320709718
# 0.48462112128783885 1.4022774211105264
# 0.5000900615573327 1.3654188162897296
# 0.5112945770153463 1.327043015878611
# 0.5197121021754062 1.2879579491907713
# 0.5263528085181368 1.248530098043205
# 0.5318898955244334 1.208930975736482
# 0.5367795093650977 1.1692460094641723
# 0.5413225304158094 1.1295195069109842
# 0.5457894399151935 1.0897842940255975

In [None]:
import itertools

# Generated goal: first coordinates start at 0.4 and increase by 0.04 per cloth joint, second
# coordinates are a constant 0.6; the two lists are interleaved as [x0, y0, x1, y1, ...].
goal_state_x = [0.4 + 0.04 * i for i in range(num_cloth_joints)]
goal_state_y = [0.6 for i in range(num_cloth_joints)]
goal_state = list(itertools.chain(*zip(goal_state_x, goal_state_y)))
# NOTE: the generated goal above is immediately replaced by this hard-coded list of 12 values;
# this second assignment is the value goal_state holds when the cell finishes.
goal_state = [0.36413204131356947, 0.6430304302386096, 0.40393953943417493, 0.6391108573831975, 0.443414477257156, 0.6326510398584807, 0.48268256039159285, 0.625034110955667, 0.5219231742213934, 0.6172769088005544, 0.5611924926306847, 0.6096663509608844]
print(goal_state)

def goal_dist(positions, goal=None):
    """Euclidean distance between a flat position vector and a goal vector.

    Args:
        positions: sequence of coordinates.
        goal: sequence of the same shape as ``positions``. Defaults to the
            module-level ``goal_state`` (looked up at call time).

    Returns:
        The L2 norm of ``positions - goal`` as a numpy float.

    Raises:
        ValueError: if ``positions`` and the goal do not have the same shape
            (prevents numpy from silently broadcasting a mismatched input).
    """
    target = np.array(goal_state if goal is None else goal)
    current = np.array(positions)
    if current.shape != target.shape:
        raise ValueError(
            "positions has shape {} but the goal has shape {}".format(current.shape, target.shape)
        )
    return np.sqrt(np.sum(np.square(current - target)))

def in_goal(positions):
    """Return True when every interleaved coordinate pair lies inside the goal region.

    ``positions`` is read as consecutive pairs ``(positions[i], positions[i + 1])``.
    A pair is accepted when its first value is at least 0.4 and its second value
    lies in the closed interval [0.5, 0.6]. An empty sequence is accepted.

    NOTE(review): the hard-coded ``goal_state`` in this notebook starts with
    0.364..., 0.643..., which this test rejects -- confirm the two are meant to
    describe the same target.
    """
    return all(
        0.4 <= positions[idx] and 0.5 <= positions[idx + 1] <= 0.6
        for idx in range(0, len(positions), 2)
    )
# goal_dist(simulate([[0, 0.8, 0, -0.5, 0, -0.1, 0] for _ in range(200)]))

In [None]:
import collections
import heapq

class PriorityQueue:
    """Min-priority queue built on ``heapq``.

    Heap entries are ``(priority, sequence, item)`` tuples. The ever-increasing
    sequence number breaks ties between equal priorities, so the items themselves
    are never compared: items that cannot be ordered (dicts, numpy arrays, ...)
    are safe to store, and equal-priority items come out in insertion order.
    """

    def __init__(self):
        self.elements = []
        # Insertion counter used as the tie-breaker inside heap entries.
        self._sequence = 0

    def empty(self):
        """Return True when the queue holds no items."""
        return len(self.elements) == 0

    def put(self, item, priority):
        """Insert ``item`` with the given ``priority`` (lower values are served first)."""
        heapq.heappush(self.elements, (priority, self._sequence, item))
        self._sequence += 1

    def get(self):
        """Remove and return the item with the lowest priority.

        Raises:
            IndexError: if the queue is empty.
        """
        return heapq.heappop(self.elements)[-1]

In [None]:
env.reset()
# Cloth state right after the reset: components 0 and 2 of each gripper geom position,
# interleaved per geom.
start_position = []
for j in range(num_cloth_joints):
    geom_id = env.sim.model.geom_name2id("gripper0_geom{}".format(j))
    pos = env.sim.data.geom_xpos[geom_id]
    start_position.extend((pos[0], pos[2]))

def neighbors(positions, actions):
    """Yield every one-step extension of ``actions`` together with its simulated outcome.

    Each candidate step moves joints 2, 4 and 6 (array indices 1, 3, 5) by +0.4 or
    -0.4 and leaves the other joints at 0, giving 8 sign combinations. For every
    candidate the whole extended action sequence is re-run through ``simulate``.

    Args:
        positions: not used; the outcome is obtained by re-simulating ``actions``.
        actions: list of the actions taken so far.

    Yields:
        ``(result, extended_actions)`` where ``result`` is the value returned by
        ``simulate`` and ``extended_actions`` is ``actions`` plus the candidate step.
    """
    candidate_steps = np.array([
        [0, 0.1, 0, 0.1, 0, 0.1, 0],
        [0, -0.1, 0, 0.1, 0, 0.1, 0],
        [0, 0.1, 0, -0.1, 0, 0.1, 0],
        [0, -0.1, 0, -0.1, 0, 0.1, 0],
        [0, 0.1, 0, 0.1, 0, -0.1, 0],
        [0, -0.1, 0, 0.1, 0, -0.1, 0],
        [0, 0.1, 0, -0.1, 0, -0.1, 0],
        [0, -0.1, 0, -0.1, 0, -0.1, 0],
    ]) * 4

    for step in candidate_steps:
        extended = actions + [step]
        yield simulate(extended), extended

def discretize(positions, size=0.0002):
    """Snap every value in ``positions`` to the nearest multiple of ``size``.

    Rounding uses Python's built-in ``round``. The result is a tuple so it can be
    used as a dictionary key.
    """
    return tuple(round(value / size) * size for value in positions)

In [None]:
# Best-first (A*-style) search over action sequences.
# States are discretized cloth positions; the queue is ordered by
# accumulated cost + goal_dist(state).
queue = PriorityQueue()
queue.put(discretize(start_position), 0)
# Maps each discretized state to the action sequence that reached it.
actions_so_far = {discretize(start_position): []}
# Maps each discretized state to the accumulated cost of reaching it.
cost_so_far = {discretize(start_position): 0}

while not queue.empty():
    curr_pose = queue.get()

    # Stop as soon as the state taken from the queue satisfies the goal test.
    if in_goal(curr_pose):
        print("done", curr_pose)
        break

    # neighbors() re-simulates the stored action sequence extended by each candidate step.
    for next_pose, actions in neighbors(curr_pose, actions_so_far[curr_pose]):
        next_pose = discretize(next_pose)
        # Every step has the same fixed cost of 0.001.
        new_cost = cost_so_far[curr_pose] + 0.001
        # Record the state if it is new or was reached more cheaply than before.
        if next_pose not in cost_so_far or new_cost < cost_so_far[next_pose]:
            cost_so_far[next_pose] = new_cost
            actions_so_far[next_pose] = actions
            priority = new_cost + goal_dist(next_pose)
            queue.put(next_pose, priority)
            # Progress log: remaining distance and the action sequence that produced it.
            print(goal_dist(next_pose), actions)


In [None]:
found_actions = "[array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. 
]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. 
,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. 
,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. 
, -0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. , -0.4,  0. ,  0.4,  0. ]), array([ 0. ,  0.4,  0. ,  0.4,  0. , -0.4,  0. ]), array([0. , 0.4, 0. , 0.4, 0. , 0.4, 0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. ,  0.4,  0. ]), array([ 0. , -0.4,  0. , -0.4,  0. , -0.4,  0. ]), array([ 0. , -0.4,  0. ,  0.4,  0. , -0.4,  0. ])]"

In [None]:
def simulate_render(actions, num_rollouts=10, warmup_steps=0):
    """Replay an action sequence in the on-screen ``env`` and average the final cloth state.

    Every rollout resets ``env``, optionally drives the arm towards
    ``intermediate_pose`` for ``warmup_steps`` steps, applies ``actions`` one step
    at a time (rendering after every step) and records components 0 and 2 of each
    ``gripper0_geom{j}`` position.

    Args:
        actions: iterable of per-step actions. It is materialized once so that a
            generator is replayed in every rollout rather than only the first.
        num_rollouts: number of times the sequence is replayed.
        warmup_steps: number of proportional-control steps (gain 50, as in the
            notebook's warm-up loop) applied after each reset. Defaults to 0,
            i.e. no warm-up.
            NOTE(review): ``simulate`` applies 90 such steps before its actions;
            a plan found through ``simulate`` probably needs ``warmup_steps=90``
            to be replayed from the same starting state -- confirm.

    Returns:
        list of ``2 * num_cloth_joints`` floats, the recorded cloth state averaged
        over all rollouts.

    Raises:
        ValueError: if ``num_rollouts`` is smaller than 1.
    """
    if num_rollouts < 1:
        raise ValueError("num_rollouts must be at least 1")
    actions = list(actions)

    all_geom_positions = []
    for _ in range(num_rollouts):
        env.reset()
        for _ in range(warmup_steps):
            jpos = np.array(
                [env.sim.data.get_joint_qpos("robot0_joint_" + str(joint)) for joint in range(1, 8)]
            )
            env.step(50 * (intermediate_pose - jpos))
            env.render()
        for action in actions:
            env.step(action)
            env.render()
        geom_positions = []
        for j in range(num_cloth_joints):
            pos = env.sim.data.geom_xpos[env.sim.model.geom_name2id("gripper0_geom{}".format(j))]
            geom_positions.append(pos[0])
            geom_positions.append(pos[2])
        all_geom_positions.append(geom_positions)
    return np.mean(np.array(all_geom_positions), axis=0).tolist()

# On-screen environment used by simulate_render() to replay the found action sequence.
env = suite.make(
    "Empty",
    robots="IIWA",             # load a single IIWA robot
    gripper_types="ClothGripper",                # use the ClothGripper as the gripper
    controller_configs=controller_config, 
    has_renderer=True,                      # on-screen rendering
    render_camera="sideview",              # visualize the "sideview" camera
    has_offscreen_renderer=False,           # no off-screen rendering
    render_collision_mesh=True,
    control_freq=20,                        # 20 hz control for applied actions
    horizon=horizon,                            # each episode terminates after `horizon` steps
    use_object_obs=False,                   # no object observations needed
    use_camera_obs=False,                   # no camera observations needed
)

# `found_actions` starts out as the pasted repr of a list of numpy arrays. Strip the
# "array(...)" wrappers and parse what remains as a plain nested-list literal.
# ast.literal_eval only accepts literals, so unlike eval it cannot execute code.
# The isinstance guard keeps this cell re-runnable: after the first run
# `found_actions` is already a list and must not be parsed a second time.
if isinstance(found_actions, str):
    found_actions = ast.literal_eval(
        found_actions.replace("array", "").replace("(", "").replace(")", "")
    )
simulate_render(found_actions)