In [18]:
import metaworld
# import environments.metaworld_envs
import gym
from environments.parallel_envs import make_vec_envs, make_env


In [None]:
import gym
import random

from environments.env_utils.vec_env.dummy_vec_env import DummyVecEnv
from environments.env_utils.vec_env.subproc_vec_env import SubprocVecEnv
from environments.env_utils.vec_env.vec_normalize import VecNormalize
from environments.wrappers import TimeLimitMask, VariBadWrapper


def make_env(env_id, seed, rank, episodes_per_task, tasks, add_done_info, **kwargs):
    """
    Return a zero-argument factory that builds one VariBAD-wrapped environment.

    Nothing is constructed until the returned callable is invoked.

    :param env_id: id handed to `gym.make`
    :param seed: base seed; when not None the environment is seeded with `seed + rank`
    :param rank: offset added to `seed`
    :param episodes_per_task: forwarded to `VariBadWrapper`
    :param tasks: optional collection of tasks; when given, the unwrapped
        environment's `reset_task` is replaced by a function that sets a
        task picked with `random.choice(tasks)`
    :param add_done_info: forwarded to `VariBadWrapper`
    :param kwargs: extra keyword arguments for `gym.make`
    """
    def _thunk():
        instance = gym.make(env_id, **kwargs)

        if tasks is not None:
            def _pick_from_tasks(x):
                # `instance` is resolved when this is called, i.e. it refers to
                # the fully wrapped environment; the argument itself is ignored.
                return instance.unwrapped.set_task(random.choice(tasks))

            instance.unwrapped.reset_task = _pick_from_tasks

        if seed is not None:
            instance.seed(seed + rank)

        # Environments whose outermost class name mentions TimeLimit get the mask wrapper.
        if 'TimeLimit' in str(instance.__class__.__name__):
            instance = TimeLimitMask(instance)

        instance = VariBadWrapper(
            env=instance,
            episodes_per_task=episodes_per_task,
            add_done_info=add_done_info,
        )
        return instance

    return _thunk

def make_metaworld_env(env_id, task_id, seed, rank, episodes_per_task, add_done_info, **kwargs):
    """
    Return a zero-argument factory for a VariBAD-wrapped environment that is
    pinned to one benchmark task.

    :param env_id: id handed to `gym.make`
    :param task_id: passed to the new environment's `set_benchmark_task`
    :param seed: base seed; when not None the environment is seeded with `seed + rank`
    :param rank: offset added to `seed`
    :param episodes_per_task: forwarded to `VariBadWrapper`
    :param add_done_info: forwarded to `VariBadWrapper`
    :param kwargs: extra keyword arguments for `gym.make`
    """
    def _thunk():
        wrapped = gym.make(env_id, **kwargs)
        # Select the benchmark environment before seeding, as the original order does.
        wrapped.set_benchmark_task(task_id)

        if seed is not None:
            wrapped.seed(seed + rank)

        needs_mask = 'TimeLimit' in str(wrapped.__class__.__name__)
        if needs_mask:
            wrapped = TimeLimitMask(wrapped)

        return VariBadWrapper(
            env=wrapped,
            episodes_per_task=episodes_per_task,
            add_done_info=add_done_info,
        )

    return _thunk

def make_vec_envs(env_name, seed, num_processes, gamma,
                  device, episodes_per_task,
                  normalise_rew, ret_rms, tasks,
                  rank_offset=0,
                  add_done_info=None,
                  **kwargs):
    """
    Build `num_processes` environment factories, vectorise them, optionally
    normalise them, and wrap the result in `VecPyTorch`.

    :param env_name: environment id; names containing 'ML10' are built with
        `make_metaworld_env` (factory `i` gets `task_id=i`), all others with `make_env`
    :param seed: base seed forwarded to every factory
    :param num_processes: number of environments to create; more than one
        uses `SubprocVecEnv`, otherwise `DummyVecEnv`
    :param gamma: forwarded to `VecNormalize` only when not None
    :param device: forwarded to `VecPyTorch`
    :param episodes_per_task: forwarded to every factory
    :param normalise_rew: forwarded to `VecNormalize`
    :param ret_rms: running return and std for rewards
    :param tasks: forwarded to `make_env` (not used on the ML10 path)
    :param rank_offset: factory `i` is given rank `rank_offset + i`
    :param add_done_info: forwarded to every factory
    :param kwargs: extra keyword arguments forwarded to every factory
    :return: the `VecPyTorch`-wrapped vectorised environment
    """
    # VecPyTorch is neither defined nor imported in the visible notebook cells,
    # so the bare name raised NameError on the last step. Prefer a definition
    # already present in the notebook namespace, otherwise import it.
    # NOTE(review): assumes VecPyTorch lives in environments.parallel_envs (the
    # module this notebook already imports make_vec_envs from) - confirm.
    try:
        vec_pytorch_cls = VecPyTorch
    except NameError:
        from environments.parallel_envs import VecPyTorch as vec_pytorch_cls

    ## hacky work around
    if 'ML10' in env_name:
        env_fns = [make_metaworld_env(env_id=env_name,
                                      task_id=i,
                                      seed=seed, rank=rank_offset + i,
                                      episodes_per_task=episodes_per_task,
                                      add_done_info=add_done_info,
                                      **kwargs)
                   for i in range(num_processes)]
    else:
        env_fns = [make_env(env_id=env_name, seed=seed, rank=rank_offset + i,
                            episodes_per_task=episodes_per_task,
                            tasks=tasks,
                            add_done_info=add_done_info,
                            **kwargs)
                   for i in range(num_processes)]

    if len(env_fns) > 1:
        envs = SubprocVecEnv(env_fns)
    else:
        envs = DummyVecEnv(env_fns)

    # Only flat (1-D) observation spaces are normalised.
    if len(envs.observation_space.shape) == 1:
        normalise_kwargs = dict(normalise_rew=normalise_rew, ret_rms=ret_rms)
        if gamma is not None:
            # Leave VecNormalize's own default in place when no gamma is given.
            normalise_kwargs['gamma'] = gamma
        envs = VecNormalize(envs, **normalise_kwargs)

    return vec_pytorch_cls(envs, device)

In [None]:

class TestML10Env(gym.Env):
    """
    Gym environment that exposes the training environments of the Meta-World
    ML10 benchmark through a single object.

    One benchmark environment (`self.env`) is active at a time; it is chosen
    with `set_benchmark_task`. Within that environment a concrete task is
    selected with `set_task` / `reset_task`, and the currently selected task
    is reported in the `info` dict returned by `step`.
    """

    def __init__(self):
        # Build the benchmark and remember its training environments in order,
        # so they can be addressed by integer index.
        self.benchmark = metaworld.ML10()
        self.task_names = list(self.benchmark.train_classes.keys())

        # metaworld max steps - hardcoded
        self._max_episode_steps = 500

        # set_task() reads self.env_name and self.env, so an environment has to
        # exist before any task can be sampled. Start from a randomly chosen
        # benchmark environment; this also sets the task and both spaces.
        self.set_benchmark_task(random.randrange(len(self.task_names)))

    def set_benchmark_task(self, task_id):
        """
        Switch to the benchmark environment at index `task_id` of `self.task_names`.

        A fresh instance of the environment class is created, a task belonging
        to that environment is sampled, and the observation / action spaces are
        refreshed from the new instance so that `self.task` and the spaces never
        refer to a previously active environment.
        """
        self.env_name = self.task_names[task_id]
        self.env_cls = self.benchmark.train_classes[self.env_name]
        self.env = self.env_cls()

        # The new instance has no task yet; pick one that matches env_name.
        self.set_task()

        # Read the spaces only after a task has been set.
        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space

    def step(self, action):
        """
        Step the active environment.

        The wrapped environment returns a 5-tuple; `terminated` and `truncated`
        are merged into a single `done` flag and the current task is stored
        under `info['task']`.

        :return: (obs, reward, done, info)
        """
        obs, reward, terminated, truncated, info = self.env.step(action)
        done = terminated or truncated
        info['task'] = self.task
        return obs, reward, done, info

    def reset(self):
        """Reset the active environment and return only the observation."""
        obs, _ = self.env.reset()
        return obs

    def get_task(self):
        """Return the active environment's name and class as a tuple."""
        return self.env_name, self.env_cls

    def _sample_task(self):
        """Pick a random training task whose env_name matches the active environment."""
        candidates = [
            task for task in self.benchmark.train_tasks
            if task.env_name == self.env_name
        ]
        return random.choice(candidates)

    ## reset_task is automatically created in make_env using set_task
    def set_task(self, task=None):
        """
        Make `task` the current task of the active environment.

        :param task: task to apply; when None a random training task of the
            active environment is sampled
        """
        if task is None:
            task = self._sample_task()

        self.task = task
        self.env.set_task(self.task)

    # kept as a separate entry point for varibad
    def reset_task(self, task=None):
        """Alias of `set_task`, provided under the name varibad uses."""
        self.set_task(task)

In [3]:
# Build the Meta-World ML10 benchmark object; a later cell reads its
# train_classes and train_tasks attributes.
ml10 = metaworld.ML10()

In [50]:
import random

# Look up one ML10 training environment by its position in the benchmark,
# then draw a random training task that belongs to that environment.
task_id = 2
task_names = [*ml10.train_classes.keys()]
env_name = task_names[task_id]
env_cls = ml10.train_classes[env_name]
task = random.choice(
    list(filter(lambda candidate: candidate.env_name == env_name, ml10.train_tasks))
)
# One item per line: all names, the chosen name, its class, the sampled task.
print(*[task_names, env_name, env_cls, task], sep='\n')


['reach-v2', 'push-v2', 'pick-place-v2', 'door-open-v2', 'drawer-close-v2', 'button-press-topdown-v2', 'peg-insert-side-v2', 'window-open-v2', 'sweep-v2', 'basketball-v2']
pick-place-v2
<class 'metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_pick_place_v2.SawyerPickPlaceEnvV2'>
Task(env_name='pick-place-v2', data=b'\x80\x04\x95=\x01\x00\x00\x00\x00\x00\x00}\x94(\x8c\x08rand_vec\x94\x8c\x15numpy.core.multiarray\x94\x8c\x0c_reconstruct\x94\x93\x94\x8c\x05numpy\x94\x8c\x07ndarray\x94\x93\x94K\x00\x85\x94C\x01b\x94\x87\x94R\x94(K\x01K\x06\x85\x94h\x05\x8c\x05dtype\x94\x93\x94\x8c\x02f8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01<\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C0z9\xbb\x91\x07\xfc\xb5?\x8a\xf1\xbd\xf0b\x19\xe6?\x00\x00\x00@\xe1z\x94?;ll\x0cPn\xa4\xbf\xda,*\xb0\x9e\x7f\xea?N\x1f[\xc9\xdd\xfa\xd1?\x94t\x94b\x8c\x07env_cls\x94\x8c8metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_pick_place_v2\x94\x8c\x14SawyerPickPlaceEnvV2\x94\x93\x94\x8c\x14partially_observable\x94\x88u.')


In [20]:
# Sanity check: the substring test make_vec_envs uses to route environment
# names ('ML10' in env_name) also matches a custom id such as 'CustomML10-v2'.
'ML10' in 'CustomML10-v2'

True

In [4]:
# Instantiate the environment registered under the id 'ML10-v2'.
# NOTE(review): the registration itself is not visible in this notebook -
# presumably done by a project module at import time; confirm.
ml10_env = gym.make('ML10-v2')

In [12]:
# Build an environment factory for 'ML10-v2' with seed=1, rank=1 and
# episodes_per_task=2. With tasks=None the make_env defined in this notebook
# does not override the environment's reset_task.
varibad_env = make_env('ML10-v2', 1, 1, 2, tasks = None, add_done_info =True)

In [13]:
# Call the factory to actually construct the wrapped environment.
vbad_ml10 = varibad_env()

In [14]:
# Inspect the wrapper's done_mdp attribute right after construction
# (True in the recorded run).
vbad_ml10.done_mdp

True

In [15]:
# Call the wrapper's reset_mdp() and display what it returns
# (a 40-element array in the recorded run).
vbad_ml10.reset_mdp()


array([ 0.00615235,  0.6001898 ,  0.19430117,  1.        , -0.03745555,
        0.68238707,  0.01987216,  0.        ,  0.        ,  0.        ,
        1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.00615235,  0.6001898 ,
        0.19430117,  1.        , -0.03745555,  0.68238707,  0.01987216,
        0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ])

In [16]:
# Show the observation space exposed by the wrapped environment.
vbad_ml10.observation_space


Box(40,)

In [17]:
# Check that the wrapper carries the episodes_per_task value passed to make_env (2).
vbad_ml10.episodes_per_task

2