In [None]:
import os
import time
import numpy as np
import gymnasium as gym
import gymnasium_robotics
from gymnasium.wrappers import RecordVideo
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from sb3_contrib import TQC
import my_envs
from wrappers import ActiveObjectWrapper, ManualGoalWrapper

# Register the environments provided by gymnasium_robotics with gymnasium.
gym.register_envs(gymnasium_robotics)

# Environment id handed to gym.make() inside make_eval_env().
# Alternative id kept for reference: 'FetchPickAndPlace-v4'
ENV_ID = 'MultiObjectFetchPickAndPlace-v0'

# Saved policy weights and the VecNormalize statistics file, both under 'models/'.
MODEL_PATH, VECNORM_PATH = (
    os.path.join('models', artifact_name)
    for artifact_name in ('tqcdense_model.zip', 'tqcdense_vecnorm.pkl')
)

# Output locations. VIDEO_DIR is only referenced by the (currently disabled)
# video-recording code in make_eval_env().
LOG_DIR = 'logs_testing'
VIDEO_DIR = os.path.join(LOG_DIR, "inference_videos")

# Disabled: selecting a run folder through the RESUME_ID environment variable.
# RUN_ID = os.getenv('RESUME_ID', None)
# assert RUN_ID is not None,  "Set RESUME_ID to the run folder name (e.g., 08vger36)"


def make_eval_env():
    """Build the wrapped environment used for evaluation.

    Creates ``ENV_ID`` via ``gym.make`` with human rendering, the dense
    reward type and four objects, then wraps it in ``ActiveObjectWrapper``
    followed by ``ManualGoalWrapper``.

    Returns:
        The environment with ``ManualGoalWrapper`` as the outermost wrapper.
    """
    make_kwargs = dict(render_mode='human', reward_type='dense', n_objects=4)
    base_env = gym.make(ENV_ID, **make_kwargs)

    # Currently disabled: record every episode to VIDEO_DIR, e.g.
    #   os.makedirs(VIDEO_DIR, exist_ok=True)
    #   base_env = RecordVideo(base_env, video_folder=VIDEO_DIR,
    #                          episode_trigger=lambda ep: True, name_prefix="eval")
    # Also disabled: wrapping in Monitor for episode stats.

    # NOTE(review): the positional arguments (0, 4) are presumably the initially
    # active object index and the object count (matching n_objects above) --
    # confirm against ActiveObjectWrapper.
    return ManualGoalWrapper(ActiveObjectWrapper(base_env, 0, 4))

# Fail fast: verify both saved artifacts exist *before* constructing the
# environment. make_eval_env() builds the env with render_mode='human', so
# raising after construction would leave an open environment that nothing closes.
if not os.path.exists(VECNORM_PATH):
    raise FileNotFoundError(f'Missing Vecnormalize file : {VECNORM_PATH}')
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f'Missing model file : {MODEL_PATH}')

# Single-environment vectorized wrapper, then restore the saved normalization
# statistics on top of it.
eval_env = DummyVecEnv([make_eval_env])
eval_env = VecNormalize.load(VECNORM_PATH, eval_env)

# Evaluation settings: do not update the running normalization statistics and
# do not normalize rewards, so reported returns use the environment's own reward.
eval_env.training = False
eval_env.norm_reward = False

# Load the trained TQC policy and attach it to the evaluation environment.
model = TQC.load(MODEL_PATH, env=eval_env)



In [31]:
# Reset the vectorized env once; the returned observation dict
# (achieved_goal / desired_goal / observation) is displayed as the cell output.
eval_env.reset()

OrderedDict([('achieved_goal',
              array([[-1.380873  , -1.2845722 , -0.05878199]], dtype=float32)),
             ('desired_goal',
              array([[ 0.36231285,  0.58025473, -0.57196546]], dtype=float32)),
             ('observation',
              array([[-2.8229108e-02, -2.3284504e-02, -1.3531129e-01, -1.3808730e+00,
                      -1.2845722e+00, -5.8781985e-02, -8.5368133e-01, -7.1308279e-01,
                       9.3479499e-02, -1.7087260e+00, -1.7319595e+00,  2.1000132e-02,
                      -1.9299747e-01,  3.0267995e-02, -5.7901917e+00, -4.3915128e-03,
                      -5.5173841e+00, -3.8756887e-04, -8.6273644e-03,  2.2135344e-03,
                      -4.5994865e-03, -3.0075747e-03, -6.9535203e-02, -4.4036992e-02,
                      -4.2243820e-02]], dtype=float32))])

In [None]:
# Set a manual goal by calling `set_goal_relative_to_object` on the wrapped env.
# NOTE(review): the arguments are presumably (object index, (x, y, z) offset) --
# confirm against ManualGoalWrapper.
eval_env.env_method("set_goal_relative_to_object", 1, (0.0, 0.0, 0.05))

# IMPORTANT: take one step with an all-zero action so the returned observation's
# desired_goal matches the new goal.
noop_action = np.zeros((1, 4), dtype=np.float32)
obs, _, _, _ = eval_env.step(noop_action)

In [38]:
# 5) Run a few episodes
# Roll out the deterministic policy; an episode stops when the env reports
# done or when the step info flags success, whichever comes first.
# `action` and `obs` stay as notebook globals because later cells read them.
n_episodes = 1
STEP_DELAY_S = .05  # pause between consecutive steps

for ep in range(n_episodes):
    obs = eval_env.reset()
    ep_return, done = 0.0, False

    while True:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, dones, infos = eval_env.step(action)

        # The vectorized env returns batched results; index 0 is the first env.
        ep_return += float(reward[0])
        done = bool(dones[0])
        time.sleep(STEP_DELAY_S)

        info = infos[0]
        if info.get("is_success", False):
            print("Target reached")
            break
        if done:
            break

    print(f"Episode {ep+1}/{n_episodes} return: {ep_return:.3f}")


Target reached
Episode 1/1 return: -7.721


In [21]:
# Overwrite the 4th component of the most recent `action` array in place and
# step once with it. Relies on `action` left over from an earlier cell.
# NOTE(review): the 4th component is presumably the gripper command (a later
# cell labelled "release the object" sets the same component to +1.0) -- confirm.
action[0,3] = 1
obs, _, _, _ = eval_env.step(action)

In [25]:
# Call `set_active_object` with argument 3 on the wrapped env, then reset;
# the observation returned by reset() is displayed as the cell output.
# NOTE(review): 3 is presumably an object index -- confirm against ActiveObjectWrapper.
eval_env.env_method("set_active_object", 3)
eval_env.reset()

OrderedDict([('achieved_goal',
              array([[ 1.0666339 ,  1.1523316 , -0.05878199]], dtype=float32)),
             ('desired_goal',
              array([[-0.94194406,  0.8088951 , -0.57196546]], dtype=float32)),
             ('observation',
              array([[-2.8229108e-02, -2.3284504e-02, -1.3531129e-01,  1.0666339e+00,
                       1.1523316e+00, -5.8781985e-02,  7.0643717e-01,  6.8221384e-01,
                       9.3479499e-02, -1.7087260e+00, -1.7319595e+00,  3.7060216e-02,
                       6.8642974e-02,  2.2598559e-01,  2.8186576e+00,  6.0774949e-03,
                      -5.6209073e+00,  7.6922867e-04, -5.5768914e-03,  1.7459443e-02,
                      -4.5994865e-03, -3.0075747e-03, -6.9535203e-02, -4.4036992e-02,
                      -4.2243820e-02]], dtype=float32))])

In [28]:
# Close the vectorized environment.
# NOTE(review): cells further down still call eval_env.reset() / eval_env.step();
# in a top-to-bottom run they would execute after this close -- confirm the
# intended cell order.
eval_env.close()

In [None]:
# 5) Run a few episodes
# Same rollout as before with a shorter pause between steps: run the
# deterministic policy until the env reports done or the info flags success.
# `obs` and `action` stay as notebook globals because later cells read them.
n_episodes = 1
STEP_DELAY_S = .01  # pause between consecutive steps

for ep in range(n_episodes):
    obs = eval_env.reset()
    ep_return, done = 0.0, False

    while True:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, dones, infos = eval_env.step(action)

        # The vectorized env returns batched results; index 0 is the first env.
        ep_return += float(reward[0])
        done = bool(dones[0])
        time.sleep(STEP_DELAY_S)

        info = infos[0]
        if info.get("is_success", False):
            print("Target reached")
            break
        if done:
            break

    print(f"Episode {ep+1}/{n_episodes} return: {ep_return:.3f}")


Target reached
Episode 1/1 return: -16.426


In [8]:
# Take a single deterministic policy step from the current `obs` (left over
# from an earlier cell) and print the per-env info list returned by step().
action, _ = model.predict(obs, deterministic=True)
obs, reward, dones, infos = eval_env.step(action)
print(infos)

[{'is_success': np.float32(0.0), 'TimeLimit.truncated': False}]


In [11]:
# Release the object: zero motion on the first three components, +1.0 on the
# fourth. numpy is already imported in the first cell, so it is not re-imported.
# The dtype is float32 to match the environment's action space (its samples are
# float32) and the no-op action used when refreshing the goal.
action = np.array([[0.0, 0.0, 0.0, +1.0]], dtype=np.float32)
obs, reward, dones, infos = eval_env.step(action)


In [12]:
# Close the vectorized environment once evaluation is finished.
eval_env.close()

In [7]:
# Draw one random action from the env's action space to inspect its shape and
# dtype (displayed as the cell output).
eval_env.action_space.sample()

array([-0.38441518,  0.68080485, -0.38448015, -0.6948155 ], dtype=float32)