In [None]:
import os
from os.path import join, dirname, abspath
import sys
# Working directory of the running kernel; the relative path below is resolved against it,
# so the notebook must be launched from its expected location for the local imports to work.
CURRENT_DIR = os.getcwd()
# Prepend the directory two levels above the working directory to the module search path
# (presumably the project root that holds the local `models` and `agents` packages — verify).
sys.path.insert(0, join(CURRENT_DIR, '../..'))  # Import local models

from cliport.environments.environment import Environment
import torch
from models.PickModel import PickModel
from models.PlaceModel import PlaceModel
from agents.PickPlaceAgent import PickPlaceAgent
from cliport import tasks
from cliport.dataset import RavensDataset
import numpy as np
from cliport.utils import utils


In [None]:
# Video-recording options handed to the environment via `record_cfg`.
record_cfg = dict(
    save_video=False,
    save_video_path="/home/ubuntu/VLM/videos/",
    add_text=True,
    fps=20,
    video_height=640,
    video_width=720,
)

# Directory containing the simulation assets used by the environment.
assets_root = "/home/ubuntu/cliport/cliport/environments/assets/"

# Simulation environment created with disp=False and shared_memory=False, stepping at hz=480.
env = Environment(assets_root, disp=False, shared_memory=False, hz=480, record_cfg=record_cfg)

In [3]:
# Use the GPU when one is available; otherwise fall back to the CPU so the notebook
# can still be executed on a machine without CUDA. On CUDA machines this is 'cuda',
# exactly as before.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Pick-and-place agent built with 12 rotations and a learning rate of 1e-4.
agent = PickPlaceAgent(num_rotations=12, lr=1e-4, device=DEVICE)

In [5]:
# Both state dicts live in the same checkpoint file, so read it from disk only once.
CHECKPOINT_PATH = "/home/ubuntu/VLM/checkpoints/checkpoint_PairPack_latest.pth"

# map_location="cpu" deserializes the tensors on the CPU regardless of the device they
# were saved from; load_state_dict then copies the values into each model's existing
# parameters, so the models stay on whatever device they already occupy.
checkpoint = torch.load(CHECKPOINT_PATH, map_location="cpu")

agent.pick_model.load_state_dict(checkpoint['pick_state_dict'])
# Kept as the last expression so the cell still displays the key-matching result.
agent.place_model.load_state_dict(checkpoint['place_state_dict'])

<All keys matched successfully>

In [6]:
# Dataset options, nested under the "dataset" key and passed to RavensDataset below.
train_dataset_cfg = dict(
    dataset=dict(
        type="single",
        images=True,
        cache=False,
        augment=dict(theta_sigma=60),
        cache_size=350,
    ),
)

# load data
# NOTE(review): the variable is named `train_dataset` but the path points at a "-val"
# directory; the name is kept because later cells refer to it.
train_dataset = RavensDataset(
    '/home/ubuntu/cliport/data/packing-boxes-pairs-full-val',
    train_dataset_cfg,
    n_demos=100,
    augment=False,
)

In [9]:
# Roll the agent out on `num_trials` stored episodes and report per-step, per-episode
# and running-average rewards.
total_reward = 0
num_trials = 100
max_steps = 10  # upper bound on agent actions per episode
for i in range(num_trials):
    env.start_rec("pick-place-pairs" + str(i))
    try:
        # Only the seed is used from the stored episode: it re-seeds the environment
        # before reset. The demonstration data itself is not used in this loop.
        _episode, seed = train_dataset.load(i)
        task = tasks.names["packing-boxes-pairs-full"]()
        # NOTE(review): mode is "train" although the dataset path ends in "-val" — confirm intent.
        task.mode = "train"
        env.seed(seed)
        env.set_task(task)
        obs = env.reset()
        info = env.info
        episode_reward = 0

        lang_goal = info['lang_goal']
        print(f'Lang Goal: {lang_goal}')

        for _ in range(max_steps):
            img = train_dataset.get_image(obs)
            # Re-read the goal every step because `info` is replaced by env.step().
            lang_goal = info['lang_goal']
            act, affordances = agent.act(img, lang_goal)

            obs, reward, done, info = env.step(act)
            episode_reward += reward
            total_reward += reward
            print("Single timestep reward:", reward)
            if done:
                break
        print("Iteration:", i)
        print("Episode Reward:", episode_reward)
        print("Average Reward Across Episodes:", total_reward/(i+1))
    finally:
        # Always pair start_rec with end_rec, even if the episode raised part-way through.
        env.end_rec()


Lang Goal: pack all the gray and pink blocks into the brown box
Single timestep reward: 0.05625
Single timestep reward: 0.05625
Single timestep reward: 0.05625000000000001
Single timestep reward: 0.11041666666666666
Single timestep reward: 0.0
Single timestep reward: 0.07500000000000001
Single timestep reward: 0.11041666666666666
Single timestep reward: 0.09583333333333333
Single timestep reward: 0.07916666666666661
Single timestep reward: 0.09791666666666676
Iteration: 0
Episode Reward: 0.7375
Average Reward Across Episodes: 0.7375
Lang Goal: pack all the purple and cyan blocks into the brown box
Single timestep reward: 0.0
Single timestep reward: 0.25
Single timestep reward: 0.25
Single timestep reward: 0.0
Single timestep reward: 0.16666666666666663
Single timestep reward: 0.25
Single timestep reward: 0.0
Single timestep reward: 0.0
Single timestep reward: 0.0
Single timestep reward: 0.0
Iteration: 1
Episode Reward: 0.9166666666666666
Average Reward Across Episodes: 0.82708333333333