In [1]:
import os
import pickle
import json

import numpy as np
import hydra
from cliport import agents
from cliport import dataset
from cliport import tasks
from cliport.utils import utils
from cliport.environments.environment import Environment

# Root of the cliport checkout. An already exported CLIPORT_ROOT is respected;
# the scratch location below is only the fallback when the variable is unset.
os.environ.setdefault('CLIPORT_ROOT', '/sfs/weka/scratch/ys5hd/cliport')

  warn(


In [2]:
# Read the evaluation config that ships inside the cliport checkout.
ROOT_DIR = os.environ['CLIPORT_ROOT']
config_file = 'eval.yaml'
cfg = utils.load_hydra_config(
    os.path.join(ROOT_DIR, f'cliport/cfg/{config_file}')
)

In [3]:
# Training-side config that is handed to the dataset and agent constructors.
# NOTE(review): this loads the stock train.yaml, not the experiment's own
# `.hydra/config.yaml` that cfg['train_config'] is pointed at further down —
# confirm the defaults match the settings the checkpoint was trained with.
tcfg = utils.load_hydra_config(os.path.join(ROOT_DIR, 'cliport/cfg/train.yaml'))

### Select Mode

In [6]:
# Selects the experiment folder that is evaluated (see the EXPS selection below):
#   False                               -> 'exps'      (plain CLIPort architecture)
#   truthy value, e.g. a task name str  -> 'exps_llm'  (e.g. 'align-rope')
LLM_PARSING = False #'align-rope'

In [7]:
# Dataset split to evaluate on, and the checkpoint-selection strategy that
# list_ckpts_to_eval understands ('last', 'val_missing', 'test_best', or a
# substring of a checkpoint file name).
MODE = "test" # "val"
CHECKPOINT_TYPE = "test_best" # "val_missing"

# Alternative setup: evaluate the not-yet-evaluated checkpoints on the validation split.
# MODE = "val"
# CHECKPOINT_TYPE = "val_missing"

In [8]:
# Task to evaluate on, and the task whose trained checkpoints are loaded.
# Other task names that have been used with this notebook:
#   'assembling-kits-seq-seen-colors'
#   'packing-boxes-pairs-seen-colors'
#   'put-block-in-bowl-seen-colors'
#   'stack-block-pyramid-seq-unseen-colors'
EVAL_TASK = 'align-rope'
MODEL_TASK = 'align-rope'
EVAL_AGENT = 'cliport'
EVAL_MODE = MODE
EVAL_NDEMOS = 100       # number of evaluation episodes
EVAL_TRAIN_DEMOS = 100  # number of training demos; this is what names the experiment folder
EVAL_CHECKPOINT_TYPE = CHECKPOINT_TYPE

# LLM-parsing experiments are kept in their own experiment folder.
EXPS = 'exps_llm' if LLM_PARSING else 'exps'
EVAL_FOLDER = EXPS

# Push the selections into the loaded eval config.
cfg['eval_task'] = EVAL_TASK
# Keep model_task in sync as well: later cells read cfg['model_task'] and would
# otherwise see whatever eval.yaml resolved it to, even when MODEL_TASK differs.
cfg['model_task'] = MODEL_TASK
cfg['agent'] = EVAL_AGENT
cfg['mode'] = EVAL_MODE
cfg['n_demos'] = EVAL_NDEMOS
cfg['train_demos'] = EVAL_TRAIN_DEMOS
cfg['checkpoint_type'] = EVAL_CHECKPOINT_TYPE
cfg['exp_folder'] = EVAL_FOLDER

# All locations are derived from ROOT_DIR so the notebook follows CLIPORT_ROOT.
cfg['assets_root'] = f'{ROOT_DIR}/cliport/environments/assets/'
# Experiment folders are named after the number of *training* demos, not the
# number of evaluation episodes.
cfg['record']['save_video_path'] = f'{ROOT_DIR}/{EXPS}/{EVAL_TASK}-{EVAL_AGENT}-n{EVAL_TRAIN_DEMOS}-train/videos/'
cfg['data_dir'] = f'{ROOT_DIR}/cliport/data/'

In [9]:
# Video recording switch; the evaluation loop reads it once per training run
# and records every episode when it is True (recording is very slow).
cfg['record']['save_video'] = False

In [10]:
# Checkpoints, the saved training config and the result files all live under
# one experiment directory. Its name is built from the task the model was
# trained on and the number of *training* demos (EVAL_TRAIN_DEMOS) — using the
# number of evaluation episodes here only works while both happen to be equal.
EXP_DIR = f'{ROOT_DIR}/{EXPS}/{MODEL_TASK}-{EVAL_AGENT}-n{EVAL_TRAIN_DEMOS}-train'

cfg['model_path'] = f'{EXP_DIR}/checkpoints/'
cfg['train_config'] = f'{EXP_DIR}/.hydra/config.yaml'
cfg['save_path'] = f'{EXP_DIR}/checkpoints/'
cfg['results_path'] = f'{EXP_DIR}/checkpoints/'

In [11]:
# Simulation environment that is reused for every evaluation episode.
env_options = {
    'disp': cfg['disp'],
    'shared_memory': cfg['shared_memory'],
    'hz': 480,
    'record_cfg': cfg['record'],
}
env = Environment(cfg['assets_root'], **env_options)

In [12]:
# Read the task and split back from the config and reject unknown splits early.
eval_task = cfg['eval_task']
mode = cfg['mode']
valid_modes = {'train', 'val', 'test'}
if not (mode in valid_modes):
    raise Exception("Invalid mode. Valid options: train, val, test")

In [13]:
# Build the evaluation dataset: the multi-task dataset when the configured type
# contains 'multi', otherwise the single-task dataset stored under
# "<data_dir>/<EVAL_TASK>-<EVAL_MODE>". Augmentation is off in both cases.
dataset_type = cfg['type']
is_multi_task = 'multi' in dataset_type
if not is_multi_task:
    single_task_dir = os.path.join(cfg['data_dir'], f"{EVAL_TASK}-{EVAL_MODE}")
    ds = dataset.RavensDataset(
        single_task_dir,
        tcfg,
        n_demos=cfg['n_demos'],
        augment=False,
    )
else:
    ds = dataset.RavensMultiTaskDataset(
        cfg['data_dir'],
        tcfg,
        group=eval_task,
        mode=mode,
        n_demos=cfg['n_demos'],
        augment=False,
    )

In [14]:
# Per-checkpoint result accumulator, and the label used for the agent and for
# the results file name.
all_results = dict()
name = f"{EVAL_TASK}-{cfg['agent']}-n{cfg['n_demos']}"

In [15]:
# Results file name: prefixed with "multi-" when the model path contains 'multi'.
json_name = f"multi-results-{mode}.json" if 'multi' in cfg['model_path'] else f"results-{mode}.json"
save_path = cfg['save_path']
print(f"Save path for results: {save_path}")
# exist_ok=True replaces the exists()-then-makedirs() pair, which can fail if
# the directory is created between the check and the call.
os.makedirs(save_path, exist_ok=True)
save_json = os.path.join(save_path, f'{name}-{json_name}')

Save path for results: /sfs/weka/scratch/ys5hd/cliport/exps/align-rope-cliport-n100-train/checkpoints/


In [16]:
# Results written by an earlier run; empty when there is no results file yet.
if os.path.exists(save_json):
    with open(save_json, 'r') as f:
        existing_results = json.load(f)
else:
    existing_results = {}

In [17]:
def list_ckpts_to_eval(vcfg, existing_results):
    """Return the checkpoint file names to evaluate for ``vcfg['checkpoint_type']``.

    Supported checkpoint types:

    * ``'last'``        -- just ``last.ckpt``.
    * ``'val_missing'`` -- every file in ``vcfg['model_path']`` whose name
      contains ``"steps="`` and is not already a key of ``existing_results``,
      sorted by file name.
    * ``'test_best'``   -- the checkpoint with the highest ``mean_reward`` in a
      validation results JSON (name contains ``"results-val"``) found in
      ``vcfg['results_path']``. Files with ``"multi"`` in their name are only
      considered when ``vcfg['model_task']`` contains ``"multi"``, and vice
      versa. Falls back to ``last.ckpt`` when no such file exists.
    * anything else     -- treated as a substring; the first file (by name) in
      ``vcfg['model_path']`` whose name contains it.

    Directory listings are sorted before the first entry is taken, because
    ``os.listdir`` returns entries in arbitrary order and the selection would
    otherwise differ between machines/runs when several candidates exist.

    Args:
        vcfg: Mapping holding ``checkpoint_type`` and, depending on the type,
            ``model_path``, ``results_path`` and ``model_task``.
        existing_results: Mapping keyed by checkpoint file name; only consulted
            for ``'val_missing'``.

    Returns:
        A list of checkpoint file names (not full paths). In the substring case
        without any match the list holds a single empty string, as before, so
        the caller's "checkpoint not found" handling still triggers.
    """
    checkpoint_type = vcfg['checkpoint_type']

    # Just the last.ckpt
    if checkpoint_type == 'last':
        return ['last.ckpt']

    # Validation checkpoints that haven't been already evaluated.
    if checkpoint_type == 'val_missing':
        checkpoints = sorted(c for c in os.listdir(vcfg['model_path']) if "steps=" in c)
        return [c for c in checkpoints if c not in existing_results]

    # Find the best checkpoint from validation and run eval on the test set.
    if checkpoint_type == 'test_best':
        want_multi = 'multi' in vcfg['model_task']
        result_jsons = sorted(
            r for r in os.listdir(vcfg['results_path'])
            if "results-val" in r and ("multi" in r) == want_multi
        )
        if not result_jsons:
            print("No best val ckpt found. Using last.ckpt")
            return ['last.ckpt']

        with open(os.path.join(vcfg['results_path'], result_jsons[0]), 'r') as f:
            eval_res = json.load(f)

        # last.ckpt stays the answer when the results file has no entries.
        best_checkpoint = 'last.ckpt'
        best_success = -1.0
        for ckpt, res in eval_res.items():
            if res['mean_reward'] > best_success:
                best_checkpoint = ckpt
                best_success = res['mean_reward']
        print(best_checkpoint)
        return [best_checkpoint]

    # Load a specific checkpoint with a substring e.g: 'steps=10000'
    print(f"Looking for: {checkpoint_type}")
    matches = sorted(c for c in os.listdir(vcfg['model_path']) if checkpoint_type in c)
    return [matches[0] if matches else ""]

In [18]:
# Resolve the checkpoint file names to evaluate for the configured checkpoint type.
ckpts_to_eval = list_ckpts_to_eval(cfg, existing_results)

steps=40000-val_loss=0.00013663.ckpt


In [19]:
# Show the selected checkpoints before the evaluation loop runs.
ckpts_to_eval

['steps=40000-val_loss=0.00013663.ckpt']

In [20]:
# Optional manual overrides of the checkpoint selection (uncomment one to use it):
# ckpts_to_eval = ckpts_to_eval[-3:]
# ckpts_to_eval[-2:]

# ckpts_to_eval = ["steps=80000-val_loss=0.00013843.ckpt"]
# ckpts_to_eval = ckpts_to_eval[-4:]

In [22]:
# Evaluation loop: for every selected checkpoint, roll the agent out on the
# evaluation episodes, keep a running mean of the episode rewards and merge the
# outcome into the results JSON (when cfg['save_results'] is set).
print(f"Evaluating: {str(ckpts_to_eval)}")
for ckpt in ckpts_to_eval:
    model_file = os.path.join(cfg['model_path'], ckpt)

    if not os.path.exists(model_file) or not os.path.isfile(model_file):
        print(f"Checkpoint not found: {model_file}")
        continue
    elif not cfg['update_results'] and ckpt in existing_results:
        # NOTE(review): the skip is disabled — the `continue` below is commented
        # out — so checkpoints with existing results are evaluated again and
        # both messages are printed.
        print(f"Skipping because of existing results for {model_file}.")
        print("Not Skipping")
        #continue

    # `results` is created once per checkpoint, so episodes from all training
    # runs below accumulate in the same list.
    results = []
    mean_reward = 0.0

    # Run testing for each training run.
    for train_run in range(cfg['n_repeats']):

        # Initialize agent.
        utils.set_seed(train_run, torch=True)
        # NOTE(review): llm_parsing is hard-coded to True here although the
        # LLM_PARSING switch defined earlier in the notebook is False — confirm
        # this is intended.
        agent = agents.names[cfg['agent']](name, tcfg, None, ds, llm_parsing=True)

        # Load checkpoint
        agent.load(model_file)
        print(f"Loaded: {model_file}")

        record = cfg['record']['save_video']
        n_demos = cfg['n_demos']
                
        # Validation runs only evaluate the first 20 episodes.
        # NOTE(review): 20 is a magic number; consider a config constant.
        if MODE == "val":
            n_demos = 20
            
#         # Run testing and save total rewards with last transition info.
#         n_demos = 1

        for i in range(0, n_demos):
            print(f'Test: {i + 1}/{n_demos}')
            episode, seed = ds.load(i)
            # The last element of the loaded episode is passed to the agent as the goal.
            goal = episode[-1]
            total_reward = 0
            # Seed numpy and the environment with the seed stored for this episode.
            np.random.seed(seed)

            # set task
            if 'multi' in dataset_type:
                task_name = ds.get_curr_task()
                task = tasks.names[task_name]()
                print(f'Evaluating on {task_name}')
            else:
                task_name = cfg['eval_task']
                task = tasks.names[task_name]()

            task.mode = mode
            env.seed(seed)
            env.set_task(task)
            obs = env.reset()
            info = env.info
            reward = 0                        
            
            # Start recording video (NOTE: super slow)
            if record:
                video_name = f'{task_name}-{i+1:06d}'
                if 'multi' in cfg['model_task']:
                    video_name = f"{cfg['model_task']}-{video_name}"
                env.start_rec(video_name)

            # Act until the environment reports done or the task's step budget is used up.
            for _ in range(task.max_steps):
                act = agent.act(obs, info, goal)
                lang_goal = info['lang_goal']
                print(f'Lang Goal: {lang_goal}')
                obs, reward, done, info = env.step(act)
                total_reward += reward
                print(f'Total Reward: {total_reward:.3f} | Done: {done}\n')
                if done:
                    break

            # Keep the episode's total reward together with the last info dict,
            # and refresh the running mean over all episodes so far.
            results.append((total_reward, info))
            mean_reward = np.mean([r for r, i in results])
            print(f'Mean: {mean_reward} | Task: {task_name} | Ckpt: {ckpt}')

            # End recording video
            if record:
                env.end_rec()

        # Overwritten after every training run with the accumulated episodes.
        all_results[ckpt] = {
            'episodes': results,
            'mean_reward': mean_reward,
        }

    # Save results in a json file.
    if cfg['save_results']:

        # Load existing results
        # The file is re-read and updated with the new entries, so results
        # stored for other checkpoints are kept.
        if os.path.exists(save_json):
            with open(save_json, 'r') as f:
                existing_results = json.load(f)
            existing_results.update(all_results)
            all_results = existing_results

        with open(save_json, 'w') as f:
            json.dump(all_results, f, indent=4)


Evaluating: ['steps=40000-val_loss=0.00013663.ckpt']
Skipping because of existing results for /sfs/weka/scratch/ys5hd/cliport/exps/align-rope-cliport-n100-train/checkpoints/steps=40000-val_loss=0.00013663.ckpt.
Not Skipping
Attn FCN - Stream One: plain_resnet_lat, Stream Two: clip_lingunet_lat, Stream Fusion: add
Transport FCN - Stream One: plain_resnet_lat, Stream Two: clip_lingunet_lat, Stream Fusion: conv
Agent: align-rope-cliport-n100, Logging: False
Loaded: /sfs/weka/scratch/ys5hd/cliport/exps/align-rope-cliport-n100-train/checkpoints/steps=40000-val_loss=0.00013663.ckpt
Test: 1/100
pybullet build time: Sep 22 2020 00:56:01
text argument:/sfs/weka/scratch/ys5hd/cliport/cliport/environments/assets/
int args: [Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Tot

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.100 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 1.000 | Done: True

Mean: 0.7944444444444446 | Task: align-rope | Ckpt: steps=40000-val_loss=0.00013663.ckpt
Test: 10/100
Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.250 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.350 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.200 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total R

Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.250 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.150 | Done: False

Mean: 0.7533333333333335 | Task: align-rope | Ckpt: steps=40000-val_loss=0.00013663.ckpt
Test: 16/100
Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.650 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.700 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.200 | Done: False

Lang Goal: align the rope from front left tip t

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.600 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.850 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.200 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.250 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.100 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.400 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.400 | Don

Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.550 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.550 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.550 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.550 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.550 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front left tip to front right tip


Lang Goal: align the rope from front left tip to back left corner
Total Reward: -0.000 | Done: False

Mean: 0.7129032258064518 | Task: align-rope | Ckpt: steps=40000-val_loss=0.00013663.ckpt
Test: 32/100
Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.250 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.500 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.350 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.650 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.300 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.300 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from back right corner to back left corner

Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.100 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front r

Total Reward: 0.150 | Done: False

Mean: 0.6250000000000001 | Task: align-rope | Ckpt: steps=40000-val_loss=0.00013663.ckpt
Test: 41/100
Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.200 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.200 | Done: False

Lang Goal: align the rope from 

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.150 | Done: False

Mean: 0.5750000000000001 | Task: align-rope | Ckpt: steps=40000-val_loss=0.00013663.ckpt
Test: 45/100
Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 0.100 | Done: False

Lang Goal: align the rope from front left tip to front right tip
Total Reward: 1.000 | Done: True

Mean: 0.5844444444444444 | Task: align-rope | Ckpt: steps=40000-val_loss=0.0001

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.100 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.100 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.100 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.100 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.100 | Done: True

Mean: 0.6120370370370373 | Task: align-rope | Ckpt: steps=40000-val_loss=0.00013663.ckpt
Test: 55/100
Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.250 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.700 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Rewa

Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.200 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.400 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.500 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.600 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.550 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.550 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.500 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.400 | Done: False

Lang Goal: align the rope fro

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.100 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.300 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 1.000 | Done: True

Mean: 0.65 | Task: align-rope | Ckpt: steps=40000-val_loss=0.00013663.ckpt
Test: 69/100
Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.350 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.650 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.700 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward

Total Reward: 0.850 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.850 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope fro

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.600 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.200 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.450 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 0.700 | Done: False

Lang Goal: align the rope from back right corner to back left corner
Total Reward: 1.000 | Done: True

Mean: 0.655625 | Task: align-rope | Ckpt: steps=40000-val_loss=0.00013663.ckpt
Test: 81/100
Lang Goal: align the rope from front right tip to back right corner
Total Rew

Total Reward: 0.950 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.150 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.100 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.250 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.250 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.500 | Done: False

Lang Goal: align the rope from front left tip to back left corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front left tip to back le

Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Mean: 0.6765957446808512 | Task: align-rope | Ckpt: steps=40000-val_loss=0.00013663.ckpt
Test: 95/100
Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.000 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front right tip to back right corner
Total Reward: 0.050 | Done: False

Lang Goal: align the rope from front ri

Total Reward: 0.900 | Done: False

Mean: 0.6655000000000001 | Task: align-rope | Ckpt: steps=40000-val_loss=0.00013663.ckpt


### QnA Experiments

In [8]:
from transformers import pipeline

# Question answering over the language instruction: ask which object should be
# picked. No model name is passed, so the pipeline's default model is used.
qa_model = pipeline("question-answering")
question = "Which object to pick?"
context = "place the yellow block on the lightest brown block."
qa_model(question = question, context = context)

{'score': 0.40745291113853455,
 'start': 6,
 'end': 22,
 'answer': 'the yellow block'}

In [9]:
from transformers import pipeline

# Same instruction, second question: ask where the picked object should be placed.
# NOTE(review): this rebuilds the same "question-answering" pipeline that the
# previous cell already created; the existing qa_model could be reused.
qa_model = pipeline("question-answering")
question = "Where to place the picked object?"
context = "place the yellow block on the lightest brown block."
qa_model(question = question, context = context)

{'score': 0.31235989928245544,
 'start': 30,
 'end': 50,
 'answer': 'lightest brown block'}