## Run evaluation on different action policies, e.g. VLA

In [None]:
import os
os.environ["MUJOCO_GL"] = "egl"
from VLABench.evaluation.evaluator import Evaluator
from VLABench.evaluation.model.policy.openvla import OpenVLA
from VLABench.evaluation.model.policy.base import RandomPolicy
from VLABench.tasks import *
from VLABench.robots import *

  _dash_comm = Comm(target_name="dash")


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [5]:
# --- Evaluation configuration -------------------------------------------------
# Tasks to evaluate in this demo run.
demo_tasks = ["select_fruit"]
unseen = True

# Write logs next to the package root instead of a user-specific absolute path.
# This mirrors the "<VLABENCH_ROOT>/../logs/..." layout used for the VLM logs
# further down in this notebook. If VLABENCH_ROOT is not set, fall back to a
# "logs" directory under the current working directory.
_vlabench_root = os.getenv("VLABENCH_ROOT")
if _vlabench_root:
    save_dir = os.path.normpath(os.path.join(_vlabench_root, "..", "logs"))
else:
    save_dir = os.path.abspath("logs")

# Checkpoint identifiers handed to the OpenVLA policy below.
model_ckpt = "openvla/openvla-7b"
lora_ckpt = "VLABench/openvla-lora"  # previously a local weights directory (".../select_fruit+CSv1+lora/") was used here

### Init evaluator

In [None]:
# Gather the evaluator settings in one place so they are easy to scan and tweak,
# then build the evaluator from them.
evaluator_kwargs = dict(
    tasks=demo_tasks,
    n_episodes=2,
    max_substeps=10,
    save_dir=save_dir,
    visualization=True,
)
evaluator = Evaluator(**evaluator_kwargs)

Load the task episodes by seeds, instead of episodes


### Load a basic random policy

In [3]:
# Baseline: RandomPolicy is constructed without any model (model=None).
random_policy = RandomPolicy(model=None)
# Run the baseline through the evaluator; the returned value is kept in `result`.
result = evaluator.evaluate(random_policy)

Evaluating select_fruit of RandomPolicy: 100%|██████████| 2/2 [01:45<00:00, 52.97s/it]


### Load a policy, taking OpenVLA as an example

In [6]:
# Build the OpenVLA policy from the checkpoints chosen in the config cell.
# VLABENCH_ROOT is read with os.environ[...] rather than os.getenv(...): when
# the variable is unset this raises a KeyError that names it, instead of an
# opaque TypeError from os.path.join(None, ...).
policy = OpenVLA(
    model_ckpt=model_ckpt,
    lora_ckpt=lora_ckpt,
    norm_config_file=os.path.join(os.environ["VLABENCH_ROOT"], "configs/model/openvla_config.json"),
)

# NOTE(review): the recorded output of this cell is a FileNotFoundError for
# 'openvla/openvla-7b/config.json' -- it looks like model_ckpt was resolved as a
# local directory. Confirm whether a local checkpoint path is required here.
result = evaluator.evaluate(policy)

FileNotFoundError: [Errno 2] No such file or directory: 'openvla/openvla-7b/config.json'

## Run evaluation on different VLMs

In [None]:
from VLABench.evaluation.model.vlm import *
from VLABench.evaluation.evaluator import VLMEvaluator

# Name of the VLM class to evaluate; it is resolved by name via initialize_model.
vlm_name = "GPT_4v" # valid names: ["GPT_4v", "Qwen2_VL", "InternVL2", "MiniCPM_V2_6", "GLM4v", "Llava_NeXT"]
# Forwarded to VLMEvaluator.evaluate as `few_shot_num`.
fewshot_num = 0
# Tasks handed to the VLMEvaluator.
task_list = ["mesh_and_texture/select_fruit"]

def initialize_model(model_name, *args, **kwargs):
    """Instantiate a model looked up by name in the notebook's global namespace.

    Args:
        model_name: Name of a class (or other callable) present in ``globals()``.
        *args: Positional arguments forwarded unchanged to the resolved callable.
        **kwargs: Keyword arguments forwarded unchanged to the resolved callable.

    Returns:
        Whatever the resolved callable returns when invoked with the forwarded
        arguments.

    Raises:
        ValueError: If ``model_name`` is not defined in the global namespace, or
            if it resolves to something that cannot be called.
    """
    cls = globals().get(model_name)
    if cls is None:
        raise ValueError(f"Model '{model_name}' not found in the current namespace.")

    # globals() also contains plain variables and imported modules; reject those
    # here with a clear message instead of failing later with an unrelated
    # "object is not callable" TypeError.
    if not callable(cls):
        raise ValueError(
            f"'{model_name}' resolves to a {type(cls).__name__}, not a model class."
        )

    return cls(*args, **kwargs)

In [None]:
# Instantiate the VLM selected by `vlm_name`.
vlm = initialize_model(vlm_name)

# Resolve the package root once. os.environ[...] raises a KeyError naming the
# variable when VLABENCH_ROOT is unset, instead of the opaque TypeError that
# os.path.join(None, ...) would produce with os.getenv.
vlabench_root = os.environ["VLABENCH_ROOT"]

evaluator = VLMEvaluator(
    tasks=task_list,
    n_episodes=2,
    data_path=os.path.join(vlabench_root, "../dataset", "vlm"),
    save_path=os.path.join(vlabench_root, "../logs/vlm"),
)

# Run the evaluation, then fetch the final score dictionary for this model name.
evaluator.evaluate(vlm, few_shot_num=fewshot_num)
result = evaluator.get_final_score_dict(vlm_name)