## Run evaluation on different action policies, e.g. VLA

In [43]:
# Configure the project paths and environment variables
import sys
import os

# Repository checkout that contains the VLABench package. The default is the
# original machine-specific location; export VLABENCH_PROJECT_ROOT before
# starting the kernel to run this notebook against a different checkout.
project_root = os.environ.get("VLABENCH_PROJECT_ROOT", "/home/vla/Downloads/VLABench")
# The package root is the VLABench/ directory inside the checkout. Derive it
# from project_root instead of repeating the absolute path so the two values
# cannot drift apart.
vlabench_root = os.path.join(project_root, "VLABench")

# Put the checkout on the import path; the membership test keeps re-runs of
# this cell from appending duplicates.
if project_root not in sys.path:
    sys.path.append(project_root)

# Export VLABENCH_ROOT as the package root; later cells read it to build
# config, dataset and log paths.
os.environ["VLABENCH_ROOT"] = vlabench_root


print(f"项目根目录已设置为: {project_root}")
print(f"VLABENCH_ROOT 环境变量已设置为: {os.getenv('VLABENCH_ROOT')}")

项目根目录已设置为: /home/vla/Downloads/VLABench
VLABENCH_ROOT 环境变量已设置为: /home/vla/Downloads/VLABench/VLABench


In [None]:
from VLABench.evaluation.evaluator import Evaluator
from VLABench.evaluation.model.policy.openvla import OpenVLA
from VLABench.evaluation.model.policy.base import RandomPolicy
from VLABench.tasks import *
from VLABench.robots import *

# --- Settings for the policy evaluation cells below ---
# Task names handed to the Evaluator.
demo_tasks = ["select_fruit"]
# NOTE(review): `unseen` is not referenced by any cell visible in this notebook;
# confirm whether it is still needed.
unseen = True
# Passed to the Evaluator as save_dir.
save_dir = "/home/vla/Downloads/VLABench/logs"

# Checkpoints: the base OpenVLA model, with no LoRA weights (leave lora_ckpt
# as None when there are none to load).
model_ckpt, lora_ckpt = "openvla/openvla-7b", None
# Example of pointing at local weights instead:
# model_ckpt = "/remote-home1/pjliu/openvla-7b"
# lora_ckpt = "/remote-home1/pjliu/openvla/weights/select_fruit+CSv1+lora/"

In [45]:
import os
# Choose the rendering backend through the MUJOCO_GL environment variable.
# "glfw" is the active choice; "egl" is the alternative that was toggled here:
# os.environ["MUJOCO_GL"] = "egl"
# NOTE(review): presumably this has to be set before the simulation/rendering
# stack is first imported to take effect; this cell sits after the VLABench
# imports, so confirm the ordering on a fresh kernel.
os.environ.update(MUJOCO_GL="glfw")

### Init evaluator

In [59]:
# Build the policy evaluator from the settings defined earlier in the notebook
# (`demo_tasks`, `save_dir`).
# NOTE(review): the meaning of n_episodes / max_substeps is defined by
# Evaluator, which is not visible here; the recorded run below shows 2
# iterations for select_fruit, consistent with n_episodes=2.
# NOTE(review): `visulization` is kept exactly as spelled; presumably it matches
# the keyword in Evaluator's signature, so confirm before "correcting" it.
evaluator = Evaluator(
    tasks=demo_tasks,
    n_episodes=2,
    max_substeps=10,   
    save_dir=save_dir,
    visulization=True
)

Load the task episodes by seeds, instead of episodes


### Load a basic random policy

In [49]:
# Baseline: a RandomPolicy constructed without an underlying model.
random_policy = RandomPolicy(model=None)
# Evaluate the baseline with the evaluator built above. The outcome is bound to
# `result`, which the OpenVLA cell further down rebinds.
result = evaluator.evaluate(random_policy)

Evaluating select_fruit of RandomPolicy: 100%|██████████| 2/2 [00:27<00:00, 13.63s/it]


### Load a policy, taking OpenVLA as an example

In [None]:
# Build the OpenVLA policy from the checkpoints configured earlier.
# The norm_config_file path is resolved relative to VLABENCH_ROOT. os.environ
# is indexed directly so that, if the setup cell was skipped, this fails with a
# KeyError naming VLABENCH_ROOT rather than an opaque TypeError raised from
# os.path.join(None, ...).
policy = OpenVLA(
    model_ckpt=model_ckpt,
    lora_ckpt=lora_ckpt,
    norm_config_file=os.path.join(os.environ["VLABENCH_ROOT"], "configs/model/openvla_config.json")
)

# Evaluate the policy with the evaluator built above; this rebinds `result`,
# replacing the random-policy outcome.
result = evaluator.evaluate(policy)

## Run evaluation on different VLMs

In [None]:
from VLABench.evaluation.model.vlm import *
from VLABench.evaluation.evaluator import VLMEvaluator

# --- Settings for the VLM evaluation cells below ---
# Tasks handed to the VLMEvaluator.
task_list = ["mesh_and_texture/select_fruit"]
# Passed to evaluate() as few_shot_num.
fewshot_num = 0
# Name of the VLM class to instantiate. Valid names:
#   "GPT_4v", "Qwen2_VL", "InternVL2", "MiniCPM_V2_6", "GLM4v", "Llava_NeXT"
vlm_name = "GPT_4v"

def initialize_model(model_name, *args, **kwargs):
    """Instantiate the model whose class is bound to ``model_name``.

    The name is looked up in this module's global namespace (in the notebook,
    the names brought in by the imports above), and the object found there is
    called with the remaining arguments.

    Args:
        model_name: Name of a callable (normally a model class) available as a
            global, e.g. ``"GPT_4v"``.
        *args: Positional arguments forwarded to the model constructor.
        **kwargs: Keyword arguments forwarded to the model constructor.

    Returns:
        Whatever calling the resolved object returns (the model instance).

    Raises:
        ValueError: If no global named ``model_name`` exists, or if the global
            with that name is not callable.
    """
    cls = globals().get(model_name)
    if cls is None:
        raise ValueError(f"Model '{model_name}' not found in the current namespace.")
    # globals() also holds plain variables (strings, lists, ...). Reject those
    # explicitly rather than failing with "object is not callable" below.
    if not callable(cls):
        raise ValueError(
            f"'{model_name}' in the current namespace is a {type(cls).__name__}, not a callable model class."
        )

    return cls(*args, **kwargs)


In [None]:
# Instantiate the VLM selected by `vlm_name`.
vlm = initialize_model(vlm_name)
# NOTE: this rebinds `evaluator`, which previously held the policy Evaluator;
# re-running the policy cells after this one would use the VLMEvaluator.
# os.environ is indexed directly so a skipped setup cell fails with a KeyError
# naming VLABENCH_ROOT instead of a TypeError from os.path.join(None, ...).
evaluator = VLMEvaluator(
    tasks=task_list,
    n_episodes=2,
    data_path=os.path.join(os.environ["VLABENCH_ROOT"], "../dataset", "vlm"),
    save_path=os.path.join(os.environ["VLABENCH_ROOT"], "../logs/vlm"),
)

evaluator.evaluate(vlm, few_shot_num=fewshot_num)
# Fetch the score dictionary for this VLM; this rebinds `result`.
result = evaluator.get_final_score_dict(vlm_name)
