In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Union

import draccus
import numpy as np
import tqdm
from libero.libero import benchmark
import pickle
import os
import wandb

# Append current directory so that interpreter can find experiments.robot
sys.path.append("../..")
from experiments.robot.libero.libero_utils import (
    get_libero_dummy_action,
    get_libero_env,
    get_libero_image,
    quat2axisangle,
    save_rollout_video,
)
from experiments.robot.openvla_utils import get_processor
from experiments.robot.robot_utils import (
    DATE_TIME,
    get_action,
    get_action_ids,
    get_image_resize_size,
    get_model,
    invert_gripper_action,
    normalize_gripper_action,
    set_seed_everywhere,
)

from transformers import AutoProcessor
from prismatic.vla.action_tokenizer import ActionTokenizer

2025-07-24 14:25:53.014697: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-24 14:25:53.014736: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-24 14:25:53.016680: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-24 14:25:53.026176: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_

In [3]:
from prismatic.extern.hf.configuration_prismatic import OpenVLAConfig
from prismatic.extern.hf.modeling_prismatic import OpenVLAForActionPrediction
from prismatic.extern.hf.processing_prismatic import PrismaticImageProcessor, PrismaticProcessor
from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
from transformers import AutoConfig, AutoImageProcessor

# Register the local OpenVLA config, image processor, processor, and model classes with the matching
# Hugging Face Auto* factories, keyed by the "openvla" model type / `OpenVLAConfig`.
# NOTE(review): presumably this lets `Auto*.from_pretrained` resolve OpenVLA checkpoints to these local
# classes -- confirm against the checkpoint's config and the `trust_remote_code` usage further down.
AutoConfig.register("openvla", OpenVLAConfig)
AutoImageProcessor.register(OpenVLAConfig, PrismaticImageProcessor)
AutoProcessor.register(OpenVLAConfig, PrismaticProcessor)
AutoModelForVision2Seq.register(OpenVLAConfig, OpenVLAForActionPrediction)

In [4]:
@dataclass
class GenerateConfig:
    """Settings for the LIBERO rollout cells in this notebook: model, environment, and logging options."""
    # fmt: off

    #################################################################################################################
    # Model-specific parameters
    #################################################################################################################
    model_family: str = "openvla"                    # Model family
    pretrained_checkpoint: Union[str, Path] = "/mnt/sda/home/zijianwang/HF_CACHE/openvla-7b-finetuned-libero-10"     # Pretrained checkpoint path
    load_in_8bit: bool = False                       # (For OpenVLA only) Load with 8-bit quantization
    load_in_4bit: bool = False                       # (For OpenVLA only) Load with 4-bit quantization

    center_crop: bool = True                         # Center crop? (if trained w/ random crop image aug)

    #################################################################################################################
    # LIBERO environment-specific parameters
    #################################################################################################################
    task_suite_name: str = "libero_10"          # Task suite. Options: libero_spatial, libero_object, libero_goal, libero_10, libero_90
    num_steps_wait: int = 10                         # Number of steps to wait for objects to stabilize in sim
    num_trials_per_task: int = 50                    # Number of rollouts per task

    #################################################################################################################
    # Utils
    #################################################################################################################
    run_id_note: Optional[str] = None                # Extra note to add in run ID for logging
    local_log_dir: str = "./experiments/logs"        # Local directory for eval logs

    use_wandb: bool = False                          # Whether to also log results in Weights & Biases
    wandb_project: str = "YOUR_WANDB_PROJECT"        # Name of W&B project to log to (use default!)
    wandb_entity: str = "YOUR_WANDB_ENTITY"          # Name of entity to log under

    seed: int = 7                                    # Random Seed (for reproducibility)

    device: str = "cuda:0"                           # Device string; presumably read by the model-loading helpers -- TODO confirm

    # Key selecting the action un-normalization statistics. The notebook assigns `cfg.unnorm_key` after
    # constructing the config; declaring it here (default None) makes it a real dataclass field, so it
    # appears in repr()/fields()/asdict() instead of being an ad-hoc instance attribute.
    unnorm_key: Optional[str] = None

    # fmt: on

In [5]:
# Build the run configuration and validate option combinations before the (slow) model load.
cfg = GenerateConfig()
assert cfg.pretrained_checkpoint is not None, "cfg.pretrained_checkpoint must not be None!"
# `pretrained_checkpoint` is typed Union[str, Path]; go through str() so the substring check also works
# when a Path is supplied (the `in` operator raises TypeError on a Path object).
if "image_aug" in str(cfg.pretrained_checkpoint):
    assert cfg.center_crop, "Expecting `center_crop==True` because model was trained with image augmentations!"
assert not (cfg.load_in_8bit and cfg.load_in_4bit), "Cannot use both 8-bit and 4-bit quantization!"

# Set random seed
set_seed_everywhere(cfg.seed)

# [OpenVLA] Set action un-normalization key (the rollout cell may append "_no_noops" to it later)
cfg.unnorm_key = cfg.task_suite_name

# Load model
model = get_model(cfg)

# processor = AutoProcessor.from_pretrained(cfg.pretrained_checkpoint, trust_remote_code=True)
# action_tokenizer = ActionTokenizer(processor.tokenizer)

[*] Instantiating Pretrained VLA model
[*] Loading in BF16 with Flash-Attention Enabled


Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00,  9.51it/s]


Loaded model: <class 'transformers_modules.openvla.openvla-7b.31f090d05236101ebfc381b61c674dd4746d4ce0.modeling_prismatic.OpenVLAForActionPrediction'>


In [6]:
import functools
import types
import torch
import numpy as np
from typing import Optional

def predicted_action_token_ids(
    self, input_ids: Optional[torch.LongTensor] = None, unnorm_key: Optional[str] = None, **kwargs
) -> np.ndarray:
    """Run `.generate()` on a prompt and return the raw token IDs of the predicted action.

    No decoding or un-normalization happens here: the caller receives vocabulary IDs and is
    responsible for turning them into continuous actions.

    Args:
        self: Model instance; must provide `generate` and `get_action_dim`.
        input_ids: Prompt token IDs, indexed as (batch, sequence).
        unnorm_key: Passed to `self.get_action_dim` to decide how many tokens to generate.
        **kwargs: Forwarded unchanged to `self.generate`.

    Returns:
        1-D NumPy array holding the last `action_dim` token IDs generated for the first batch row.

    Raises:
        ValueError: If `input_ids` is None.
    """
    if input_ids is None:
        raise ValueError("`input_ids` must be provided.")

    # If the special empty token ('') does not already appear after the colon (':') token in the prompt
    # (after "OUT:" or "ASSISTANT:"), insert it to match the inputs seen at training time
    empty_token_id = 29871
    if not torch.all(input_ids[:, -1] == empty_token_id):
        # One extra column per batch row, created with the dtype/device of `input_ids`. The previous
        # (1, 1)-shaped tensor made torch.cat fail for batch sizes above 1.
        empty_column = input_ids.new_full((input_ids.shape[0], 1), empty_token_id)
        input_ids = torch.cat((input_ids, empty_column), dim=1)

    # Run VLA inference; one generated token per action dimension
    action_dim = self.get_action_dim(unnorm_key)
    generated_ids = self.generate(input_ids, max_new_tokens=action_dim, **kwargs)

    # Keep only the trailing action tokens of the first sequence in the batch
    return generated_ids[0, -action_dim:].cpu().numpy()

# Attach the function to the loaded model instance as a bound method, so it can be called as
# `model.predicted_action_token_ids(...)`.
model.predicted_action_token_ids = types.MethodType(predicted_action_token_ids, model) 

In [7]:
# Load the processor stored with the checkpoint and wrap its tokenizer in an ActionTokenizer.
# `trust_remote_code=True` allows Hugging Face to execute Python code shipped with the checkpoint.
# NOTE: `processor` is reassigned via `get_processor(cfg)` in the rollout cell further down, and
# `action_tokenizer` is not referenced by the other cells visible in this notebook.
processor = AutoProcessor.from_pretrained(cfg.pretrained_checkpoint, trust_remote_code=True)
action_tokenizer = ActionTokenizer(processor.tokenizer)

In [8]:
import json
# Load the recorded successful rollouts. The `with` block closes the file handle deterministically;
# the previous `json.load(open(...))` form left it open until garbage collection.
with open("/mnt/sda/home/zijianwang/openvla/vla-scripts/DPO/data/successful_trajectories.json") as trajectories_file:
    successful_trajectories = json.load(trajectories_file)

# Keep only the (task_id, episode) pair of each trajectory, coerced to int.
winners = [
    {"task_id": int(trajectory["task_id"]), "episode": int(trajectory["episode"])}
    for trajectory in successful_trajectories
]

In [9]:
# Quick visual check of the parsed (task_id, episode) records.
print(winners)

[{'task_id': 5, 'episode': 15}, {'task_id': 1, 'episode': 9}, {'task_id': 4, 'episode': 22}, {'task_id': 1, 'episode': 38}, {'task_id': 0, 'episode': 11}, {'task_id': 1, 'episode': 1}, {'task_id': 4, 'episode': 20}, {'task_id': 8, 'episode': 41}, {'task_id': 5, 'episode': 13}, {'task_id': 7, 'episode': 24}, {'task_id': 5, 'episode': 45}, {'task_id': 0, 'episode': 8}, {'task_id': 3, 'episode': 32}, {'task_id': 6, 'episode': 2}, {'task_id': 0, 'episode': 22}, {'task_id': 1, 'episode': 8}, {'task_id': 5, 'episode': 42}, {'task_id': 3, 'episode': 27}, {'task_id': 3, 'episode': 25}, {'task_id': 7, 'episode': 45}, {'task_id': 9, 'episode': 16}, {'task_id': 5, 'episode': 17}, {'task_id': 0, 'episode': 12}, {'task_id': 2, 'episode': 4}, {'task_id': 2, 'episode': 28}, {'task_id': 3, 'episode': 48}, {'task_id': 5, 'episode': 44}, {'task_id': 1, 'episode': 26}, {'task_id': 6, 'episode': 18}, {'task_id': 7, 'episode': 41}, {'task_id': 5, 'episode': 47}, {'task_id': 2, 'episode': 24}, {'task_id': 7

In [10]:
# Group the winning episode indices by task: {task_id: [episode, ...]}, preserving input order
winners_dict = {}
for winner in winners:
    task_id, episode = winner["task_id"], winner["episode"]
    # setdefault creates the per-task list the first time a task_id is seen
    winners_dict.setdefault(task_id, []).append(episode)

print("Winners dictionary:", winners_dict)


Winners dictionary: {5: [15, 13, 45, 42, 17, 44, 47, 39, 0, 6, 29, 11, 16, 41, 31, 30, 43, 5, 40, 46, 32, 2, 18, 8, 48, 22, 49, 35, 24, 37, 21, 27, 7, 38, 25, 9, 1, 10, 34, 23, 3, 4], 1: [9, 38, 1, 8, 26, 42, 2, 4, 7, 49, 24, 37, 13, 11, 21, 44, 14, 45, 10, 15, 18, 5, 35, 27, 25, 29, 28, 36, 17, 19, 0, 34, 31, 48], 4: [22, 20, 13, 36, 27, 28, 49, 23, 38, 14, 6, 15, 29, 48, 3, 37, 43, 31, 32, 44, 8, 24], 0: [11, 8, 22, 12, 47, 37, 2, 25, 10, 49, 9, 19, 16, 29, 5, 34, 3, 21, 23, 42, 41, 40, 33, 27, 48, 1, 46, 39, 31], 8: [41, 16, 21, 0, 2, 42, 32, 22, 37, 33, 5, 39, 49, 25, 44, 36], 7: [24, 45, 41, 9, 39, 38, 28, 37, 42, 25, 36, 35, 23, 30, 18, 15, 12, 22, 27, 10, 21, 46, 44, 6, 20, 49], 3: [32, 27, 25, 48, 49, 13, 7, 14, 1, 35, 26, 45, 44, 18, 9, 34, 19, 10], 6: [2, 18, 48, 44, 9, 40, 0, 43, 7, 3, 12, 42, 5, 17, 4, 22, 24, 25, 16, 49, 19, 15, 8, 33, 45, 28], 9: [16, 32, 40, 18, 25, 5, 1, 22, 29, 34, 46, 10, 13, 28, 2, 23, 14, 42, 17, 27, 47, 33, 45, 36, 39, 9, 19], 2: [4, 28, 24, 36, 21

In [12]:
import numpy as np

def get_action(model, predicted_action_token_ids, unnorm_key):
    """Convert predicted action token IDs into un-normalized continuous actions.

    Note: this notebook-level definition shadows the `get_action` imported from
    `experiments.robot.robot_utils` and takes different arguments.

    Args:
        model: Object exposing `vocab_size`, `bin_centers`, and `get_action_stats(unnorm_key)`.
        predicted_action_token_ids: NumPy array of action token IDs.
        unnorm_key: Key handed to `model.get_action_stats` to select the statistics.

    Returns:
        NumPy array of actions. Dimensions flagged by the statistics' "mask" (all of them when no
        mask is present) are rescaled from [-1, 1] to [q01, q99]; the rest stay normalized.
    """
    # Token ID -> bin index (counted back from the end of the vocabulary), clamped to valid bins
    last_bin = model.bin_centers.shape[0] - 1
    bin_indices = np.clip(model.vocab_size - predicted_action_token_ids - 1, a_min=0, a_max=last_bin)
    normalized = model.bin_centers[bin_indices]

    # Fetch the un-normalization statistics
    stats = model.get_action_stats(unnorm_key)
    if "mask" in stats:
        rescale_mask = stats["mask"]
    else:
        rescale_mask = np.ones_like(stats["q01"], dtype=bool)
    upper = np.array(stats["q99"])
    lower = np.array(stats["q01"])

    # Map [-1, 1] onto [lower, upper] where the mask is set
    rescaled = 0.5 * (normalized + 1) * (upper - lower) + lower
    return np.where(rescale_mask, rescaled, normalized)

In [13]:
# Replay the recorded "winner" episodes of every task in the suite with the loaded policy, pickle each
# policy step's observation and predicted action token IDs, and log success statistics.
winner_id = 2  # NOTE(review): not read anywhere in this cell; kept in case other cells depend on it


def _success_rate(successes, episodes):
    """Return `successes / episodes` as a float, or 0.0 when no episode was run."""
    return float(successes) / float(episodes) if episodes else 0.0


# [OpenVLA] Check that the model contains the action un-normalization key
if cfg.model_family == "openvla":
    # In some cases, the key must be manually modified (e.g. after training on a modified version of the dataset
    # with the suffix "_no_noops" in the dataset name)
    if cfg.unnorm_key not in model.norm_stats and f"{cfg.unnorm_key}_no_noops" in model.norm_stats:
        cfg.unnorm_key = f"{cfg.unnorm_key}_no_noops"
    assert cfg.unnorm_key in model.norm_stats, f"Action un-norm key {cfg.unnorm_key} not found in VLA `norm_stats`!"

# Episode horizon per task suite. Resolved once, up front, so an unlisted suite fails with a clear
# error instead of a NameError in the middle of a rollout.
MAX_STEPS_BY_SUITE = {
    "libero_spatial": 220,  # longest training demo has 193 steps
    "libero_object": 280,  # longest training demo has 254 steps
    "libero_goal": 300,  # longest training demo has 270 steps
    "libero_10": 520,  # longest training demo has 505 steps
    "libero_90": 400,  # longest training demo has 373 steps
}
if cfg.task_suite_name not in MAX_STEPS_BY_SUITE:
    raise ValueError(f"No max_steps defined for task suite: {cfg.task_suite_name}")
max_steps = MAX_STEPS_BY_SUITE[cfg.task_suite_name]

# [OpenVLA] Get Hugging Face processor
processor = None
if cfg.model_family == "openvla":
    processor = get_processor(cfg)

# Initialize local logging
run_id = f"EVAL-{cfg.task_suite_name}-{cfg.model_family}-{DATE_TIME}"
if cfg.run_id_note is not None:
    run_id += f"--{cfg.run_id_note}"
os.makedirs(cfg.local_log_dir, exist_ok=True)
local_log_filepath = os.path.join(cfg.local_log_dir, run_id + ".txt")
log_file = open(local_log_filepath, "w")
print(f"Logging to local log file: {local_log_filepath}")

# Everything that writes to the log runs inside try/finally so the file is closed even when the cell is
# interrupted (e.g. KeyboardInterrupt) or an unexpected error escapes the rollout loop.
try:
    # Initialize Weights & Biases logging as well
    if cfg.use_wandb:
        wandb.init(
            entity=cfg.wandb_entity,
            project=cfg.wandb_project,
            name=run_id,
        )

    # Initialize LIBERO task suite
    benchmark_dict = benchmark.get_benchmark_dict()
    task_suite = benchmark_dict[cfg.task_suite_name]()
    num_tasks_in_suite = task_suite.n_tasks
    print(f"Task suite: {cfg.task_suite_name}")
    log_file.write(f"Task suite: {cfg.task_suite_name}\n")

    # Get expected image dimensions
    resize_size = get_image_resize_size(cfg)

    # Start evaluation
    total_episodes, total_successes = 0, 0
    for task_id in tqdm.tqdm(range(num_tasks_in_suite)):
        print(f"Task ID: {task_id}")

        # Only episodes recorded as winners are replayed. `.get` tolerates tasks that have no winner at
        # all (plain indexing raised KeyError); such tasks are skipped before building the environment.
        task_winner_episodes = set(winners_dict.get(task_id, ()))
        if not task_winner_episodes:
            print(f"No winner episodes recorded for task {task_id}; skipping.")
            log_file.write(f"No winner episodes recorded for task {task_id}; skipping.\n")
            continue

        # Get task
        task = task_suite.get_task(task_id)

        # Get default LIBERO initial states
        initial_states = task_suite.get_task_init_states(task_id)

        # Initialize LIBERO environment and task description
        env, task_description = get_libero_env(task, cfg.model_family, resolution=256)

        # Start episodes
        task_episodes, task_successes = 0, 0
        for episode_idx in tqdm.tqdm(range(cfg.num_trials_per_task)):
            if episode_idx not in task_winner_episodes:
                continue
            print(f"Starting task: {task_id}, episode: {episode_idx}...")
            log_file.write(f"Starting task: {task_id}, episode: {episode_idx}...\n")

            print(f"\nTask: {task_description}")
            log_file.write(f"\nTask: {task_description}\n")

            # Reset environment
            env.reset()

            # Set initial states
            obs = env.set_init_state(initial_states[episode_idx])

            # Setup
            t = 0
            # Start from "not done" so a failure on the very first step cannot report the previous
            # episode's outcome (or hit an undefined name on the first episode).
            done = False
            replay_images = []

            # Output directory for this episode's per-step dumps (created once per episode)
            trajectory_save_dir = f"winner_trajectory_test/{cfg.task_suite_name}/task_{task_id}_episode_{episode_idx}/"
            os.makedirs(trajectory_save_dir, exist_ok=True)

            while t < max_steps + cfg.num_steps_wait:
                print(f"t: {t}", end="\r")
                try:
                    # IMPORTANT: Do nothing for the first few timesteps because the simulator drops objects
                    # and we need to wait for them to fall
                    if t < cfg.num_steps_wait:
                        obs, reward, done, info = env.step(get_libero_dummy_action(cfg.model_family))
                        t += 1
                        continue

                    # Get preprocessed image
                    img = get_libero_image(obs, resize_size)

                    # Save preprocessed image for replay video
                    replay_images.append(img)

                    # Prepare observations dict
                    # Note: OpenVLA does not take proprio state as input
                    observation = {
                        "full_image": img,
                        "state": np.concatenate(
                            (obs["robot0_eef_pos"], quat2axisangle(obs["robot0_eef_quat"]), obs["robot0_gripper_qpos"])
                        ),
                    }

                    # Query model for the action token IDs
                    # NOTE(review): `get_action_ids` lives in robot_utils (not visible here); presumably it
                    # calls `model.predicted_action_token_ids` -- confirm.
                    action_ids = get_action_ids(
                        cfg,
                        model,
                        observation,
                        task_description,
                        processor=processor,
                    )

                    # `get_action` here is the notebook-level definition (model, token IDs, unnorm key),
                    # which shadows the helper of the same name imported from robot_utils.
                    action = get_action(model, action_ids, cfg.unnorm_key)

                    # Save the raw observation and the action token IDs for this timestep. A dedicated
                    # name is used so the record is not confused with the `info` returned by env.step.
                    obs_filename = os.path.join(trajectory_save_dir, f"step_{t}.pkl")
                    step_record = {
                        "obs": obs,
                        "action_ids": action_ids,
                    }
                    with open(obs_filename, "wb") as f:
                        pickle.dump(step_record, f)

                    # Normalize gripper action [0,1] -> [-1,+1] because the environment expects the latter
                    action = normalize_gripper_action(action, binarize=True)  # raw action is in [0,1]

                    # [OpenVLA] The dataloader flips the sign of the gripper action to align with other datasets
                    # (0 = close, 1 = open), so flip it back (-1 = open, +1 = close) before executing the action
                    if cfg.model_family == "openvla":
                        action = invert_gripper_action(action)

                    # Execute action in environment
                    obs, reward, done, info = env.step(action.tolist())
                    if done:
                        task_successes += 1
                        total_successes += 1
                        break
                    t += 1

                except Exception as e:
                    # Best effort: record the failure and abandon this episode, but keep the run going
                    print(f"Caught exception: {e}")
                    log_file.write(f"Caught exception: {e}\n")
                    break

            task_episodes += 1
            total_episodes += 1

            # Save a replay video of the episode
            save_rollout_video(
                replay_images, total_episodes, success=done, task_description=task_description, log_file=log_file
            )

            # Log current results (total_episodes is at least 1 here, so the division is safe)
            print(f"Success: {done}")
            print(f"# episodes completed so far: {total_episodes}")
            print(f"# successes: {total_successes} ({total_successes / total_episodes * 100:.1f}%)")
            log_file.write(f"Success: {done}\n")
            log_file.write(f"# episodes completed so far: {total_episodes}\n")
            log_file.write(f"# successes: {total_successes} ({total_successes / total_episodes * 100:.1f}%)\n")
            log_file.flush()

        # Log final results for this task. The guarded rate avoids ZeroDivisionError when no episode of
        # the task actually ran (e.g. all winner indices are >= num_trials_per_task).
        task_success_rate = _success_rate(task_successes, task_episodes)
        total_success_rate = _success_rate(total_successes, total_episodes)
        print(f"Current task success rate: {task_success_rate}")
        print(f"Current total success rate: {total_success_rate}")
        log_file.write(f"Current task success rate: {task_success_rate}\n")
        log_file.write(f"Current total success rate: {total_success_rate}\n")
        log_file.flush()
        if cfg.use_wandb:
            wandb.log(
                {
                    f"success_rate/{task_description}": task_success_rate,
                    f"num_episodes/{task_description}": task_episodes,
                }
            )
finally:
    # Save local log file
    log_file.close()

# Push total metrics and local log file to wandb
if cfg.use_wandb:
    wandb.log(
        {
            "success_rate/total": _success_rate(total_successes, total_episodes),
            "num_episodes/total": total_episodes,
        }
    )
    wandb.save(local_log_filepath)

Logging to local log file: ./experiments/logs/EVAL-libero_10-openvla-2025_07_24-14_26_15.txt
[info] using task orders [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Task suite: libero_10


  0%|          | 0/10 [00:00<?, ?it/s]

Task ID: 0




Starting task: 0, episode: 1...

Task: put both the alphabet soup and the tomato sauce in the basket
t: 269

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Saved rollout MP4 at path ./rollouts/2025_07_24/2025_07_24-14_26_15--episode=1--success=True--task=put_both_the_alphabet_soup_and_the_tomato_sauce_in.mp4
Success: True
# episodes completed so far: 1
# successes: 1 (100.0%)
Starting task: 0, episode: 2...

Task: put both the alphabet soup and the tomato sauce in the basket
t: 10

  4%|▍         | 2/50 [01:31<36:28, 45.60s/it]
  0%|          | 0/10 [01:37<?, ?it/s]

t: 11




KeyboardInterrupt: 