In [1]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# NOTE(review): hard-coded, user-specific absolute path. It is assigned before the
# Hugging Face imports below — presumably so the hub cache location is already in
# the environment when those libraries are imported; confirm and consider making it configurable.
os.environ["HF_HUB_CACHE"] = "/mnt/sda/home/zijianwang/HF_CACHE"
from collections import deque
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

import draccus
import torch
import torch.distributed as dist
import tqdm
from accelerate import PartialState
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
from transformers import AutoConfig, AutoImageProcessor
from transformers.modeling_outputs import CausalLMOutputWithPast

import wandb
from prismatic.models.backbones.llm.prompting import PurePromptBuilder, VicunaV15ChatPromptBuilder
from prismatic.util.data_utils import PaddedCollatorForActionPrediction
from prismatic.vla.action_tokenizer import ActionTokenizer
from prismatic.vla.datasets import RLDSBatchTransform, RLDSDataset, EpisodicRLDSDataset
from prismatic.vla.datasets.rlds.utils.data_utils import save_dataset_statistics

from prismatic.extern.hf.configuration_prismatic import OpenVLAConfig
from prismatic.extern.hf.modeling_prismatic import OpenVLAForActionPrediction
from prismatic.extern.hf.processing_prismatic import PrismaticImageProcessor, PrismaticProcessor

# Sane Defaults
os.environ["TOKENIZERS_PARALLELISM"] = "false"

  from .autonotebook import tqdm as notebook_tqdm
2025-07-25 21:48:52.189942: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-25 21:48:52.189981: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-25 21:48:52.191787: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-25 21:48:52.200698: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler fl

In [3]:
# Hook the OpenVLA classes into the Hugging Face Auto* factories.
# Each entry is (auto factory, registration key, implementation class); the
# order matches the original cell: config first, then processors, then the model.
_AUTO_REGISTRATIONS = (
    (AutoConfig, "openvla", OpenVLAConfig),
    (AutoImageProcessor, OpenVLAConfig, PrismaticImageProcessor),
    (AutoProcessor, OpenVLAConfig, PrismaticProcessor),
    (AutoModelForVision2Seq, OpenVLAConfig, OpenVLAForActionPrediction),
)
for _auto_factory, _registration_key, _implementation in _AUTO_REGISTRATIONS:
    _auto_factory.register(_registration_key, _implementation)

In [4]:
# Fetch the OpenVLA configuration and its processor from the same checkpoint id,
# then wrap the processor's tokenizer in an ActionTokenizer.
_PRETRAINED_ID = "openvla/openvla-7b"

vla_model_config = OpenVLAConfig.from_pretrained(_PRETRAINED_ID)
processor = AutoProcessor.from_pretrained(_PRETRAINED_ID, trust_remote_code=True)

_base_tokenizer = processor.tokenizer
action_tokenizer = ActionTokenizer(_base_tokenizer)

# Add a separator token for action units

In [5]:
# Extend the tokenizer vocabulary with the "<A>" token and report the
# vocabulary size before and after, plus the id assigned to the new token.
DEFAULT_ACT_TOKEN = "<A>"

print("len(tokenizer):", len(processor.tokenizer))
num_added_toks = processor.tokenizer.add_tokens(DEFAULT_ACT_TOKEN)
print("len(tokenizer):", len(processor.tokenizer))

_act_token_id = processor.tokenizer.convert_tokens_to_ids(DEFAULT_ACT_TOKEN)
print("id of <A>:", _act_token_id)

len(tokenizer): 32001
len(tokenizer): 32002
id of <A>: 32001


In [7]:
# Show the tokenizer's end-of-sequence token, its id, and the full special-token map.
_tok = processor.tokenizer
_eos_token, _eos_token_id = _tok.eos_token, _tok.eos_token_id

print("End of sequence token:", _eos_token)
print("End of sequence token id:", _eos_token_id)
print("All special tokens:", _tok.special_tokens_map)

End of sequence token: </s>
End of sequence token id: 2
All special tokens: {'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<PAD>'}


# Usage of action tokenizer 

In [None]:
# Load the fine-tuned LIBERO-10 checkpoint from local disk in bfloat16 and
# place it on GPU index 3.
_VLA_CHECKPOINT = "/mnt/sda/home/zijianwang/HF_CACHE/openvla-7b-finetuned-libero-10"
_vla_load_kwargs = dict(
    attn_implementation="flash_attention_2",  # [Optional] Requires `flash_attn`
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

vla = AutoModelForVision2Seq.from_pretrained(_VLA_CHECKPOINT, **_vla_load_kwargs)
vla = vla.to("cuda:3")

In [None]:
# List the keys of the normalization statistics stored on the loaded model.
print(vla.norm_stats.keys())

In [None]:
# Resize the embedding matrix so it covers the tokenizer vocabulary, including
# the "<A>" token added above. Calling resize_token_embeddings() without a
# target size only returns the current embedding module and resizes nothing.
# The checkpoint's own padding multiple (when the config defines one) is reused
# so an already padded embedding table is not shrunk below that alignment.
vla.resize_token_embeddings(
    len(processor.tokenizer),
    pad_to_multiple_of=getattr(vla.config, "pad_to_multiple_of", None),
)

In [5]:
# Select the prompt template: checkpoints whose id contains "v01" use the
# Vicuna v1.5 chat format, everything else uses the plain prompt builder.
if "v01" in "openvla/openvla-7b":
    _prompt_builder_cls = VicunaV15ChatPromptBuilder
else:
    _prompt_builder_cls = PurePromptBuilder

# Per-sample transform: tokenizes actions/prompts and applies the image transform.
batch_transform = RLDSBatchTransform(
    action_tokenizer,
    processor.tokenizer,
    image_transform=processor.image_processor.apply_transform,
    prompt_builder_fn=_prompt_builder_cls,
)

task_name = "LIBERO-Long" #LIBERO-Object, LIBERO-Goal, LIBERO-Long, LIBERO-Spatial"

# Episode-level RLDS dataset over the modified LIBERO-10 data.
_dataset_options = dict(
    resize_resolution=tuple(vla_model_config.image_sizes),
    shuffle_buffer_size=100_000,
    image_aug=True,
    if_random_start=False,
)
episodic_vla_dataset = EpisodicRLDSDataset(
    "/mnt/sda/home/zijianwang/openvla/modified_libero_rlds",
    "libero_10_no_noops",
    batch_transform,
    **_dataset_options,
)

2025-07-25 21:49:40.671903: I tensorflow/core/grappler/optimizers/data/replicate_on_split.cc:32] Running replicate on split optimization


2025-07-25 21:49:41.273350: I tensorflow/core/grappler/optimizers/data/replicate_on_split.cc:32] Running replicate on split optimization


In [6]:
import random

# Position of the episode to inspect. For a random pick, remember that
# random.randint is inclusive on both ends, so the upper bound must be one less
# than the number of episodes (assumes len() counts episodes — TODO confirm):
# target_index = random.randint(0, len(episodic_vla_dataset) - 1)
target_index = 0

# The dataset is consumed as an iterable, so walk it until the requested
# position is reached and stop immediately afterwards.
episode = None
for idx, candidate_episode in enumerate(episodic_vla_dataset):
    if idx == target_index:
        episode = candidate_episode
        break

# Fail loudly instead of letting later cells run against a stale or missing
# `episode` when the iterator ends before `target_index`.
if episode is None:
    raise IndexError(f"episodic_vla_dataset yielded no episode at index {target_index}")

length = episode['length']
print(length)

replay_images = episode['replay_images']

W0000 00:00:1753444190.974029 1767125 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: 1 } dim { size: 224 } dim { size: 224 } dim { size: -7 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2500 num_cores: 80 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 28835840 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: -8 } dim { size: -9 } dim { size: -7 } } }
W0000 00:00:1753444191.05

184


In [8]:
# Show which fields the selected episode dictionary provides.
print(episode.keys())

dict_keys(['pixel_values', 'input_ids', 'labels', 'dataset_name', 'length', 'replay_images', 'text'])


In [60]:
# Report the shape of one replay frame, the number of replay frames, and the
# decoded form of the episode's tokenized prompt.
_second_frame = replay_images[1]
print(_second_frame.shape)

_num_frames = len(replay_images)
print(_num_frames)

text = episode['text']
_decoded_prompt = processor.tokenizer.decode(text)
print(_decoded_prompt)

(3, 224, 224)
268
<s> In: What action should the robot take to put both the alphabet soup and the tomato sauce in the basket?
Out: 


In [None]:
import sys
import imageio
import numpy as np
sys.path.append("../..")
# from experiments.robot.libero.libero_utils import save_rollout_video

# Export the replay frames of the selected episode as a 30 fps MP4.
mp4_path = "/mnt/sda/home/zijianwang/openvla/vla-scripts/demo_video/1.mp4"

# The writer cannot create missing parent directories, so create them up front.
os.makedirs(os.path.dirname(mp4_path), exist_ok=True)

# The context manager closes the writer even if a frame fails to encode,
# so a partially written file is still finalized.
with imageio.get_writer(mp4_path, fps=30) as video_writer:
    for img in replay_images:
        # Channel-first frames (3, H, W) are converted to (H, W, 3) for the writer.
        if len(img.shape) == 3 and img.shape[0] == 3:
            img = np.transpose(img, (1, 2, 0))
        # Append every frame; previously frames that were not channel-first
        # were silently skipped because the append lived inside the `if`.
        video_writer.append_data(img)

In [47]:
# Build the collator and wrap the episodic dataset in a DataLoader (batch size 1).
# NOTE(review): `DataCollatorForCoASupervisedDataset` is never imported or defined in
# the visible notebook, and the recorded output of this cell is a NameError. Import it
# from its defining module (not visible here) before running this cell.
collator = DataCollatorForCoASupervisedDataset(
    processor.tokenizer.model_max_length, processor.tokenizer.pad_token_id, padding_side="right"
)
dataloader = DataLoader(
    episodic_vla_dataset,
    collate_fn=collator,
    sampler=None,
    batch_size= 1,
    num_workers=0,  # Important =>> Set to 0 if using RLDS; TFDS rolls its own parallelism!
)
# Report how many batches the loader claims to provide.
print(f"Length of dataloader: {len(dataloader)}")

NameError: name 'DataCollatorForCoASupervisedDataset' is not defined

In [None]:
# Iterate over the whole dataloader, advancing a progress bar once per batch.
# The bar's total of 20000 is only the display total: the loop has no break,
# so it runs until the dataloader itself is exhausted.
with tqdm.tqdm(total=20000, leave=False) as progress:
    for batch_idx, batch in enumerate(dataloader):
        # No per-batch work; this cell only exercises the loading pipeline.
        progress.update()

In [None]:
# Re-iterate the dataloader a fixed number of times to check that it can be
# consumed repeatedly. The original `while True` never terminated, which
# blocked every later cell (they read `batch`) under Restart & Run All.
NUM_PASSES = 2  # raise this for a longer soak test

for pass_idx in range(NUM_PASSES):
    # tqdm already displays the running batch counter, so the index is not printed.
    for index, batch in tqdm.tqdm(enumerate(dataloader), desc=f"pass {pass_idx}"):
        # Touch the field so a collator that omits it fails here, on the first batch.
        lengths = batch['lengths']

In [None]:
# Summarize the most recent batch: shapes for the array-like fields first,
# then the raw values of the bookkeeping fields.
for _shape_key in ('pixel_values', 'input_ids', 'labels'):
    print(batch[_shape_key].shape)
for _value_key in ('dataset_names', 'lengths'):
    print(batch[_value_key])

In [None]:
# Print the raw token ids of the first sample, then every sample in the batch
# decoded back to text.
_batch_token_ids = batch['input_ids']
print(_batch_token_ids[0])

_decoded_samples = processor.tokenizer.batch_decode(_batch_token_ids)
print(_decoded_samples)

In [None]:
import random

# Draw a start index uniformly from 0..110 (both ends included) and show it.
# random.seed(42)  # Fix random seed
_START_INDEX_UPPER = 110
start_index = random.randrange(0, _START_INDEX_UPPER + 1)
print(start_index)