In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules are
# reloaded before each cell runs and edits to project files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
os.environ["HF_HUB_CACHE"] = "/mnt/sda/home/zijianwang/HF_CACHE"
from collections import deque
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

import draccus
import torch
import torch.distributed as dist
import tqdm
from accelerate import PartialState
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
from transformers import AutoConfig, AutoImageProcessor
from transformers.modeling_outputs import CausalLMOutputWithPast

import wandb
from prismatic.models.backbones.llm.prompting import PurePromptBuilder, VicunaV15ChatPromptBuilder
from prismatic.util.data_utils import PaddedCollatorForActionPrediction
from prismatic.vla.action_tokenizer import ActionTokenizer
from prismatic.vla.datasets import RLDSBatchTransform, RLDSDataset, EpisodicRLDSDataset
from prismatic.vla.datasets.rlds.utils.data_utils import save_dataset_statistics

from prismatic.extern.hf.configuration_prismatic import OpenVLAConfig
from prismatic.extern.hf.modeling_prismatic import OpenVLAForActionPrediction
from prismatic.extern.hf.processing_prismatic import PrismaticImageProcessor, PrismaticProcessor

# Sane defaults: set TOKENIZERS_PARALLELISM to "false" for this process.
# NOTE(review): presumably this is meant to silence the tokenizers fork
# warning when data-loading workers are spawned -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
# Hugging Face Hub id of the pretrained OpenVLA checkpoint. The processor and
# the model config are both loaded from this single id so they cannot drift
# apart when the checkpoint is changed.
VLA_MODEL_ID = "openvla/openvla-7b"

# Register the OpenVLA config / image processor / processor / model classes
# with the Hugging Face Auto* factories under the "openvla" model type.
AutoConfig.register("openvla", OpenVLAConfig)
AutoImageProcessor.register(OpenVLAConfig, PrismaticImageProcessor)
AutoProcessor.register(OpenVLAConfig, PrismaticProcessor)
AutoModelForVision2Seq.register(OpenVLAConfig, OpenVLAForActionPrediction)

# The processor bundles the text tokenizer and the image processor; the action
# tokenizer is built on top of the text tokenizer.
processor = AutoProcessor.from_pretrained(VLA_MODEL_ID, trust_remote_code=True)
action_tokenizer = ActionTokenizer(processor.tokenizer)
vocab_size = action_tokenizer.vocab_size
print("词表大小:", vocab_size)  # the printed label is Chinese for "vocabulary size"

# Only the configuration is loaded here (no model weights); later cells read
# `image_sizes` from it.
vla_model_config = OpenVLAConfig.from_pretrained(VLA_MODEL_ID)


In [None]:
# Checkpoint id used to choose the prompt format: ids containing "v01" use the
# Vicuna v1.5 chat prompt builder, every other id uses the plain prompt builder.
VLA_MODEL_ID = "openvla/openvla-7b"

# Root directory holding the RLDS-formatted LIBERO datasets (shared by both
# datasets below).
DATA_ROOT_DIR = "/mnt/sda/home/zijianwang/openvla/modified_libero_rlds"

# Turns raw RLDS samples into model inputs using the action tokenizer, the text
# tokenizer and the processor's image transform.
batch_transform = RLDSBatchTransform(
    action_tokenizer,
    processor.tokenizer,
    image_transform=processor.image_processor.apply_transform,
    prompt_builder_fn=PurePromptBuilder if "v01" not in VLA_MODEL_ID else VicunaV15ChatPromptBuilder,
)

# Step-level dataset with image augmentation enabled.
# NOTE(review): this reads the "libero_goal" suite while the episodic dataset
# below reads "libero_object" -- confirm the mismatch is intentional.
vla_dataset = RLDSDataset(
    DATA_ROOT_DIR,
    "libero_goal_no_noops",
    batch_transform,
    resize_resolution=tuple(vla_model_config.image_sizes),
    shuffle_buffer_size=100_000,
    image_aug=True,
)

# Episode-level dataset without augmentation and without a random start.
# Other suite names mentioned by the original author: "libero_goal_no_noops",
# libero_10, libero_spatial, libero_object, libero_goal.
episodic_vla_dataset = EpisodicRLDSDataset(
    DATA_ROOT_DIR,
    "libero_object_no_noops",
    batch_transform,
    resize_resolution=tuple(vla_model_config.image_sizes),
    shuffle_buffer_size=100_000,
    image_aug=False,
    if_random_start=False,
)

In [None]:
import json
import os
import pickle
import re

import imageio
import numpy as np
from infer_utils import predict_CoA, add_text_to_image

# Number of action token ids stored per replay image (enforced by the
# assertion inside the loop).
ACTION_IDS_PER_STEP = 8
# Hard cap on the number of episodes read from the dataset.
MAX_EPISODES = 1000
# The instruction sits between "In:" and "Out:" in the decoded prompt.
PROMPT_PATTERN = re.compile(r'In:\s*(.*?)\s*Out:', re.DOTALL)
PROMPT_PREFIX = "What action should the robot take to "


def extract_instruction(prompt_text):
    """Return the bare task instruction found in a decoded prompt.

    The text between "In:" and "Out:" is taken, then the leading
    "What action should the robot take to " and a trailing "?" are removed.

    Returns:
        The instruction string, or None when the prompt does not contain the
        "In: ... Out:" pattern.
    """
    match = PROMPT_PATTERN.search(prompt_text)
    if match is None:
        return None
    instruction = match.group(1).strip()
    if instruction.startswith(PROMPT_PREFIX):
        instruction = instruction[len(PROMPT_PREFIX):]
    if instruction.endswith("?"):
        instruction = instruction[:-1]
    return instruction


def find_task(instruction, task_desc):
    """Find the first task whose description contains, or is contained in, `instruction`.

    Args:
        instruction: Non-empty instruction string.
        task_desc: Mapping of suite name -> {task id -> description}.

    Returns:
        A (task_name, task_id, description) tuple for the first match in
        iteration order, or None when nothing matches.
    """
    for task_name, tasks in task_desc.items():
        for task_id, description in tasks.items():
            if instruction in description or description in instruction:
                return task_name, task_id, description
    return None


def save_trajectory(save_dir, imgs, action_ids_grouped):
    """Write one episode to `save_dir` as an mp4 plus one pickle per step.

    Each step pickle holds {"obs": image, "action_ids": ids for that step}.

    Returns:
        Path of the last step pickle written, or None if `imgs` is empty.
    """
    os.makedirs(save_dir, exist_ok=True)

    mp4_path = os.path.join(save_dir, f"Avideo_{len(imgs)}.mp4")
    # The context manager closes the writer even if appending a frame fails.
    with imageio.get_writer(mp4_path, fps=30) as video_writer:
        for img in imgs:
            video_writer.append_data(img)

    obs_filename = None
    for step, (img, step_action_ids) in enumerate(zip(imgs, action_ids_grouped)):
        info = {
            "obs": img,
            "action_ids": step_action_ids,
        }
        obs_filename = os.path.join(save_dir, f"step_{step}.pkl")
        with open(obs_filename, 'wb') as f:
            pickle.dump(info, f)
    return obs_filename


with open('task_descriptions.json', 'r') as f:
    task_desc = json.load(f)

i = 0  # number of episodes read so far
found_tasks = set()  # (task_name, task_id) pairs seen at least once
task_description_counts = {}  # "<task_name>_<task_id>" -> number of episodes exported
total_tasks = sum(len(tasks) for tasks in task_desc.values())  # tasks across all suites in the JSON

for data in episodic_vla_dataset:
    print(f"************* Processing episode {i} *************")
    extracted_text = extract_instruction(processor.decode(data["text"]))
    print(extracted_text)

    # An empty instruction would be a substring of every description, so it is
    # treated as "no match" rather than being assigned to the first task.
    matched_task = find_task(extracted_text, task_desc) if extracted_text else None

    if matched_task is None:
        # Skip the episode: exporting it would reuse the previous episode's
        # task name and counter and overwrite that episode's files.
        print(f"No match found for: {extracted_text}")
    else:
        found_task_name, found_task_id, description = matched_task
        print(f"Found match in {found_task_name}, task {found_task_id}: {description}")
        found_tasks.add((found_task_name, found_task_id))

        task_key = f"{found_task_name}_{found_task_id}"
        task_description_counts[task_key] = task_description_counts.get(task_key, 0) + 1

        action_ids = np.array(data["action"])
        imgs = data["replay_images"]  # List[np.ndarray]
        assert len(action_ids) == len(imgs) * ACTION_IDS_PER_STEP, (
            f"action_ids length {len(action_ids)} should be {ACTION_IDS_PER_STEP} times imgs length {len(imgs)}"
        )
        # One group of ACTION_IDS_PER_STEP ids per image.
        action_ids_grouped = [
            action_ids[start:start + ACTION_IDS_PER_STEP]
            for start in range(0, len(action_ids), ACTION_IDS_PER_STEP)
        ]

        # The episode number is the negated per-task occurrence count (-1, -2, ...).
        # NOTE(review): presumably negative numbers keep these dataset episodes
        # apart from rollout episodes that use non-negative numbers -- confirm.
        episode_num = -task_description_counts[task_key]
        trajectory_save_dir = f"winner_trajectory/{found_task_name}/task_{found_task_id}_episode_{episode_num}_success/"

        obs_filename = save_trajectory(trajectory_save_dir, imgs, action_ids_grouped)
        if obs_filename is not None:
            print(f"Saved {obs_filename}")

    i += 1

    # Stop once every task in the JSON has been seen or the episode cap is hit.
    # NOTE(review): total_tasks counts every suite in the JSON; if the dataset
    # covers a single suite, only the episode cap can end the loop -- confirm.
    if len(found_tasks) >= total_tasks or i >= MAX_EPISODES:
        print(f"Stopping loop: found {len(found_tasks)}/{total_tasks} tasks, processed {i} episodes")
        break


In [2]:
import os
import re
from collections import defaultdict

# Trajectory folders are named task_<id>_episode_<n>_<success|failure>;
# the episode number may be negative.
TRAJECTORY_DIR_PATTERN = re.compile(r"task_(\d+)_episode_(-?\d+)_(failure|success)")


def collect_success_trajectories(data_path):
    """Group the successful trajectory folders under `data_path` by task number.

    A trajectory's length is the number of ".pkl" files directly inside its
    folder. Entries that are not directories, do not match the naming pattern,
    or are marked "failure" are ignored.

    Args:
        data_path: Directory containing one sub-folder per trajectory.

    Returns:
        defaultdict mapping task number (int) -> list of (folder_name, length).
    """
    trajectories = defaultdict(list)
    for folder_name in os.listdir(data_path):
        match = TRAJECTORY_DIR_PATTERN.search(folder_name)
        if match is None or match.group(3) != "success":
            continue
        folder_path = os.path.join(data_path, folder_name)
        if not os.path.isdir(folder_path):
            continue  # a stray file whose name happens to match the pattern
        traj_len = sum(1 for name in os.listdir(folder_path) if name.endswith(".pkl"))
        # Integer keys so that tasks sort numerically (2 before 10).
        trajectories[int(match.group(1))].append((folder_name, traj_len))
    return trajectories


def shortest_trajectories(trajectories, limit=10):
    """Sort each task's trajectories by length (in place) and keep the shortest.

    Args:
        trajectories: Mapping task number -> list of (folder_name, length).
        limit: Maximum number of trajectories returned per task.

    Returns:
        dict, ordered by task number, mapping task number -> the `limit`
        shortest (folder_name, length) pairs.
    """
    shortest = {}
    for task_num in sorted(trajectories.keys()):
        trajectories[task_num].sort(key=lambda item: item[1])
        shortest[task_num] = trajectories[task_num][:limit]
    return shortest


data_path = "/mnt/sda/home/zijianwang/openvla/vla-scripts/DPO/winner_trajectory/libero_10"
task_trajectories = defaultdict(list)

if os.path.isdir(data_path):
    task_trajectories = collect_success_trajectories(data_path)

    # For each task, sort by trajectory length and show the shortest 10.
    print("Shortest 10 trajectories for each task:")
    for task_num, shortest_10 in shortest_trajectories(task_trajectories).items():
        print(f"\nTask {task_num}:")
        for folder_name, traj_len in shortest_10:
            print(f"  {folder_name}: {traj_len} steps")
else:
    print(f"Trajectory directory not found: {data_path}")

Shortest 10 trajectories for each task:

Task 0:
  task_0_episode_46_success: 249 steps
  task_0_episode_21_success: 249 steps
  task_0_episode_-7_success: 250 steps
  task_0_episode_-3_success: 255 steps
  task_0_episode_1_success: 260 steps
  task_0_episode_-2_success: 265 steps
  task_0_episode_25_success: 266 steps
  task_0_episode_33_success: 266 steps
  task_0_episode_29_success: 266 steps
  task_0_episode_23_success: 269 steps

Task 1:
  task_1_episode_2_success: 223 steps
  task_1_episode_42_success: 226 steps
  task_1_episode_27_success: 229 steps
  task_1_episode_45_success: 233 steps
  task_1_episode_-8_success: 234 steps
  task_1_episode_44_success: 235 steps
  task_1_episode_-13_success: 237 steps
  task_1_episode_37_success: 238 steps
  task_1_episode_-5_success: 238 steps
  task_1_episode_-14_success: 239 steps

Task 2:
  task_2_episode_4_success: 206 steps
  task_2_episode_22_success: 210 steps
  task_2_episode_15_success: 228 steps
  task_2_episode_25_success: 229 step

# Get task info from the test set

In [None]:
from experiments.robot.libero.libero_utils import get_libero_env

In [None]:
from libero.libero import benchmark

# Build {suite name -> {task id -> task description}} for the listed LIBERO suites.
benchmark_dict = benchmark.get_benchmark_dict()
print(benchmark_dict.keys())
task_desc = {}
task_names = ['libero_spatial', 'libero_object', 'libero_goal', 'libero_10']
for task_name in task_names:
    # Other suite keys noted by the original author: libero_90.
    task_suite = benchmark_dict[task_name]()
    num_tasks_in_suite = task_suite.n_tasks
    print(f"Number of tasks in {task_name}: {num_tasks_in_suite}")
    task_desc[task_name] = {}
    for task_id in range(num_tasks_in_suite):
        task = task_suite.get_task(task_id)
        env, task_description = get_libero_env(task, "openvla", resolution=256)
        # Only the description is needed here; close the environment straight
        # away so one simulator instance per task is not left open.
        env.close()
        print(f"Task {task_id} description: {task_description}")
        # Task ids are ints here; they become strings if this mapping is
        # round-tripped through JSON.
        task_desc[task_name][task_id] = task_description