In [None]:
from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
from PIL import Image
import torch
from tqdm import tqdm
from os import listdir

In [None]:
# Sanity check: report whether PyTorch can see a CUDA device in this kernel.
torch.cuda.is_available()

In [None]:
# Load Processor & VLA
# Directory names noted by the author (presumably alternative checkpoints under
# ../saved_model): openvla_4_bit, openvla_8_bit, openvla-7b-model
model_dir = "../saved_model/openvla-7b-model"

# The processor prepares the (prompt, image) pair for the model.
processor = AutoProcessor.from_pretrained(
    pretrained_model_name_or_path=model_dir,
    trust_remote_code=True,
)

# Weights are quantized to 8 bit via bitsandbytes at load time.
# NOTE(review): OpenVLA's reference scripts appear to pair 8-bit/4-bit loading
# with torch.float16 rather than bfloat16 -- confirm bfloat16 is intended here.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

vla = AutoModelForVision2Seq.from_pretrained(
    pretrained_model_name_or_path=model_dir,
    # attn_implementation="flash_attention_2",  # [Optional] Requires `flash_attn`
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=quantization_config,
)

In [None]:
# # save processor & vla state
# processor.save_pretrained(
#     save_directory="saved_model/openvla_4_bit"
# )
# vla.save_pretrained(
#     save_directory="saved_model/openvla_4_bit"
# )

In [None]:
# Show the device reported by the loaded model (it was loaded with device_map="auto").
vla.device

In [None]:
# Grab image input & format prompt

# Folder holding the frames of one recorded trajectory, and its language instruction.
grape_dir = "../data/tabletop_dark_wood/pnp_sweep/00/2023-01-26_15-06-44/raw/traj_group0/traj0/images0"
grape_prompt = "Take the grapes and place it in the silver vessel."

# image: Image.Image = get_from_camera(...)

# Single demo frame used by the one-off prediction cell.
image: Image.Image = Image.open(grape_dir + "/im_1.jpg")

# Names consumed by the prediction cells below.
pic_dir = grape_dir

# OpenVLA is prompted with a fixed template rather than the bare instruction:
#   "In: What action should the robot take to {<INSTRUCTION>}?\nOut:"
# The instruction is lower-cased and its trailing period dropped so that it
# reads naturally inside the question.
instruction = grape_prompt.strip().rstrip(".").lower()
prompt = f"In: What action should the robot take to {instruction}?\nOut:"

In [None]:
# Display the loaded demo frame to visually confirm it is the expected image.
image

In [None]:
# Predict Action (7-DoF; un-normalize for BridgeData V2)
# Step 1: encode the prompt/image pair, then move it to the GPU in bfloat16.
encoded = processor(prompt, image)
inputs = encoded.to("cuda:0", dtype=torch.bfloat16)

# Step 2: predict with sampling disabled, un-normalizing with the "bridge_orig" key.
predict_options = {"unnorm_key": "bridge_orig", "do_sample": False}
action = vla.predict_action(**inputs, **predict_options)

In [None]:
# Execute...
# Placeholder: on a real robot the predicted action would be sent here, e.g.
# robot.act(action, ...)

# Display the action predicted by the previous cell.
action

In [None]:
# batch processing
# Predict one action per frame in pic_dir.
#
# Frames are discovered by name ("im_<index>.jpg") and ordered by their numeric
# index, instead of assuming the folder holds exactly im_0.jpg .. im_{n-1}.jpg.
# Unrelated files (e.g. .DS_Store, checkpoints) and gaps in the numbering no
# longer abort the run with FileNotFoundError.
frame_names = sorted(
    (
        name
        for name in listdir(pic_dir)
        if name.startswith("im_") and name.endswith(".jpg") and name[3:-4].isdecimal()
    ),
    key=lambda name: int(name[3:-4]),
)
length = len(frame_names)

actions = []
for frame_name in tqdm(frame_names):
    image_path = pic_dir + "/" + frame_name
    image: Image.Image = Image.open(image_path)
    inputs = processor(prompt, image).to("cuda:0", dtype=torch.bfloat16)
    action = vla.predict_action(**inputs, unnorm_key="bridge_orig", do_sample=False)
    actions.append(action)

# Print the predicted actions in frame order.
for predicted_action in actions:
    print(predicted_action)

In [None]:
import pickle

# Which trajectory pickle to inspect: agent_data.pkl, obs_dict.pkl, policy_out.pkl
file_name = "obs_dict.pkl"
traj_dir = '../data/tabletop_dark_wood/pnp_sweep/00/2023-01-26_15-06-44/raw/traj_group0/traj0/'

# Open the file for binary reading and restore the pickled Python object.
# NOTE: pickle.load can execute arbitrary code -- only use it on trusted files.
with open(traj_dir + file_name, 'rb') as pkl_file:
    loaded_data = pickle.load(pkl_file)

# Print only the "full_state" entry of the loaded data.
print(loaded_data["full_state"])

In [None]:
import pickle

# Which trajectory pickle to inspect: agent_data.pkl, obs_dict.pkl, policy_out.pkl
file_name = "obs_dict.pkl"
pkl_path = '../data/tabletop_dark_wood/pnp_sweep/00/2023-01-26_15-06-44/raw/traj_group0/traj0/' + file_name

# Read the file in binary mode and deserialize it back into a Python object.
# NOTE: pickle.load can execute arbitrary code -- only use it on trusted files.
with open(pkl_path, 'rb') as pkl_file:
    loaded_data = pickle.load(pkl_file)

# Print the whole loaded object.
print(loaded_data)

In [None]:
import torch

dtype = torch.float16
A = torch.tensor([[1.]], dtype=dtype).cuda()
B = torch.tensor([[1.001]], dtype=dtype).cuda()
test1 = torch.matmul(A, B)

A = torch.tensor([1.], dtype=dtype).cuda()
B = torch.tensor([1.001], dtype=dtype).cuda()
test2 = torch.matmul(A, B)

dtype = torch.bfloat16
A = torch.tensor([[1.]], dtype=dtype).cuda()
B = torch.tensor([[1.001]], dtype=dtype).cuda()
test3 = torch.matmul(A, B)

A = torch.tensor([1.], dtype=dtype).cuda()
B = torch.tensor([1.001], dtype=dtype).cuda()
test4 = torch.matmul(A, B)

print(test1), print(test2), print(test3), print(test4)