In [190]:
import sys
import numpy as np
from rlbench.action_modes.action_mode import MoveArmThenGripper
from rlbench.action_modes.arm_action_modes import ArmActionMode, JointVelocity, JointPosition, EndEffectorPoseViaPlanning, EndEffectorPoseViaIK
from rlbench.action_modes.gripper_action_modes import Discrete
from rlbench.environment import Environment
from rlbench.observation_config import ObservationConfig, CameraConfig
from rlbench.tasks import ReachTarget, PickAndLift, StackBlocks, PushButton, StackBlocks, PickUpCup, PlaceHangerOnRack
import matplotlib.pyplot as plt
from transformers import AutoModelForVision2Seq, AutoProcessor
from PIL import Image
import torch
from transformers import BitsAndBytesConfig
from pyquaternion import Quaternion
from rlbench.backend.robot import Robot
from scipy.spatial.transform import Rotation
from rlbench.backend.scene import Scene
from pathlib import Path
import os, json

In [2]:
model_path = "/home/lawrence/VLA-RL/openvla/runs/openvla-7b+test1+b2+lr-2e-05+lora-r4+dropout-0.0+q-4bit"#"/home/lawrence/openvla_7b"

In [3]:
class Agent(object):
    def __init__(self):
        config = BitsAndBytesConfig(
            load_in_8bit=True,
            llm_int8_skip_modules = ['projector'],
            llm_int8_enable_fp32_cpu_offload=True,
            # bnb_4bit_quant_type="nf4",
            # bnb_4bit_use_double_quant=True,
            # llm_int8_enable_fp32_cpu_offload=True,
            # bnb_4bit_compute_dtype=torch.bfloat16
        )
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_quant_type="nf4", llm_int8_skip_modules = ['projector'],
        )
        self.openvla_path = "/home/lawrence/VLA-RL/openvla/runs/openvla-7b+test1+b2+lr-2e-05+lora-r4+dropout-0.0+q-4bit"#"/home/lawrence/openvla_7b"
        self.processor = AutoProcessor.from_pretrained(self.openvla_path, trust_remote_code=True)
        self.vla = AutoModelForVision2Seq.from_pretrained(
                        self.openvla_path, 
                        attn_implementation="sdpa",  # [Optional] Requires `flash_attn`
                        torch_dtype=torch.bfloat16, 
                        quantization_config=quantization_config,
                        low_cpu_mem_usage=True, 
                        trust_remote_code=True,
                        device_map="auto"
                    )
        if os.path.isdir(self.openvla_path):
            with open(Path(self.openvla_path) / "dataset_statistics.json", "r") as f:
                self.vla.norm_stats = json.load(f)
    def act(self, obs, instr, unnorm_key="rlbench", do_sample=True):
        prompt = "In: What action should the robot take to {<INSTRUCTION>}?\nOut:"
        prompt = prompt.replace("<INSTRUCTION>", instr.lower())
        image = Image.fromarray(obs)
        inputs = self.processor(prompt, image).to(self.vla.device, dtype=torch.bfloat16)
        action = self.vla.predict_action(**inputs, unnorm_key=unnorm_key, do_sample=do_sample)    
        return action

# stanford_hydra_dataset_converted_externally_to_rlds 1
# berkeley_autolab_ur5 


In [4]:
agent = Agent()

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.
Loading checkpoint shards: 100%|██████████| 4/4 [00:38<00:00,  9.58s/it]


In [7]:
agent.vla.norm_stats

{'rlbench': {'action': {'q01': [0.014743955805897713,
    -0.259696900844574,
    0.8708521127700806,
    -3.141531217929336,
    -0.04291734929919966,
    -3.125942467823675,
    0.0],
   'q99': [0.4629177749156952,
    0.33081406354904175,
    1.1301019191741943,
    3.141589175004991,
    0.002644044521300959,
    3.0656777837205063,
    1.0]}}}

In [3]:
camera = CameraConfig(image_size=(224, 224), depth=False, point_cloud=False, mask=False)
obs_config = ObservationConfig(left_shoulder_camera=camera, right_shoulder_camera=camera, front_camera=camera, overhead_camera=camera)

env = Environment(
    action_mode=MoveArmThenGripper(
        arm_action_mode=EndEffectorPoseViaPlanning(absolute_mode=True, collision_checking=False), gripper_action_mode=Discrete()),
    obs_config=obs_config,
    headless=False)
env.launch()



In [191]:
task = env.get_task(PlaceHangerOnRack)

In [23]:
trans = np.ones(3)*(-0.)
rota = np.ones(3)*np.pi*0
quat = Rotation.from_euler('xyz', rota).as_quat()
action = np.concatenate([trans, quat, np.array([1])])
action

array([-0., -0., -0.,  0.,  0.,  0.,  1.,  1.])

In [195]:
descriptions, obs = task.reset()

In [200]:
obs.gripper_pose

array([ 2.78505206e-01, -8.16359185e-03,  1.47192001e+00, -8.24363906e-06,
        9.92671311e-01, -2.51155626e-07,  1.20845728e-01])

In [211]:
action1 = np.array([ 2.78505206e-01, 0.3,  1.47192001e+00, -8.24363906e-06,
        9.92671311e-01, -2.51155626e-07,  1.20845728e-01, 1])

In [212]:
task.step(action1)

InvalidActionError: A path could not be found. Most likely due to the target being inaccessible or a collison was detected.

In [18]:
# instr = descriptions[1]
# prompt = "In: What action should the robot take to {<INSTRUCTION>}?\nOut:"
# prompt = prompt.replace("<INSTRUCTION>", instr.lower())
# image = Image.fromarray(obs.front_rgb)
# inputs = agent.processor(prompt, image).to(agent.vla.device, dtype=torch.bfloat16)
# action = agent.vla.predict_action(**inputs, unnorm_key="Freiburg_Franka_Play", do_sample=True)
# action

In [8]:
agent.vla.config.norm_stats.keys()

dict_keys(['austin_buds_dataset_converted_externally_to_rlds', 'austin_sailor_dataset_converted_externally_to_rlds', 'austin_sirius_dataset_converted_externally_to_rlds', 'bc_z', 'berkeley_autolab_ur5', 'berkeley_cable_routing', 'berkeley_fanuc_manipulation', 'bridge_orig', 'cmu_stretch', 'dlr_edan_shared_control_converted_externally_to_rlds', 'dobbe', 'fmb_dataset', 'fractal20220817_data', 'furniture_bench_dataset_converted_externally_to_rlds', 'iamlab_cmu_pickup_insert_converted_externally_to_rlds', 'jaco_play', 'kuka', 'nyu_franka_play_dataset_converted_externally_to_rlds', 'roboturk', 'stanford_hydra_dataset_converted_externally_to_rlds', 'taco_play', 'toto', 'ucsd_kitchen_dataset_converted_externally_to_rlds', 'utaustin_mutex', 'viola'])

In [10]:
task = env.get_task(PickUpCup)
training_steps = 1000
episode_length = 100
obs = None
for i in range(training_steps):
    if i % episode_length == 0:
        print('Reset Episode')
        descriptions, obs = task.reset()
        print(descriptions[1])
    try:
        action = agent.act(obs.overhead_rgb,descriptions[0], do_sample=True, unnorm_key="rlbench")
        action_rotation = Rotation.from_euler('xyz', action[3:6])
        action_quaternion = action_rotation.as_quat()
        # print(delta_quaternion)  # returns (qx, qy, qz, qw)
        action = np.concatenate([action[0:3], action_quaternion, action[-1:]])
        print(action)
        obs, reward, terminate = task.step(action)
        print(reward)
    except Exception as e:
        print(e)
        continue

Reset Episode
grasp the red cup and lift it
[ 0.25816385  0.11197765  1.12959359  0.01382961 -0.99983103 -0.00601583
  0.01050989  0.5       ]
0.0
[ 0.2282856   0.07029452  1.00047702 -0.04395693 -0.99897612  0.00580022
  0.00899201  0.5       ]
0.0
[ 0.28979965  0.01934848  1.04216032  0.0023403  -0.6917024  -0.7219788
  0.01699761  0.5       ]
0.0
[ 0.29682983 -0.010756    1.01369367  0.83535047 -0.5113054  -0.10533886
  0.17222106  0.5       ]
0.0
[0.29331474 0.03555858 1.12959359 0.93103951 0.36472497 0.01170498
 0.00203121 0.99803922]
0.0
[ 0.30386001  0.07029452  0.87136045 -0.04398412 -0.99900278  0.00598518
  0.00479747  0.5       ]
0.0
[0.25289122 0.02629566 1.05537697 0.03623882 0.00740478 0.16610425
 0.98541428 0.5       ]
A path could not be found. Most likely due to the target being inaccessible or a collison was detected.
[ 0.30561755 -0.02465038  1.12959359 -0.04394809 -0.9989639   0.00574116
  0.01033066  0.5       ]
0.0
[ 0.30034492  0.07029452  1.12146026  0.01383861 

KeyboardInterrupt: 

In [213]:
env.shutdown()

[CoppeliaSim:loadinfo]   done.


In [None]:
action

array([-0.02001152,  0.02458013, -0.01649398,  0.00402673, -0.01346854,
        0.02819962,  0.99607843])