Add bounding box sensor #1813

Open · wants to merge 3 commits into main
20 changes: 20 additions & 0 deletions habitat-lab/habitat/config/default_structured_configs.py
@@ -589,6 +589,20 @@ class MarkerRelPosSensorConfig(LabSensorConfig):
type: str = "MarkerRelPosSensor"


@dataclass
class HandleBBoxSensorConfig(LabSensorConfig):
r"""
Articulation tasks only. Returns the bounding box of the target handle in image pixels.
"""
type: str = "HandleBBoxSensor"
height: int = 480
width: int = 640
# The half-size (in pixels) of the square drawn around each projected handle point
pixel_size: int = 2
# The handle size as [height, width], in meters
handle_size: List[float] = field(default_factory=lambda: [0.02, 0.1])


@dataclass
class TargetStartSensorConfig(LabSensorConfig):
r"""
@@ -2202,6 +2216,12 @@ class HabitatConfig(HabitatBaseConfig):
name="arm_depth_bbox_sensor",
node=ArmDepthBBoxSensorConfig,
)
cs.store(
package="habitat.task.lab_sensors.handle_bbox_sensor",
group="habitat/task/lab_sensors",
name="handle_bbox_sensor",
node=HandleBBoxSensorConfig,
)
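A minimal sketch of enabling the new sensor from Python. The benchmark config name and the read_write usage are assumptions based on standard habitat-lab, not part of this diff:

import habitat
from habitat.config import read_write
from habitat.config.default_structured_configs import HandleBBoxSensorConfig

# "benchmark/rearrange/open_cab.yaml" is an assumed articulation-task config.
config = habitat.get_config("benchmark/rearrange/open_cab.yaml")
with read_write(config):
    config.habitat.task.lab_sensors["handle_bbox_sensor"] = HandleBBoxSensorConfig(
        height=480, width=640, pixel_size=2, handle_size=[0.02, 0.1]
    )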
cs.store(
package="habitat.task.lab_sensors.spot_head_stereo_depth_sensor",
group="habitat/task/lab_sensors",
@@ -5,6 +5,8 @@
# LICENSE file in the root directory of this source tree.


from typing import Tuple

import numpy as np
from gym import spaces

@@ -20,6 +22,11 @@
UsesArticulatedAgentInterface,
rearrange_logger,
)
from habitat.utils.geometry_utils import (
coordinate_from_opengl_to_opencv,
pose_from_opengl_to_opencv,
pose_from_xzy_to_xyz,
)


@registry.register_sensor
@@ -64,6 +71,212 @@ def get_observation(self, observations, episode, *args, **kwargs):
return np.array(rel_marker_pos)


@registry.register_sensor
class HandleBBoxSensor(UsesArticulatedAgentInterface, Sensor):
"""
Detect the target handle and return its bounding box as a binary image.
"""

cls_uuid: str = "handle_bbox"

def __init__(self, sim, config, *args, task, **kwargs):
super().__init__(config=config)
self._sim = sim
self._task = task
self._target_height = config.height
self._target_width = config.width
self._pixel_size = config.pixel_size
self._handle_size = config.handle_size
self._origin_height = None
self._origin_width = None

@staticmethod
def _get_uuid(*args, **kwargs):
return HandleBBoxSensor.cls_uuid

def _get_sensor_type(self, *args, **kwargs):
return SensorTypes.TENSOR

def _get_observation_space(self, *args, config, **kwargs):
return spaces.Box(
shape=(
config.height,
config.width,
1,
),
low=np.finfo(np.float32).min,
high=np.finfo(np.float32).max,
dtype=np.float32,
)

def _get_image_coordinate_from_world_coordinate(
self, point: np.ndarray, target_key: str
) -> Tuple[int, int, bool]:
"""
Project a point in the world frame onto the image plane.

:param point: The 3D xyz point in OpenCV coordinates
:param target_key: The target key (name) of the sensor. Usually it is articulated_agent_arm_rgb
:return: The (width, height) pixel coordinates and whether the point is in front of the camera
"""
# Get the camera info
fs_w, fs_h, cam_pose = self._get_camera_param(target_key)

# Do projection from world coordinate into the camera frame
point_cam_coord = np.linalg.inv(cam_pose) @ [
point[0],
point[1],
point[2],
1,
]

# From the camera frame into image pixel frame
# For image width coordinate
w = self._origin_width / 2.0 + (
fs_w * point_cam_coord[0] / point_cam_coord[2]
)
# For image height coordinate
h = self._origin_height / 2.0 + (
fs_h * point_cam_coord[1] / point_cam_coord[2]
)

# Check whether the point is in front of the camera
is_in_front_of_camera = point_cam_coord[2] > 0
return (w, h, is_in_front_of_camera)
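As a sanity check on the projection above, a standalone numpy sketch of the same pinhole math with made-up values (identity camera pose, so the point is already in the camera frame):

import numpy as np

width, height = 640, 480
fov = np.deg2rad(90.0)                     # assumed hfov
fs = width / (2 * np.tan(fov / 2.0))       # focal length in pixels: 320.0
point_cam = np.array([0.5, -0.2, 2.0])     # point in OpenCV camera coordinates
w = width / 2.0 + fs * point_cam[0] / point_cam[2]   # 400.0
h = height / 2.0 + fs * point_cam[1] / point_cam[2]  # 208.0
is_in_front = point_cam[2] > 0                       # True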

def _get_camera_param(
self, target_key: str
) -> Tuple[float, float, np.ndarray]:
"""
Get the camera parameters from the agent's sensor

:param target_key: The target key (name) of the sensor. Usually it is articulated_agent_arm_rgb
:return: A tuple of (focal length along width, focal length along height, world_T_cam) in the real-world xyz convention
"""
agent_id = 0 if self.agent_id is None else self.agent_id

# Get focal length
fov = (
float(self._sim.agents[agent_id]._sensors[target_key].hfov)
* np.pi
/ 180
)
fs_w = self._origin_width / (2 * np.tan(fov / 2.0))
fs_h = self._origin_height / (2 * np.tan(fov / 2.0))

# Get the camera pose
hab_cam_T = (
self._sim.agents[agent_id]
._sensors[target_key]
.render_camera.camera_matrix.inverted()
)
world_T_cam = pose_from_xzy_to_xyz(
pose_from_opengl_to_opencv(np.array(hab_cam_T))
)
return fs_w, fs_h, world_T_cam

def _get_image_pixel_from_point(
self, point: np.ndarray, target_key: str, img: np.ndarray
):
"""
Draw the projected point into the image as a small square of pixels.

:param point: The 3D xyz point in OpenCV coordinates
:param target_key: The target key (name) of the sensor. Usually it is articulated_agent_arm_rgb

:return: The image with the projected point drawn in
"""
# Get the pixel coordinate in 2D
(
w,
h,
is_in_front_of_camera,
) = self._get_image_coordinate_from_world_coordinate(point, target_key)

if is_in_front_of_camera:
# Clip the square's extents to the image bounds
w_low = int(np.clip(w - self._pixel_size, 0, self._origin_width))
w_high = int(np.clip(w + self._pixel_size, 0, self._origin_width))
h_low = int(np.clip(h - self._pixel_size, 0, self._origin_height))
h_high = int(np.clip(h + self._pixel_size, 0, self._origin_height))
img[h_low:h_high, w_low:w_high, 0] = 1.0
return img

def _get_bbox(self, img):
"""
Simple function to get the bounding box (1 for the detected object and 0 otherwise), assuming there is only one object of interest in the image.

:param img: The binary image marking handle pixels (not necessarily a rectangle)
:return: The bounding box (rectangle) of the handle, in which 1 indicates the handle and 0 otherwise
"""
bbox = np.zeros(img.shape)

# No handle
if np.sum(img) == 0:
return bbox

# Get the min and max occupied row and column indices
rows = np.any(img, axis=1)
cols = np.any(img, axis=0)
rmin, rmax = np.where(rows)[0][[0, -1]]
cmin, cmax = np.where(cols)[0][[0, -1]]

# Fill the bounding box, inclusive of the extreme pixels
bbox[rmin : rmax + 1, cmin : cmax + 1] = 1.0

return bbox
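A quick worked example of the min/max trick above on a toy 6x6 mask (values made up):

import numpy as np

img = np.zeros((6, 6, 1))
img[1, 2, 0] = 1.0                          # two marked pixels
img[4, 3, 0] = 1.0
rows = np.any(img, axis=1)                  # occupied rows: 1 and 4
cols = np.any(img, axis=0)                  # occupied cols: 2 and 3
rmin, rmax = np.where(rows)[0][[0, -1]]     # 1, 4
cmin, cmax = np.where(cols)[0][[0, -1]]     # 2, 3
bbox = np.zeros(img.shape)
bbox[rmin : rmax + 1, cmin : cmax + 1] = 1.0  # 4x2 block of ones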

def _crop_image(self, img):
"""Crop the image based on the target size"""
height_start = int((self._origin_height - self._target_height) / 2)
width_start = int((self._origin_width - self._target_width) / 2)
img = img[
height_start : height_start + self._target_height,
width_start : width_start + self._target_width,
]
return img
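A quick check of the center-crop arithmetic (assuming a 480x640 source and a 256x256 target): height_start = (480 - 256) // 2 = 112 and width_start = (640 - 256) // 2 = 192, so the crop keeps rows 112:368 and columns 192:448.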

def get_observation(self, observations, episode, task, *args, **kwargs):
# Select the camera observation key for this agent
if self.agent_id is None:
target_key = "articulated_agent_arm_rgb"
assert target_key in observations
else:
target_key = f"agent_{self.agent_id}_articulated_agent_arm_rgb"
assert target_key in observations

# Get the image size
self._origin_height, self._origin_width, _ = observations[
target_key
].shape
img = np.zeros((self._origin_height, self._origin_width, 1))

# Get the handle transformation
handle_T = self._task.get_use_marker().current_transform

# Draw the handle location in the image
height, width = self._handle_size[0] / 2, self._handle_size[1] / 2
granularity = 11
for height_offset, width_offset in zip(
np.linspace(-height, height, granularity),
np.linspace(-width, width, granularity),
):
# Sample a point along the handle's diagonal (its bbox matches the full handle's)
handle_pos = coordinate_from_opengl_to_opencv(
handle_T.transform_point([height_offset, 0.0, width_offset])
)
# Draw the handle location in the image
img = self._get_image_pixel_from_point(handle_pos, target_key, img)

# Get the bbox
img = self._get_bbox(img)

# Crop the image
img = self._crop_image(img)

return np.float32(img)
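For visual debugging, a hypothetical overlay snippet. Observation keys follow the defaults in this PR; the mask only aligns with the RGB frame when the configured height/width match the camera resolution, since the sensor center-crops:

import numpy as np

obs = {
    "articulated_agent_arm_rgb": np.zeros((480, 640, 3), dtype=np.uint8),
    "handle_bbox": np.zeros((480, 640, 1), dtype=np.float32),
}  # stand-in for the dict returned by env.step() / env.reset()

rgb = obs["articulated_agent_arm_rgb"]      # (H, W, 3) uint8 camera frame
mask = obs["handle_bbox"]                   # (H', W', 1) float32 binary mask
if rgb.shape[:2] == mask.shape[:2]:
    overlay = rgb.copy()
    overlay[mask[..., 0] > 0] = (255, 0, 0)  # paint the bbox region red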


@registry.register_sensor
class ArtJointSensor(Sensor):
"""
@@ -143,7 +356,7 @@ def reset_metric(self, *args, episode, task, observations, **kwargs):
episode=episode,
task=task,
observations=observations,
**kwargs
**kwargs,
)

def update_metric(self, *args, episode, task, observations, **kwargs):
@@ -168,7 +381,7 @@ def reset_metric(self, *args, episode, task, observations, **kwargs):
episode=episode,
task=task,
observations=observations,
**kwargs
**kwargs,
)

def update_metric(self, *args, episode, task, observations, **kwargs):
@@ -205,7 +418,7 @@ def reset_metric(self, *args, episode, task, observations, **kwargs):
episode=episode,
task=task,
observations=observations,
**kwargs
**kwargs,
)

def update_metric(self, *args, episode, task, observations, **kwargs):
@@ -252,7 +465,7 @@ def reset_metric(self, *args, episode, task, observations, **kwargs):
episode=episode,
task=task,
observations=observations,
**kwargs
**kwargs,
)

def update_metric(self, *args, task, **kwargs):
@@ -316,7 +529,7 @@ def reset_metric(self, *args, episode, task, observations, **kwargs):
episode=episode,
task=task,
observations=observations,
**kwargs
**kwargs,
)

def update_metric(self, *args, episode, task, observations, **kwargs):
@@ -325,7 +538,7 @@ def update_metric(self, *args, episode, task, observations, **kwargs):
episode=episode,
task=task,
observations=observations,
**kwargs
**kwargs,
)
reward = self._metric
link_state = task.measurements.measures[