In [1]:
import cv2 as cv
import numpy as np
from camera_simulator import CameraSimulator

In [2]:
class ImageManipulator:
    """
    Image manipulation functions that work both on batched data and single images.
    Image batch dim is assumed to be [N, W, H, 3]
    Single image dim is assumed to be [W, H, 3]
    """

    @staticmethod
    def calc_mask(images: np.ndarray, bg_value: int = 0, orig_dims: bool = False) -> np.ndarray:
        """
        Compute a boolean foreground mask.

        A pixel is foreground when at least one entry along the last
        (channel) axis differs from ``bg_value``.

        Args:
            images: image or image batch; the last axis is reduced.
            bg_value: value that marks background in every channel.
            orig_dims: when True, the mask is broadcast back to ``images.shape``
                (a broadcast view, not a copy); otherwise the last axis is dropped.

        Returns:
            Boolean array of shape ``images.shape[:-1]``, or ``images.shape``
            when ``orig_dims`` is True.
        """
        mask = np.any(images != bg_value, axis=-1)
        if orig_dims:
            mask = np.broadcast_to(np.expand_dims(mask, axis=-1), images.shape)
        return mask

    @staticmethod
    def calc_bboxes(mask_batch: np.ndarray, margin_factor: float = 1.2) -> np.ndarray:
        """
        Compute bounding boxes of the True region of one mask or a batch of masks.

        The box is half-open: ``xmin``/``ymin`` are the first occupied indices and
        ``xmax``/``ymax`` are one past the last occupied index, so the result can
        be used directly as slice bounds (``arr[xmin:xmax, ymin:ymax]``).
        ``x`` refers to the second-to-last mask axis and ``y`` to the last one.

        Args:
            mask_batch: boolean mask of shape [..., W, H].
            margin_factor: the box is grown on *each* side by
                ``extent * (margin_factor - 1)``, truncated to an integer.

        Returns:
            Integer array of shape [..., 4] holding ``(xmin, ymin, xmax, ymax)``,
            clipped to the mask bounds. A mask without any True entry yields the
            full extent of each axis (``np.argmax`` returns 0 for an all-False row).
        """
        # Collapse the opposite axis to get per-axis occupancy profiles.
        x = np.any(mask_batch, axis=-1)
        y = np.any(mask_batch, axis=-2)

        def argmin_argmax(arr: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
            length = arr.shape[-1]

            # argmax on booleans returns the first True index; on the flipped
            # profile it counts how far the last True is from the end, so
            # `length - that` is the exclusive upper bound.
            imin = np.argmax(arr, axis=-1)
            imax = length - np.argmax(np.flip(arr, axis=-1), axis=-1)

            # add margin to the indices
            diff = imax - imin
            margin = (diff * (margin_factor - 1)).astype(imin.dtype)
            imin = imin - margin
            imax = imax + margin

            # make sure we're within bounds; imax is exclusive, so its upper
            # limit is `length` (clamping to `length - 1` would cut off the
            # last row/column whenever the box reaches the border)
            imin = np.maximum(imin, 0)
            imax = np.minimum(imax, length)
            return imin, imax

        xmin, xmax = argmin_argmax(x)
        ymin, ymax = argmin_argmax(y)

        return np.stack((xmin, ymin, xmax, ymax), axis=-1)

In [3]:
class GenericSampler:
    """
    Renders views of an object placed in a simulated world.

    Wraps a ``CameraSimulator`` together with a fixed camera pose, an object
    position, a render mode (color or depth) and a per-view simulation time.
    """

    def __init__(
        self,
        world_file: str,
        obj_pos: tuple[int, int, int],
        cam_pos: tuple[int, int, int],
        cam_rot: np.ndarray,
        cam_res: tuple[int, int] = (300, 300),
        cam_fov: int = 45,
        cam_depth: bool = False,
        sim_time: float = 0,
    ):
        """
        Args:
            world_file: world description handed to ``CameraSimulator``.
            obj_pos: position applied to the object before every view.
            cam_pos: camera position passed to the render call.
            cam_rot: camera rotation passed to the render call.
            cam_res: camera resolution handed to ``CameraSimulator``.
            cam_fov: passed to ``CameraSimulator`` as ``fovy``.
            cam_depth: render with ``render_depth`` instead of ``render``.
            sim_time: value passed to ``simulate_seconds`` before each render.
        """
        # scene / pose configuration reused by every view
        self._obj_pos = obj_pos
        self._cam_pos = cam_pos
        self._cam_rot = cam_rot
        self._cam_depth = cam_depth
        self._sim_time = sim_time

        self._cam = CameraSimulator(world_file=world_file, resolution=cam_res, fovy=cam_fov)

    def _render(self):
        """Render from the stored camera pose, using the depth renderer if requested."""
        renderer = self._cam.render_depth if self._cam_depth else self._cam.render
        return renderer(self._cam_rot, self._cam_pos)

    def get_view(self, orient: tuple[float, float, float]) -> np.ndarray:
        """
        Place the object, apply the Euler orientation, run the simulation for
        the configured time and return the rendered image.
        """
        camera = self._cam
        camera.set_object_position(self._obj_pos)
        camera.set_obj_orient_euler(orient)
        camera.simulate_seconds(self._sim_time)
        return self._render()

    def get_view_cropped(self, orient: tuple[float, float, float], margin_factor: float = 1.2) -> np.ndarray:
        """
        Render a view and crop it to the bounding box of all pixels that are
        not zero in every channel, enlarged according to ``margin_factor``.
        """
        frame = self.get_view(orient)
        foreground = ImageManipulator.calc_mask(frame, bg_value=0, orig_dims=False)
        x1, y1, x2, y2 = ImageManipulator.calc_bboxes(foreground, margin_factor)
        # x bounds index the first image axis, y bounds the second
        return frame[x1:x2, y1:y2, :]

    def get_view_batch(self, orient_list: list[tuple[float, float, float]]) -> np.ndarray:
        """Render one view per orientation and stack them along a new leading axis."""
        return np.stack([self.get_view(orient) for orient in orient_list], axis=0)


class OrientSampler(GenericSampler):
    """
    Sampler preset for viewing an object under different orientations.

    The camera sits at the origin with an identity rotation and no simulation
    time is spent before rendering; only the object position and the camera
    intrinsics are configurable.
    """

    def __init__(
        self,
        world_file: str,
        # positions are real-valued (the default z is -0.7), hence float
        obj_pos: tuple[float, float, float] = (0, 0, -0.7),
        cam_res: tuple[int, int] = (300, 300),
        cam_fov: int = 45,
        cam_depth: bool = False,
    ):
        """
        Args:
            world_file: world description forwarded to ``GenericSampler``.
            obj_pos: object position applied before every view.
            cam_res: camera resolution.
            cam_fov: camera field of view.
            cam_depth: render depth instead of the regular image.
        """
        super().__init__(
            world_file=world_file,
            obj_pos=obj_pos,
            cam_pos=(0, 0, 0),  # camera is at the origin (tuple, as the parent signature declares)
            cam_rot=np.identity(3),  # camera is looking down at the z axis
            cam_res=cam_res,
            cam_fov=cam_fov,
            cam_depth=cam_depth,
            sim_time=0,  # dont simulate
        )

In [None]:
# Interactive preview: render the object under ten random Euler orientations
# with the origin-camera preset and step through the images one key press at a time.
viewer = OrientSampler(
    world_file="data/world_mug_sim.xml",
    obj_pos=(0, 0, -0.7),
)

# one row per view, three angles each, drawn uniformly from [0, 2*pi)
random_orientations = np.random.uniform(low=0, high=2 * np.pi, size=(10, 3))
for orient in random_orientations:
    im = viewer.get_view(orient)
    cv.imshow("img", im)
    cv.waitKey(0)  # wait for a key press before showing the next view

In [8]:
from camera_utils import xy_axes_to_rotation
# Camera rotation built from two axis vectors.
# NOTE(review): presumably these are the camera's x and y axes in world
# coordinates - confirm against camera_utils.xy_axes_to_rotation.
cam_rot = xy_axes_to_rotation([1, 0, 0], [0, 0, 1])

# Explicit camera pose; every get_view call re-places the object and
# simulates 3 seconds before rendering.
viewer = GenericSampler(
    world_file="data/world_mug.xml",
    obj_pos=(0, 1, 0.3),
    cam_pos=(0, 0, 0.1),
    cam_rot=cam_rot,
    cam_fov=60,
    sim_time=3,
)

# 100 random Euler orientations, shown one at a time (any key advances)
random_orientations = np.random.uniform(low=0, high=2 * np.pi, size=(100, 3))
for orient in random_orientations:
    im = viewer.get_view(orient)
    cv.imshow("img", im)
    cv.waitKey(0)

In [None]:
# Close the "img" preview window(s) opened by the demo cells above.
cv.destroyAllWindows()