In [1]:
import cv2 as cv
import numpy as np
import torch
from camera_simulator import CameraSimulator

In [2]:
class ImageManipulator:
    """
    Image manipulation functions that work both on batched data and single images.
    Image batch dim is assumed to be [N, W, H, 3]
    Single image dim is assumed to be [W, H, 3]
    """

    @staticmethod
    def calc_mask(images: np.ndarray, bg_value: int = 0, orig_dims: bool = False) -> np.ndarray:
        """
        Compute a boolean foreground mask.

        A pixel counts as foreground when at least one of its channels (last axis)
        differs from `bg_value`.

        Args:
            images: single image or image batch with the channels on the last axis.
            bg_value: channel value that marks a background pixel.
            orig_dims: when True, the mask is repeated along the channel axis so that
                it has the same shape as `images`.

        Returns:
            Boolean array of shape `images.shape[:-1]`, or of shape `images.shape`
            when `orig_dims` is True (a read-only broadcast view in that case).
        """
        mask = np.any(images != bg_value, axis=-1)
        if orig_dims:
            mask = np.broadcast_to(np.expand_dims(mask, axis=-1), images.shape)
        return mask

    @staticmethod
    def calc_bboxes(mask_batch: np.ndarray, margin_factor: float = 1.2) -> np.ndarray:
        """
        Compute bounding boxes around the True region of one mask or a batch of masks.

        The boxes are half-open: (xmin, ymin) is the first index inside the box and
        (xmax, ymax) is one past the last, so `image[xmin:xmax, ymin:ymax]` crops the
        box. `x` indexes the second-to-last mask axis and `y` the last one.

        Args:
            mask_batch: boolean mask of shape [..., W, H].
            margin_factor: each side of the box is pushed outwards by
                `extent * (margin_factor - 1)`, truncated to an integer, and the
                result is clipped to the mask bounds.

        Returns:
            Integer array of shape [..., 4] holding (xmin, ymin, xmax, ymax).
            A mask without any True value yields a box covering the whole mask.
        """
        x = np.any(mask_batch, axis=-1)
        y = np.any(mask_batch, axis=-2)

        def argmin_argmax(arr: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
            # argmax returns the first True, so on the flipped array it measures the
            # distance of the last True from the end; imax is therefore exclusive.
            imin = np.argmax(arr, axis=-1)
            arr = np.flip(arr, axis=-1)
            length = arr.shape[-1]
            imax = length - np.argmax(arr, axis=-1)

            # add margin to the indices
            diff = imax - imin
            margin = (diff * (margin_factor - 1)).astype(imin.dtype)
            imin = imin - margin
            imax = imax + margin

            # make sure we're within bounds; imax is an exclusive end, so its upper
            # limit is `length` (clamping to `length - 1` dropped the last row/column
            # whenever the object touched the far edge)
            imin = np.maximum(imin, 0)
            imax = np.minimum(imax, length)
            return imin, imax

        xmin, xmax = argmin_argmax(x)
        ymin, ymax = argmin_argmax(y)

        return np.stack((xmin, ymin, xmax, ymax), axis=-1)

In [3]:
class ViewProvider:
    """
    Renders images of an object in a simulated world for given object orientations.

    The camera is always rendered with an identity 3x3 matrix (presumably the camera
    rotation -- confirm against CameraSimulator) from the position
    [0, 0, camera_height]; only the object's orientation changes between views.
    """

    def __init__(
        self,
        world_file: str,
        camera_resolution: tuple[int, int] = (300, 300),
        camera_fov: int = 45,
        camera_height: float = 0.75,
        object_position: tuple[float, float, float] = (0, 0, 0),
        render_depth: bool = False,
    ):
        """
        Args:
            world_file: path of the world description passed to CameraSimulator.
            camera_resolution: passed to CameraSimulator as `resolution`.
            camera_fov: passed to CameraSimulator as `fovy`.
            camera_height: third component of the camera position used for rendering.
            object_position: position forwarded to `set_object_position`.
            render_depth: when True, views come from `render_depth` instead of `render`.
        """
        self._camera = CameraSimulator(resolution=camera_resolution, fovy=camera_fov, world_file=world_file)
        self._camera.set_object_position(object_position)

        self._camera_height = camera_height
        self._camera_position = [0, 0, camera_height]
        self._object_position = object_position
        self._render_depth = render_depth

    def _render_image(self):
        """Render the current scene, as depth or as a regular image depending on the `render_depth` setting."""
        if self._render_depth:
            return self._camera.render_depth(torch.eye(3), self._camera_position)
        return self._camera.render(torch.eye(3), self._camera_position)

    def get_view(self, orient: tuple[float, float, float]) -> np.ndarray:
        """Set the object orientation (Euler angles) and return the rendered view."""
        self._camera.set_object_orientation_euler(orient)
        return self._render_image()

    def get_view_batch(self, orient_list: list[tuple[float, float, float]]) -> list[np.ndarray]:
        """Render one view per orientation and return them as a list, in input order."""
        return [self.get_view(orient) for orient in orient_list]

    def get_view_cropped(self, orient: tuple[float, float, float], margin_factor: float = 1.2) -> np.ndarray:
        """
        Render a view and crop it to the bounding box of its non-background pixels.

        Pixels whose channels are all 0 are treated as background. The box is
        enlarged according to `margin_factor` (see ImageManipulator.calc_bboxes).
        NOTE(review): the masking reduces over the last axis and the crop indexes
        three axes, so this presumably only suits channel-last colour renders --
        confirm before using it together with render_depth=True.
        """
        image = self.get_view(orient)
        mask = ImageManipulator.calc_mask(image, bg_value=0, orig_dims=False)
        x1, y1, x2, y2 = ImageManipulator.calc_bboxes(mask, margin_factor)
        return image[x1:x2, y1:y2, :]

In [10]:
viewer = ViewProvider(
    world_file="data/world_mug_sim.xml",
    render_depth=False,
    object_position=(0.3, 0.3, -1),
)

# Show the object with the third orientation component set to 0, 1 and 2 in turn;
# cv.waitKey(0) holds each image on screen until a key is pressed.
for angle in (0, 1, 2):
    img = viewer.get_view([0, 0, angle])
    cv.imshow("img", img)
    last_key = cv.waitKey(0)

# Keep the code of the last pressed key as the cell's displayed value.
last_key

32

In [5]:
# Display the repr of the ViewProvider instance currently bound to `viewer`.
viewer

<__main__.ViewProvider at 0x7f4416ca9750>

In [6]:
viewer = ViewProvider(world_file="data/world_mug_sim.xml")

# Render 1000 random orientations, each angle drawn uniformly from [0, 2*pi).
# A seeded generator keeps the sampled orientations identical on every run.
rng = np.random.default_rng(0)
random_orientations = rng.uniform(0, 2 * np.pi, size=(1000, 3))

for orient in random_orientations:
    im = viewer.get_view(orient)
    # Uncomment to step through the renders one key press at a time:
    # cv.imshow("img", im)
    # cv.waitKey(0)

In [7]:
# Close all OpenCV windows, e.g. the "img" window opened by the cv.imshow calls.
cv.destroyAllWindows()