In [2]:
import cv2 as cv
import numpy as np
from camera_simulator import CameraSimulator

In [3]:
class ImageManipulator:
    """
    Image manipulation functions that work both on batched data and single images.
    Image batch dim is assumed to be [N, W, H, 3]
    Single image dim is assumed to be [W, H, 3]
    """

    @staticmethod
    def calc_mask(images: np.ndarray, bg_value: int = 0, orig_dims: bool = False) -> np.ndarray:
        """
        Mark every pixel that differs from the background in at least one channel.

        Args:
            images: single image or image batch; the last axis is reduced.
            bg_value: value every channel is compared against.
            orig_dims: if True, broadcast the mask back to ``images.shape``
                (a read-only view) instead of dropping the last axis.

        Returns:
            Boolean array of shape ``images.shape[:-1]``, or ``images.shape``
            when ``orig_dims`` is True.
        """
        mask = np.any(images != bg_value, axis=-1)
        if orig_dims:
            mask = np.broadcast_to(np.expand_dims(mask, axis=-1), images.shape)
        return mask

    @staticmethod
    def calc_bboxes(mask_batch: np.ndarray, margin_factor: float = 1.2) -> np.ndarray:
        """
        Compute the bounding box of the True region of each mask, plus a margin.

        Bounds are half-open ``[min, max)`` so they can be used directly as a
        slice: ``image[xmin:xmax, ymin:ymax]``.

        Args:
            mask_batch: boolean mask(s) of shape [W, H] or [N, W, H].
            margin_factor: every side of the box is pushed outwards by
                ``extent * (margin_factor - 1)`` truncated to an integer, where
                ``extent`` is the size of the tight box along that axis (so the
                box grows by twice that amount in total).

        Returns:
            Integer array of shape [4] or [N, 4] holding
            ``(xmin, ymin, xmax, ymax)``; x indexes the second-to-last mask
            axis and y the last one. An all-False mask yields the full extent
            of both axes.
        """
        x = np.any(mask_batch, axis=-1)
        y = np.any(mask_batch, axis=-2)

        def argmin_argmax(arr: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
            length = arr.shape[-1]

            # First True index, and one past the last True index (found by
            # searching the reversed array for its first True).
            imin = np.argmax(arr, axis=-1)
            imax = length - np.argmax(np.flip(arr, axis=-1), axis=-1)

            # add margin to the indices
            diff = imax - imin
            margin = (diff * (margin_factor - 1)).astype(imin.dtype)
            imin = imin - margin
            imax = imax + margin

            # Make sure we're within bounds. imax is an exclusive end, so the
            # largest valid value is `length`; clamping to `length - 1` would
            # drop the last row/column of objects touching the image border.
            imin = np.maximum(imin, 0)
            imax = np.minimum(imax, length)
            return imin, imax

        xmin, xmax = argmin_argmax(x)
        ymin, ymax = argmin_argmax(y)

        return np.stack((xmin, ymin, xmax, ymax), axis=-1)

In [4]:
from manipulated_object import ManipulatedObject


class ImageSampler:
    """
    Renders images of a simulated object from a fixed camera pose.

    The object is placed at a fixed position with a caller-supplied
    orientation, the simulation is advanced by ``simulation_time`` seconds,
    and the scene is rendered through a ``CameraSimulator``.
    """

    def __init__(
        self,
        world_file: str,
        object_position: tuple[float, float, float],
        camera_position: tuple[float, float, float],
        camera_rotation: np.ndarray,
        camera_resolution: tuple[int, int] = (300, 300),
        camra_fov: int = 45,
        render_depth: bool = False,
        simulation_time: float = 0,
    ):
        """
        Args:
            world_file: path of the world description handed to CameraSimulator.
            object_position: position the object is reset to before every view.
            camera_position: camera position passed to the renderer.
            camera_rotation: camera rotation passed to the renderer.
            camera_resolution: render resolution forwarded to CameraSimulator.
            camra_fov: field of view, forwarded as ``fovy``. The misspelled
                name is kept because callers pass it by keyword.
            render_depth: if True, views come from ``render_depth`` instead
                of ``render``.
            simulation_time: seconds simulated after placing the object and
                before rendering.
        """
        self._camera = CameraSimulator(resolution=camera_resolution, fovy=camra_fov, world_file=world_file)
        self._camera_position = camera_position
        self._camera_rotation = camera_rotation
        self._object_position = object_position
        self._simulate_depth = render_depth
        self._simulation_time = simulation_time

    def _render(self):
        """Render the current scene, as depth or as a regular image."""
        if self._simulate_depth:
            return self._camera.render_depth(self._camera_rotation, self._camera_position)
        return self._camera.render(self._camera_rotation, self._camera_position)

    def get_view(
        self,
        orient: tuple[float, float, float],
    ) -> tuple[np.ndarray, tuple[float, float, float]]:
        """
        Render the object with the requested orientation.

        Returns:
            ``(image, orientation)`` where ``orientation`` is read back from
            the simulator after the simulation step, so it may differ from
            the requested ``orient`` (presumably when the object moves during
            the simulated time -- confirm against CameraSimulator).
        """
        self._camera.set_object_position(self._object_position)
        self._camera.set_object_orientation(orient)
        self._camera.simulate_seconds(self._simulation_time)
        image = self._render()
        return image, self._camera.get_object_orientation()

    def get_view_cropped(
        self,
        orient: tuple[float, float, float],
        margin_factor: float = 1.2,
    ) -> tuple[np.ndarray, tuple[float, float, float]]:
        """
        Render the object and crop the image to its bounding box.

        Pixels equal to 0 in every channel are treated as background. The
        indexing below assumes a 3-dimensional image ([W, H, C]).

        Returns:
            ``(cropped_image, orientation)``; see ``get_view`` for the
            meaning of ``orientation``.
        """
        image, orient = self.get_view(orient)
        mask = ImageManipulator.calc_mask(image, bg_value=0, orig_dims=False)
        x1, y1, x2, y2 = ImageManipulator.calc_bboxes(mask, margin_factor)
        cropped = image[x1:x2, y1:y2, :]
        return cropped, orient

    def get_view_batch(
        self,
        orient_list: list[tuple[float, float, float]],
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Render one view per requested orientation.

        Args:
            orient_list: orientations to render, in order.

        Returns:
            ``(images, orients)``: the rendered images stacked along a new
            first axis, and the orientations reported by ``get_view`` as an
            array with one row per view.

        Raises:
            ValueError: if ``orient_list`` is empty (there is nothing to stack).
        """
        # Accumulators get their own names: rebinding `orient_list` here would
        # discard the caller's input before the loop ever runs.
        images = []
        orients = []
        for requested in orient_list:
            image, actual = self.get_view(requested)
            images.append(image)
            orients.append(actual)

        if not images:
            raise ValueError("orient_list must contain at least one orientation")

        return np.stack(images, axis=0), np.asanyarray(orients)

In [4]:
from camera_utils import xy_axes_to_rotation
# Camera rotation built from two axis vectors (presumably the camera's x and y
# axes in world coordinates -- confirm in camera_utils).
camera_rotation = xy_axes_to_rotation([1, 0, 0], [0, 0, 1])

viewer = ImageSampler(
    world_file="data/world_mug_sim.xml",
    object_position=(0, 1.3, 0.1),
    camera_position=(0, 0, 0.1),
    camera_rotation=camera_rotation,
    camra_fov=60,
)

# Preview 100 random orientations, one per key press.
random_orientations = np.random.uniform(0, 2 * np.pi, size=(100, 3))
try:
    for orient in random_orientations:
        im, _ = viewer.get_view(orient)
        cv.imshow("img", im)
        # waitKey(0) blocks until a key is pressed; 'q' or Esc ends the
        # preview early instead of forcing all 100 frames.
        if cv.waitKey(0) & 0xFF in (ord("q"), 27):
            break
finally:
    # Always close the window, even if the cell is interrupted.
    cv.destroyAllWindows()

QStandardPaths: wrong permissions on runtime directory /run/user/1000/, 0755 instead of 0700


In [7]:
from camera_utils import xy_axes_to_rotation
# Camera rotation built from two axis vectors (presumably the camera's x and y
# axes in world coordinates -- confirm in camera_utils).
camera_rotation = xy_axes_to_rotation([1, 0, 0], [0, 0, 1])

# Same setup with a different world file: the object starts higher (z = 0.3)
# and the simulation is advanced 3 seconds before every render.
viewer = ImageSampler(
    world_file="data/world_mug.xml",
    object_position=(0, 1.3, 0.3),
    camera_position=(0, 0, 0.1),
    camera_rotation=camera_rotation,
    simulation_time=3,
    camra_fov=60,
)

# Preview 100 random orientations, one per key press.
random_orientations = np.random.uniform(0, 2 * np.pi, size=(100, 3))
try:
    for orient in random_orientations:
        im, _ = viewer.get_view(orient)
        cv.imshow("img", im)
        # waitKey(0) blocks until a key is pressed; 'q' or Esc ends the
        # preview early instead of forcing all 100 frames.
        if cv.waitKey(0) & 0xFF in (ord("q"), 27):
            break
finally:
    # Always close the window, even if the cell is interrupted.
    cv.destroyAllWindows()

In [6]:
# Close every OpenCV window still open from the preview loops above.
cv.destroyAllWindows()