In [None]:
import numpy as np
import tensorflow as tf
import math

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
%matplotlib inline

In [None]:
# Fixed seed so that "Restart Kernel -> Run All" always produces the same image.
SEED = 0

H, W = 7, 10  # image height and width in pixels

# A random RGB test image; a private RandomState leaves the global
# NumPy random state untouched.
IMAGE = np.random.RandomState(SEED).randint(0, 256, size=[H, W, 3], dtype='uint8')

# [ymin, xmin, ymax, xmax] in normalized coordinates, where 0.0 is the first
# pixel and 1.0 is the last pixel along an axis, hence the division by
# (H - 1) and (W - 1): the box spans rows 1..4 and columns 2..7.
BOX = [1/(H - 1), 2/(W - 1), 4/(H - 1), 7/(W - 1)]
CROP_SIZE = (4, 6)  # height and width

In [None]:
def show_image_with_box(box):
    """
    Show the global IMAGE with `box` drawn on top of it.

    Arguments:
        box: a list of four floats [ymin, xmin, ymax, xmax],
            scaled by (H - 1) and (W - 1) to get pixel coordinates.
    """
    _, axis = plt.subplots()
    ymin, xmin, ymax, xmax = box

    # Normalized -> pixel coordinates (1.0 maps to the last pixel index).
    top, left = ymin * (H - 1), xmin * (W - 1)
    box_height = (ymax - ymin) * (H - 1)
    box_width = (xmax - xmin) * (W - 1)

    outline = Rectangle(
        (left, top), box_width, box_height,
        alpha=0.6, facecolor='white', edgecolor='black'
    )
    axis.add_patch(outline)
    axis.imshow(IMAGE)
    
# Visualize the region that is going to be cropped.
show_image_with_box(BOX)

# A simple toy implementation of `crop_and_resize`

In [None]:
def _axis_samples(start, end, image_size, num_samples):
    """Return a (lower index, upper index, lerp weight) triple for every sample along one axis."""
    last = image_size - 1
    if num_samples > 1:
        # The first and the last samples lie exactly on the box edges.
        scale = (end - start) * last / (num_samples - 1)
        coords = [start * last + i * scale for i in range(num_samples)]
    else:
        # A single sample is taken at the center of the box.
        coords = [0.5 * (start + end) * last]

    samples = []
    for coord in coords:
        # Mathematically coord <= last because the box lies inside [0, 1],
        # but floating point rounding may overshoot slightly; clamp so
        # that the upper index never leaves the image.
        coord = min(coord, float(last))
        lower = math.floor(coord)
        samples.append((lower, math.ceil(coord), coord - lower))
    return samples


def crop_and_resize(image, box, crop_size):
    """
    Crop a box from an image and resize the crop with bilinear interpolation.

    A toy, loop-based implementation for a single image and a single box.
    It is based on:
    https://github.com/longcw/RoIAlign.pytorch/blob/master/roi_align/src/crop_and_resize.c

    Box coordinates are normalized: 0.0 corresponds to the first pixel
    and 1.0 corresponds to the last pixel along an axis, so a coordinate `c`
    maps to the pixel position `c * (size - 1)`. When the crop has more than
    one sample along an axis, the first and the last samples lie exactly
    on the box edges; when it has exactly one sample, it is taken at
    the center of the box.

    Arguments:
        image: a float numpy array with shape [height, width, depth].
        box: a list of floats with length 4, [ymin, xmin, ymax, xmax],
            all in [0, 1] with ymin < ymax and xmin < xmax.
        crop_size: a tuple of two positive integers (crop_height, crop_width).
    Returns:
        crop: a float numpy array with shape [crop_height, crop_width, depth].
    Raises:
        AssertionError: if the crop size is not positive or
            the box is outside of [0, 1] or is degenerate.
    """

    image_height, image_width, depth = image.shape
    crop_height, crop_width = crop_size
    assert (crop_height >= 1) and (crop_width >= 1)

    ymin, xmin, ymax, xmax = box
    assert (ymin >= 0.0) and (xmin >= 0.0) and (ymax >= 0.0) and (xmax >= 0.0)
    assert (ymin <= 1.0) and (xmin <= 1.0) and (ymax <= 1.0) and (xmax <= 1.0)
    assert (ymin < ymax) and (xmin < xmax)

    # The sampling positions along x do not depend on the row,
    # so both axes are precomputed once.
    rows = _axis_samples(ymin, ymax, image_height, crop_height)
    columns = _axis_samples(xmin, xmax, image_width, crop_width)

    crop = np.zeros((crop_height, crop_width, depth), dtype='float32')

    # "lerp" - linear interpolation, see:
    # https://en.wikipedia.org/wiki/Linear_interpolation#Applications
    for y, (top_y_index, bottom_y_index, y_lerp) in enumerate(rows):
        for x, (left_x_index, right_x_index, x_lerp) in enumerate(columns):
            for d in range(depth):

                top_left = image[top_y_index, left_x_index, d]
                top_right = image[top_y_index, right_x_index, d]
                bottom_left = image[bottom_y_index, left_x_index, d]
                bottom_right = image[bottom_y_index, right_x_index, d]

                # Interpolate along x on both rows, then along y between them.
                top = top_left + (top_right - top_left) * x_lerp
                bottom = bottom_left + (bottom_right - bottom_left) * x_lerp

                crop[y, x, d] = top + (bottom - top) * y_lerp

    return crop

In [None]:
# Run the toy implementation on a float version of the image;
# the result is cast back to uint8 only for displaying.
result1 = crop_and_resize(image=IMAGE.astype('float32'), box=BOX, crop_size=CROP_SIZE)
plt.imshow(result1.astype('uint8'));

In [None]:
# Show the original image with the box again, for comparison with the crop above.
show_image_with_box(BOX)

# Compare with TensorFlow

In [None]:
def tf_crop_and_resize(image, box, crop_size):
    """
    Crop and resize a single box using `tf.image.crop_and_resize`.

    Arguments:
        image: a numpy array, a batch dimension is added in front of it.
        box: a list of four floats, used as the only box of the only image.
        crop_size: a tuple of two integers.
    Returns:
        the first (and only) crop produced by the session run.
    """
    # Start from an empty graph on every call.
    tf.reset_default_graph()

    batch = tf.constant(np.expand_dims(image, 0))
    crops = tf.image.crop_and_resize(
        batch, boxes=[box], box_ind=[0], crop_size=crop_size
    )

    with tf.Session() as sess:
        first_crop = sess.run(crops)[0]
    return first_crop

In [None]:
# Same image, box and crop size as for `result1`, but computed by TensorFlow.
result2 = tf_crop_and_resize(image=IMAGE.astype('float32'), box=BOX, crop_size=CROP_SIZE)
plt.imshow(result2.astype('uint8'));

In [None]:
# Do the toy implementation and TensorFlow agree elementwise?
np.all(np.isclose(result1, result2, atol=1e-8))

# ROI Align

See:
1. https://github.com/longcw/RoIAlign.pytorch/blob/master/roi_align/roi_align.py
2. https://github.com/tensorpack/tensorpack/blob/master/examples/FasterRCNN/NOTES.md
3. https://github.com/tensorpack/tensorpack/issues/542

In [None]:
def show_image_with_box_correctly(box):
    """
    Show the global IMAGE with `box` drawn on top of it.

    Here the box is scaled by H and W (not by H - 1 and W - 1)
    and then shifted by half a pixel up and to the left.

    Arguments:
        box: a list of four floats [ymin, xmin, ymax, xmax].
    """
    _, axis = plt.subplots()
    ymin, xmin, ymax, xmax = box

    box_height = (ymax - ymin) * H
    box_width = (xmax - xmin) * W

    # Half a pixel shift of the top left corner.
    top = ymin * H - 0.5
    left = xmin * W - 0.5

    outline = Rectangle(
        (left, top), box_width, box_height,
        alpha=0.6, facecolor='white', edgecolor='black'
    )
    axis.add_patch(outline)
    axis.imshow(IMAGE)

In [None]:
ANOTHER_CROP_SIZE = (3, 4)  # height and width

In [None]:
# [ymin, xmin, ymax, xmax]. The box is normalized by H = 7 and W = 10,
# which is the scaling that `show_image_with_box_correctly` applies.
# The variant normalized by (H - 1) and (W - 1) is kept for reference:
# ANOTHER_BOX = [0, 0, 3/(H - 1), 4/(W - 1)]
ANOTHER_BOX = [0, 0, 3/7, 4/10]

show_image_with_box_correctly(ANOTHER_BOX)

In [None]:
ymin, xmin, ymax, xmax = ANOTHER_BOX
crop_height, crop_width = ANOTHER_CROP_SIZE

# Size of one output bin, measured in input pixels.
spacing_h = (ymax - ymin) * H / crop_height
spacing_w = (xmax - xmin) * W / crop_width

# Transform the box so that `crop_and_resize`, which samples at the box
# edges, samples half a bin inside of the original box instead.
# The results are normalized by (H - 1) and (W - 1).
# `H` and `W` are used here: `image_height` and `image_width` exist only
# as local variables of `crop_and_resize`, so referring to them in this
# cell raises a NameError on a fresh kernel.
# NOTE(review): the implementations linked in the "ROI Align" section
# appear to also subtract 0.5 from the start coordinate; confirm whether
# omitting that shift here is intended.
nymin = (ymin*(H - 1) + (spacing_h*0.5)) / (H - 1)
nxmin = (xmin*(W - 1) + (spacing_w*0.5)) / (W - 1)
nh = spacing_h * (crop_height - 1) / (H - 1)
nw = spacing_w * (crop_width - 1) / (W - 1)
box = [nymin, nxmin, nymin + nh, nxmin + nw]

crop = tf_crop_and_resize(IMAGE.astype('float32'), box, ANOTHER_CROP_SIZE)
plt.imshow(crop.astype('uint8'));

In [None]:
# Draw the transformed box, i.e. the one that was actually passed to `tf_crop_and_resize`.
show_image_with_box(box)

In [None]:
# Compare the crop with a plain slice of the image (rows 1..3, columns 1..4),
# allowing a difference of one intensity level.
np.all(np.isclose(crop, IMAGE[1:4, 1:5, :], atol=1.0))