In [None]:
import cv2
import numpy as np
import torch
from datetime import datetime, timedelta
from matplotlib import pyplot as plt
from PIL import Image
from torchvision.transforms.functional import pad, resize, InterpolationMode

import sys, os

sys.path.append(os.environ["BUILD_WORKSPACE_DIRECTORY"])
from core.labeling.tools.pull_kinesis_feed import get_frame_from_kinesis
from core.perception.detector_tracker.utils import (
    letterbox,
)
from lib.ml.inference.tasks.object_detection_2d.yolov5.utils import (
    preprocess_image,
)
from lib.ml.inference.tasks.object_detection_2d.yolov5.pre_processing_model import (
    letterbox as letterbox_scripted,
    preprocess_image as preprocess_image_scripted,
)

# Target (height, width) shared by every letterbox / preprocess comparison below.
# NOTE(review): 736 instead of 720 is presumably 720 rounded up to a multiple
# of 32 for the detector — confirm against the model's expected input size.
new_shape = (736, 1280)  # 720p

In [None]:
# Fetch a single frame from the camera's Kinesis feed, using a timestamp two
# hours before the local clock, and record its first two dimensions.
camera_uuid = "americold/modesto/0011/cha"
timestamp_format = "%Y-%m-%d %H:%M:%S"
two_hours_ago = datetime.now() - timedelta(hours=2)
time = two_hours_ago.strftime(timestamp_format)
input_image = get_frame_from_kinesis(camera_uuid, time)
# NOTE(review): presumably an H x W x C array, making this (height, width) — confirm.
shape = input_image.shape[:2]

In [None]:
# Show the raw frame. `fs` is the figure width in inches and is reused by the
# plotting cells further down.
fs = 20
ar = shape[1] / shape[0]  # width / height
# matplotlib's figsize is (width, height); the height must therefore be the
# width divided by the aspect ratio, otherwise a landscape frame gets a
# portrait canvas with large empty margins.
original = plt.figure(figsize=(fs, fs / ar))
plt.imshow(input_image)
plt.title(f"Original Image: Shape, {shape}", fontsize=20)
plt.show()

## Letterbox Resize Operation Analysis

In [None]:
# Letterbox scale: the largest factor that keeps the frame inside `new_shape`
# on both axes without changing its aspect ratio.
height_ratio = new_shape[0] / shape[0]
width_ratio = new_shape[1] / shape[1]
r = min(height_ratio, width_ratio)
new_unpad = (int(round(shape[0] * r)), int(round(shape[1] * r)))
unpad_height, unpad_width = new_unpad

# Legacy path: OpenCV bilinear resize. cv2 takes the target as (width, height).
legacy_resized_image = cv2.resize(
    input_image, (unpad_width, unpad_height), interpolation=cv2.INTER_LINEAR
)

# New path: torchvision bilinear resize with antialiasing on a 1 x C x H x W
# tensor view of the same frame. torchvision takes the target as (height, width).
input_nchw = torch.from_numpy(input_image.transpose(2, 0, 1)).unsqueeze(0)
torchvision_resized_image = resize(
    img=input_nchw,
    size=new_unpad,
    interpolation=InterpolationMode.BILINEAR,
    antialias=True,
)

In [None]:
# Plot both resize results. figsize is (width, height), so each canvas height
# is the width `fs` divided by the image's width/height ratio.

# cv2 result is H x W x C.
new_size = legacy_resized_image.shape
ar = new_size[1] / new_size[0]  # width / height
f1 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(legacy_resized_image)
plt.title(f"CV2 Resize: Shape, {new_size}", fontsize=20)
plt.show()

# torchvision result is 1 x C x H x W; convert back to H x W x C for imshow.
new_size = torchvision_resized_image.shape
ar = new_size[3] / new_size[2]  # width / height
f2 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(torchvision_resized_image.numpy().squeeze().transpose(1, 2, 0))
plt.title(f"Torchvision Resize: Shape, {new_size}", fontsize=20)
plt.show()

In [None]:
# Per-pixel absolute difference between the cv2 and torchvision resizes.
legacy_torch_resized_image = torch.from_numpy(
    legacy_resized_image.transpose(2, 0, 1)
).unsqueeze(0)
# A plain `a - b` on unsigned 8-bit image tensors wraps around (a difference
# of -1 shows up as 255). max - min gives the true absolute difference for any
# dtype without needing a signed intermediate.
pixel_diff = torch.maximum(
    legacy_torch_resized_image, torchvision_resized_image
) - torch.minimum(legacy_torch_resized_image, torchvision_resized_image)

new_size = pixel_diff.shape
ar = new_size[3] / new_size[2]  # width / height
# figsize is (width, height): height is the width divided by the aspect ratio.
f3 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(pixel_diff.numpy().squeeze().transpose(1, 2, 0))
plt.title(f"Pixel Diff: Shape, {new_size}", fontsize=20)
plt.show()

## Letterbox Border Operation

In [None]:
# Letterbox border: split the leftover height/width evenly between the two
# sides. The -0.1 / +0.1 nudges decide which side receives the extra pixel
# when the leftover is odd.
color = (114, 114, 114)
dh = (new_shape[0] - new_unpad[0]) / 2
dw = (new_shape[1] - new_unpad[1]) / 2
top = int(round(dh - 0.1))
bottom = int(round(dh + 0.1))
left = int(round(dw - 0.1))
right = int(round(dw + 0.1))

# Legacy path: OpenCV constant border around the cv2-resized frame.
legacy_padded_image = cv2.copyMakeBorder(
    legacy_resized_image,
    top,
    bottom,
    left,
    right,
    cv2.BORDER_CONSTANT,
    value=color,
)

# New path: torchvision constant padding applied to the same cv2-resized frame
# (as 1 x C x H x W). torchvision orders padding as [left, top, right, bottom].
# NOTE(review): feeding the cv2 resize here presumably isolates the padding
# step from the resize differences — confirm that is intended.
resized_nchw = torch.from_numpy(legacy_resized_image.transpose(2, 0, 1)).unsqueeze(0)
torchvision_padded_image = pad(
    img=resized_nchw,
    padding=[left, top, right, bottom],
    fill=114,
    padding_mode="constant",
)

In [None]:
# Plot both padded results. figsize is (width, height), so each canvas height
# is the width `fs` divided by the image's width/height ratio.

# cv2 result is H x W x C.
new_size = legacy_padded_image.shape
ar = new_size[1] / new_size[0]  # width / height
f1 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(legacy_padded_image)
plt.title(f"CV2 Pad: Shape, {new_size}", fontsize=20)
plt.show()

# torchvision result is 1 x C x H x W; convert back to H x W x C for imshow.
new_size = torchvision_padded_image.shape
ar = new_size[3] / new_size[2]  # width / height
f2 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(torchvision_padded_image.numpy().squeeze().transpose(1, 2, 0))
plt.title(f"Torchvision Pad: Shape, {new_size}", fontsize=20)
plt.show()

## Letterbox Comparison

In [None]:
# Run the same frame through both letterbox implementations.
# Both calls take their target size from `new_shape` so the two paths cannot
# drift apart if the target is changed in the configuration cell.
legacy_letterbox = letterbox(
    input_image, new_shape, auto=False
)  # Legacy YOLO preprocessing
# The scripted variant is given a 1 x C x H x W tensor and the target shape as
# a 1 x 2 tensor.
scripted_letterbox = letterbox_scripted(
    torch.from_numpy(input_image.transpose(2, 0, 1)).unsqueeze(0),
    torch.tensor(new_shape).unsqueeze(0),
)

In [None]:
# Plot the legacy and scripted letterbox outputs, then their absolute
# difference. figsize is (width, height), so each canvas height is the width
# `fs` divided by the image's width/height ratio.

legacy_letterbox_image = legacy_letterbox[0]
new_size = legacy_letterbox_image.shape
ar = new_size[1] / new_size[0]  # width / height
f1 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(legacy_letterbox_image)
plt.title(f"Legacy Letterbox: Shape, {new_size}", fontsize=20)
plt.show()

# Scripted output is converted from 1 x C x H x W back to H x W x C.
scripted_letterbox_image = (
    scripted_letterbox[0].numpy().squeeze().transpose(1, 2, 0)
)
new_size = scripted_letterbox_image.shape
ar = new_size[1] / new_size[0]  # width / height
f2 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(scripted_letterbox_image)
plt.title(f"Scripted Letterbox: Shape, {new_size}", fontsize=20)
plt.show()

# `np.abs(a - b)` is wrong for unsigned image arrays: the subtraction wraps
# around before abs is applied (and abs is then a no-op). max - min yields the
# true absolute difference for any dtype.
letterbox_absdiff_image = np.maximum(
    legacy_letterbox_image, scripted_letterbox_image
) - np.minimum(legacy_letterbox_image, scripted_letterbox_image)
new_size = letterbox_absdiff_image.shape
ar = new_size[1] / new_size[0]  # width / height
f2 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(letterbox_absdiff_image)
plt.title(f"Diff in Letterbox: Shape, {new_size}", fontsize=20)
plt.show()

## Preprocess Image Operation

In [None]:
# Run the same frame through both preprocessing implementations.
# Each call gets its own 1 x H x W x C tensor view of the frame.
cpu_device = torch.device("cpu")
legacy_input = torch.from_numpy(input_image).unsqueeze(0)
legacy_preprocessed = preprocess_image(legacy_input, new_shape, cpu_device)

# The scripted variant takes the target shape as a 1 x 2 tensor.
scripted_input = torch.from_numpy(input_image).unsqueeze(0).to("cpu")
scripted_target_shape = torch.tensor(new_shape).unsqueeze(0)
scripted_preprocess = preprocess_image_scripted(scripted_input, scripted_target_shape)

In [None]:
# Plot the legacy and scripted preprocess outputs, then their absolute
# difference. figsize is (width, height), so each canvas height is the width
# `fs` divided by the image's width/height ratio.
#
# NOTE(review): the `* 255` rescale assumes both outputs are float tensors in
# [0, 1] laid out as 1 x C x H x W — confirm against the preprocess functions.
# Values are rounded before the uint8 cast; a bare cast truncates, so a value
# such as 113.99999 would become 113 and show up as a spurious 1-level diff.

legacy_preprocess_image = (legacy_preprocessed[0] * 255).round().to(torch.uint8)
new_size = legacy_preprocess_image.shape
ar = new_size[3] / new_size[2]  # width / height
f1 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(legacy_preprocess_image.numpy().squeeze().transpose(1, 2, 0))
plt.title(f"Legacy Preprocess: Shape, {new_size}", fontsize=20)
plt.show()

scripted_preprocess_image = (scripted_preprocess[0] * 255).round().to(torch.uint8)
new_size = scripted_preprocess_image.shape
ar = new_size[3] / new_size[2]  # width / height
f2 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(scripted_preprocess_image.numpy().squeeze().transpose(1, 2, 0))
plt.title(f"Scripted Preprocess: Shape, {new_size}", fontsize=20)
plt.show()

# uint8 subtraction wraps around before `.abs()` can help (a difference of -1
# becomes 255). max - min gives the true absolute difference.
preprocess_absdiff_image = torch.maximum(
    legacy_preprocess_image, scripted_preprocess_image
) - torch.minimum(legacy_preprocess_image, scripted_preprocess_image)
new_size = preprocess_absdiff_image.shape
ar = new_size[3] / new_size[2]  # width / height
f2 = plt.figure(figsize=(fs, fs / ar))
plt.imshow(preprocess_absdiff_image.numpy().squeeze().transpose(1, 2, 0))
plt.title(f"Diff in Preprocess: Shape, {new_size}", fontsize=20)
plt.show()