-
Notifications
You must be signed in to change notification settings - Fork 1
/
preprocessing.py
58 lines (46 loc) · 2.79 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""Preprocessing functions for image object detection tasks.
"""
import numpy as np
from PIL import Image
def preprocess(image: Image, img_max_side_length: int = 640, normalize: bool = True) -> np.array:
"""Preprocess image
Preprocess images so that they can be used by the ONNX-based model.
Preprocessing consists of 3 steps:
- Downscaling the image if it is too big (this is controlled by the img_max_side_length parameter)
- Normalizing the image colors in accordance with ImageNet's mean and standard deviation. This is a direct
recommendation from the Azure Machine Learning documentation linked in the "See Also" section.
- Shaping the image into a numpy array, adding a dimension so that it can be properly processed by the ONNX-based
model
See Also:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-inference-onnx-automl-image-models?view=azureml-api-2&tabs=instance-segmentation#preprocessing
Args:
image: Image to preprocess
img_max_side_length: Maximum side length for the output image in pixels. Input images which have any of their
sides longer than this variable will be downscaled. There is no upscaling. While the model may produce more
accurate outputs if the sizes of the supplied images are similar to the sizes of the images used in
training, smaller images result in a faster inference time. For this reason, a relatively small
img_max_side_length, 640, is chosen as a default
normalize: This parameter controls whether normalization is run. Setting this to False helps in the case where
the resulting image should be fed back to the user, so they only see it rescaled, but with the original
colors intact. Normalization should be enabled when the preprocessing is for producing images which will
be fed to the machine learning model for inference.
Returns:
img: Preprocessed image as numpy array
"""
if max(image.size) > img_max_side_length:
# Downscale if image too big
image = image.resize((int(image.size[0] / max(image.size) * img_max_side_length),
int(image.size[1] / max(image.size) * img_max_side_length)))
# HWC -> CHW
image_np = np.array(image)
image_np = image_np.transpose(2, 0, 1) # CxHxW
if normalize:
# normalize the image
mean_vec = np.array([0.485, 0.456, 0.406])
std_vec = np.array([0.229, 0.224, 0.225])
image_np_norm = np.zeros(image_np.shape).astype('float32')
for i in range(image_np.shape[0]):
image_np_norm[i, :, :] = (image_np[i, :, :] / 255 - mean_vec[i]) / std_vec[i]
image_np = image_np_norm
image_np = np.expand_dims(image_np, axis=0) # 1xCxHxW
return image_np