Allow constant value configuration.

faustomorales · Nov 21, 2022 · 4bd4939
1 parent b4b81f2
commit 4bd4939
Show file tree

Hide file tree

Showing 6 changed files with 46 additions and 10 deletions.
diff --git a/mira/classifiers/clip.py b/mira/classifiers/clip.py
@@ -40,6 +40,7 @@ def __init__(self, categories, model_name="RN50", device="cpu"):
             "method": "fit",
             "width": self.model.model.visual.input_resolution,
             "height": self.model.model.visual.input_resolution,
+            "cval": 0,
         }
         self.preprocess = torchvision.transforms.Compose(
             [

diff --git a/mira/classifiers/torchvision.py b/mira/classifiers/torchvision.py
@@ -71,6 +71,7 @@ def __init__(
             "method": "fit",
             "width": 224,
             "height": 224,
+            "cval": 0,
         }
         self.categories = core.Categories.from_categories(categories)
         self.model = TVW(

diff --git a/mira/core/resizing.py b/mira/core/resizing.py
@@ -17,11 +17,21 @@
 SideOptions = tx.Literal["longest", "shortest"]
 FixedSizeConfig = tx.TypedDict(
     "FixedSizeConfig",
-    {"method": tx.Literal["fit", "pad", "force"], "width": int, "height": int},
+    {
+        "method": tx.Literal["fit", "pad", "force"],
+        "width": int,
+        "height": int,
+        "cval": int,
+    },
 )
 VariableSizeConfig = tx.TypedDict(
     "VariableSizeConfig",
-    {"method": tx.Literal["pad_to_multiple"], "base": int, "max": typing.Optional[int]},
+    {
+        "method": tx.Literal["pad_to_multiple"],
+        "base": int,
+        "max": typing.Optional[int],
+        "cval": int,
+    },
 )
 AspectPreservingConfig = tx.TypedDict(
     "AspectPreservingConfig",
@@ -76,7 +86,7 @@ def fit_side(
 
 
 def fit(
-    image: ArrayType, height: int, width: int, force: bool
+    image: ArrayType, height: int, width: int, force: bool, cval=0
 ) -> typing.Tuple[ArrayType, typing.Tuple[float, float], typing.Tuple[int, int]]:
     """Fit an image to a specific size, padding where necessary to maintain
     aspect ratio.
@@ -108,9 +118,17 @@ def fit(
     )
     if pad_y > 0 or pad_x > 0:
         padded = (
-            torch.nn.functional.pad(resized, (0, pad_x, 0, pad_y))
+            torch.nn.functional.pad(
+                resized, (0, pad_x, 0, pad_y), mode="constant", value=cval
+            )
             if use_torch_ops
-            else np.pad(resized, ((0, pad_y), (0, pad_x), (0, 0)))
+            else np.pad(
+                resized,
+                ((0, pad_y), (0, pad_x))
+                + (((0, 0),) if len(resized.shape) == 3 else tuple()),
+                mode="constant",
+                constant_values=cval,
+            )
         )
     else:
         padded = resized
@@ -258,10 +276,11 @@ def resize(
     if resize_config["method"] == "none":
         # pylint: disable=unexpected-keyword-arg
         pad_dimensions = sizes_arr.max(axis=0, keepdims=True) - sizes_arr
+    cval = resize_config.get("cval", 0)
     padded = (
         torch.cat(
             [
-                torch.nn.functional.pad(i, (0, pad_x, 0, pad_y)).unsqueeze(0)  # type: ignore
+                torch.nn.functional.pad(i, (0, pad_x, 0, pad_y), mode="constant", value=cval).unsqueeze(0)  # type: ignore
                 if pad_y >= 0 and pad_x >= 0
                 else fit(
                     typing.cast(torch.Tensor, i),
@@ -277,7 +296,13 @@ def resize(
         if use_torch_ops
         else np.concatenate(
             [
-                np.pad(i, ((0, pad_y), (0, pad_x), (0, 0)))[np.newaxis]
+                np.pad(
+                    i,
+                    ((0, pad_y), (0, pad_x))
+                    + (((0, 0),) if len(i.shape) == 3 else tuple()),
+                    mode="constant",
+                    constant_values=cval,
+                )[np.newaxis]
                 if pad_y >= 0 and pad_x >= 0
                 else fit(
                     i, height=raw_height + pad_y, width=raw_width + pad_x, force=False

diff --git a/mira/detectors/common.py b/mira/detectors/common.py
@@ -204,6 +204,7 @@ def initialize_basic(
         "method": "pad_to_multiple",
         "base": 128,
         "max": None,
+        "cval": 0,
     }
     # In mira, backbone has meaning because we use it to skip
     # training these weights. But the FPN includes feature extraction

diff --git a/mira/detectors/segmentation.py b/mira/detectors/segmentation.py
@@ -77,6 +77,7 @@ def __init__(
             "method": "pad_to_multiple",
             "base": 64,
             "max": None,
+            "cval": 0,
         }
         self.categories = mc.Categories.from_categories(categories)
         self.model = SMPWrapper(

diff --git a/tests/test_resizing.py b/tests/test_resizing.py
@@ -34,7 +34,8 @@ def verify_results(values, xlim, ylim, tensor_mode):
 @pytest.mark.parametrize("examples,tensor_mode", typed_examples)
 def test_resize_pad(examples, tensor_mode):
     resized, scales, sizes = resize(
-        x=examples, resize_config={"method": "pad", "height": 256, "width": 256}
+        x=examples,
+        resize_config={"method": "pad", "height": 256, "width": 256, "cval": 0},
     )
     assert (resized.shape[2:] if tensor_mode else resized.shape[1:3]) == (256, 256)
     assert (scales == 1).all()
@@ -46,7 +47,12 @@ def test_resize_pad(examples, tensor_mode):
 def test_resize_pad_to_multiple(examples, tensor_mode):
     resized, scales, sizes = resize(
         x=examples,
-        resize_config={"method": "pad_to_multiple", "base": 512, "max": None},
+        resize_config={
+            "method": "pad_to_multiple",
+            "base": 512,
+            "max": None,
+            "cval": 0,
+        },
     )
     assert (resized.shape[2:] if tensor_mode else resized.shape[1:3]) == (512, 512)
     assert (scales == 1).all()
@@ -57,7 +63,8 @@ def test_resize_pad_to_multiple(examples, tensor_mode):
 @pytest.mark.parametrize("examples,tensor_mode", typed_examples)
 def test_resize_fit(examples, tensor_mode):
     resized, scales, sizes = resize(
-        x=examples, resize_config={"method": "fit", "height": 128, "width": 128}
+        x=examples,
+        resize_config={"method": "fit", "height": 128, "width": 128, "cval": 0},
     )
     assert (resized.shape[2:] if tensor_mode else resized.shape[1:3]) == (128, 128)
     np.testing.assert_allclose(scales[:, 0], np.array([128 / 256, 128 / 256, 128 / 56]))