
Commit

Initial pass at more flexible training and removing persnickety architectures.
faustomorales committed May 21, 2022
1 parent 925ce47 commit 5426ae1
Showing 31 changed files with 288 additions and 2,858 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -4,7 +4,7 @@ mira provides tooling for simple object detection projects. The package spans th

- **Core** object detection abstractions for images and annotations
- Access to **datasets** from common formats (e.g., VOC, COCO) and image sets (e.g., VOC 2012)
-- A common API for **well-known models** (e.g., EfficientDet and FasterRCNN)
+- A common API for **well-known models** (e.g., RetinaNet and FasterRCNN)

Check out [the docs](https://mira-python.readthedocs.io/en/latest/).
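For orientation, the feature list above maps to a workflow like the following (a minimal sketch assembled from the tutorial code changed later in this commit; the dataset subset and scene index are arbitrary):

```python
from mira import datasets, detectors

# Load a standard dataset and a pretrained detector. As of this commit,
# RetinaNet replaces EfficientDet among the well-known models.
dataset = datasets.load_voc2012(subset="val")
detector = detectors.RetinaNet(pretrained_top=True)

# Detectors return lists of annotations for a given image.
scene = dataset[5]
annotations = detector.detect(scene.image)
```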

14 changes: 7 additions & 7 deletions docs/tutorials.md
@@ -77,23 +77,23 @@ dataset = datasets.load_voc2012(subset='val')
# set up to use COCO labels.
detector_faster = detectors.FasterRCNN(pretrained_top=True)

-detector_ed = detectors.EfficientDet(pretrained_top=True)
+detector_retina = detectors.RetinaNet(pretrained_top=True)

# Pick an example scene
scene = dataset[5]

# Set up side-by-side plots
-fig, (ax_ed, ax_faster) = plt.subplots(ncols=2, figsize=(10, 5))
-ax_ed.set_title('EfficientDet')
+fig, (ax_retinanet, ax_faster) = plt.subplots(ncols=2, figsize=(10, 5))
+ax_retinanet.set_title('RetinaNet')
ax_faster.set_title('FasterRCNN')

# We get predicted scenes from each detector. Detectors return
# lists of annotations for a given image. So we can just replace
# (assign) those new annotations to the scene to get a new scene
# reflecting the detector's prediction.
-predicted_ed = scene.assign(
-    annotations=detector_ed.detect(scene.image),
-    annotation_config=detector_ed.annotation_config
+predicted_retinanet = scene.assign(
+    annotations=detector_retina.detect(scene.image),
+    annotation_config=detector_retina.annotation_config
)
predicted_faster = scene.assign(
    annotations=detector_faster.detect(scene.image, threshold=0.4),
@@ -102,7 +102,7 @@ predicted_faster = scene.assign(

# Plot both predictions. The calls to annotation() get us
# an image with the bounding boxes drawn.
-_ = predicted_ed.show(ax=ax_ed)
+_ = predicted_retinanet.show(ax=ax_retinanet)
_ = predicted_faster.show(ax=ax_faster)
```

11 changes: 7 additions & 4 deletions mira/core/annotation.py
@@ -200,20 +200,23 @@ def crop(self, width, height):
)[-2]
]

-    def resize(self, scale: float) -> "Annotation":
+    def resize(self, scale: typing.Union[float, np.ndarray]) -> "Annotation":
        """Obtain a revised selection with a given
        uniform scaling."""
        return (
            self.assign(
                **{
-                    k: int(getattr(self, k) * scale)
-                    for k in (
+                    k: int(getattr(self, k) * s)
+                    for k, s in zip(
                        [
                            "x1",
                            "y1",
                            "x2",
                            "y2",
-                        ]
+                        ],
+                        [scale[0], scale[1], scale[0], scale[1]]
+                        if isinstance(scale, np.ndarray)
+                        else [scale, scale, scale, scale],
                    )
                }
            )
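What the change above buys: `scale` may now be either a single float (uniform scaling) or an `np.ndarray` carrying separate x and y factors, applied to (x1, x2) and (y1, y2) respectively. The same arithmetic as a standalone sketch (`resize_box` is a hypothetical helper, not part of mira):

```python
import numpy as np

def resize_box(x1, y1, x2, y2, scale):
    # Mirror the diff: pair each coordinate with its axis's scale factor.
    scales = (
        [scale[0], scale[1], scale[0], scale[1]]
        if isinstance(scale, np.ndarray)
        else [scale, scale, scale, scale]
    )
    return tuple(int(v * s) for v, s in zip([x1, y1, x2, y2], scales))

print(resize_box(10, 20, 30, 40, 2.0))                   # (20, 40, 60, 80)
print(resize_box(10, 20, 30, 40, np.array([2.0, 0.5])))  # (20, 10, 60, 20)
```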
5 changes: 1 addition & 4 deletions mira/core/callbacks.py
@@ -45,9 +45,6 @@ def best_weights(
            maximizes it).
        key: What name to use for the saved flag.
    """
-    if torch is None:
-        raise ValueError("You must install pytorch to use this callback.")

    # pylint: disable=unused-argument
    def callback(detector, summaries, data_dir):
        saved = False
@@ -58,7 +55,7 @@ def callback(detector, summaries, data_dir):
            else summaries_df[metric].idxmin()
        )
        if best_idx == len(summaries_df) - 1:
-            torch.save(detector.model.state_dict(), filepath)
+            detector.save_weights(filepath)
            saved = True
        return {key: saved}
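Pieced together from the two hunks above, the updated function looks roughly like this (a sketch: parameter names other than `metric` and `key` are assumptions, and pandas is inferred from the `summaries_df` usage; `detector.save_weights` is the call this commit switches to):

```python
import pandas as pd

def best_weights(filepath, metric="loss", method="min", key="saved"):
    """Save weights whenever the most recent epoch is the best so far."""
    # pylint: disable=unused-argument
    def callback(detector, summaries, data_dir):
        saved = False
        summaries_df = pd.DataFrame(summaries)
        best_idx = (
            summaries_df[metric].idxmax()
            if method == "max"
            else summaries_df[metric].idxmin()
        )
        if best_idx == len(summaries_df) - 1:
            # Formerly torch.save(detector.model.state_dict(), filepath);
            # delegating to the detector drops the hard torch dependency here.
            detector.save_weights(filepath)
            saved = True
        return {key: saved}
    return callback
```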

Expand Down
2 changes: 1 addition & 1 deletion mira/core/scene.py
@@ -311,7 +311,7 @@ def annotated(
        for ann in self.annotations:
            x1, y1, _, _ = ann.x1y1x2y2()
            ax.annotate(
-                text=ann.category.name,
+                ann.category.name,
                xy=(x1, y1),
                fontsize=fontsize,
                backgroundcolor=(1, 1, 1, 0.5),
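The one-line change above drops the `text=` keyword and passes the label positionally, presumably to stay compatible across matplotlib versions: `Axes.annotate` called its first parameter `s` before matplotlib 3.3 and `text` afterward, and a positional argument satisfies both. For example:

```python
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
# Positional label works with both the old (`s`) and new (`text`) signatures.
ax.annotate("person", xy=(10, 20), fontsize=10, backgroundcolor=(1, 1, 1, 0.5))
```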
3 changes: 1 addition & 2 deletions mira/detectors/__init__.py
@@ -1,5 +1,4 @@
-from .efficientdet import EfficientDet
from .detector import Detector
from .fasterrcnn import FasterRCNN
from .retinanet import RetinaNet
from .detr import DETR
from .segmentation import SMP
105 changes: 0 additions & 105 deletions mira/detectors/assets/serve/efficientdet.py

This file was deleted.

21 changes: 2 additions & 19 deletions mira/detectors/assets/serve/fasterrcnn.py
@@ -42,24 +42,7 @@ def __init__(self, *args, **kwargs):
),
**self.detector_kwargs,
)
-        self.set_input_shape(width=INPUT_WIDTH, height=INPUT_HEIGHT)
-
-    @property
-    def input_shape(self):
-        return self._input_shape
-
-    def set_input_shape(self, width, height):
-        self._input_shape = (height, width, 3)
-        self.model.transform.fixed_size = (height, width)  # type: ignore
-        self.model.transform.min_size = (min(width, height),)  # type: ignore
-        self.model.transform.max_size = max(height, width)  # type: ignore
-        self.model.transform = mdc.convert_rcnn_transform(self.model.transform)

    def forward(self, x):
-        return mdc.torchvision_serve_inference(
-            self,
-            x=x,
-            resize_method=RESIZE_METHOD,
-            height=INPUT_HEIGHT,
-            width=INPUT_WIDTH,
-            base=None,
-        )
+        return mdc.torchvision_serve_inference(self, x=x, resize_config=RESIZE_CONFIG)
21 changes: 2 additions & 19 deletions mira/detectors/assets/serve/retinanet.py
@@ -43,24 +43,7 @@ def __init__(self, *args, **kwargs):
),
**self.detector_kwargs,
)
-        self.set_input_shape(width=INPUT_WIDTH, height=INPUT_HEIGHT)
-
-    @property
-    def input_shape(self):
-        return self._input_shape
-
-    def set_input_shape(self, width, height):
-        self._input_shape = (height, width, 3)
-        self.model.transform.fixed_size = (height, width)  # type: ignore
-        self.model.transform.min_size = (min(width, height),)  # type: ignore
-        self.model.transform.max_size = max(height, width)  # type: ignore
-        self.model.transform = mdc.convert_rcnn_transform(self.model.transform)

    def forward(self, x):
-        return mdc.torchvision_serve_inference(
-            self,
-            x=x,
-            resize_method=RESIZE_METHOD,
-            height=INPUT_HEIGHT,
-            width=INPUT_WIDTH,
-            base=None,
-        )
+        return mdc.torchvision_serve_inference(self, x=x, resize_config=RESIZE_CONFIG)
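Both serve assets (fasterrcnn.py and retinanet.py) get the same refactor: the fixed input-shape plumbing is deleted, and the four loose resize arguments (`resize_method`, `height`, `width`, `base`) collapse into a single `resize_config`. The diff does not show how `RESIZE_CONFIG` is defined; purely as an assumption, it plausibly bundles the same fields:

```python
# Hypothetical illustration only: the real RESIZE_CONFIG comes from the
# template variables in these serve assets, not from this snippet.
RESIZE_CONFIG = {
    "method": "fit",  # formerly RESIZE_METHOD
    "height": 512,    # formerly INPUT_HEIGHT
    "width": 512,     # formerly INPUT_WIDTH
    "base": None,     # formerly the separate `base` argument
}
```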
