
Nb/documentation and autocasting pixels (#1345)
Auto casting of pixels & verification to uint8 type. Updated object detection doc
Nadav-Barak committed Apr 28, 2022
1 parent 4065254 commit c7043bb
Showing 9 changed files with 89 additions and 82 deletions.
2 changes: 1 addition & 1 deletion deepchecks/vision/batch_wrapper.py
@@ -66,7 +66,7 @@ def images(self):
if self._images is None:
dataset = self._context.get_data_by_kind(self._dataset_kind)
dataset.assert_images_valid()
self._images = dataset.batch_to_images(self._batch)
self._images = [image.astype('uint8') for image in dataset.batch_to_images(self._batch)]
return self._images

def __getitem__(self, index: int):
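To illustrate the cast added above: every image returned by ``batch_to_images`` is now converted to ``uint8`` before being cached on the batch. A minimal sketch of that behavior, using made-up float images that are already scaled to the 0-255 range:

.. code-block:: python

    import numpy as np

    # Made-up batch: float images already scaled to the [0, 255] range.
    batch_images = [np.random.rand(224, 224, 3) * 255, np.random.rand(64, 64, 3) * 255]

    # Same cast as the line added above - downstream checks always see uint8 pixels.
    images = [image.astype('uint8') for image in batch_images]
    assert all(image.dtype == np.uint8 for image in images)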
@@ -42,7 +42,7 @@ class ImageSegmentPerformance(SingleDatasetCheck):
Each property is a dictionary with keys 'name' (str), 'method' (Callable) and 'output_type' (str),
representing attributes of said method. 'output_type' must be one of 'continuous'/'discrete'
alternative_metrics : Dict[str, Metric], default: None
A dictionary of metrics, where the key is the metric name and the value is an ignite.Metric object whose score
A dictionary of metrics, where the key is the metric name and the value is an ignite. Metric object whose score
should be used. If None are given, use the default metrics.
number_of_bins: int, default : 5
Maximum number of bins to segment a single property into.
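For reference, a property in the dictionary format described by this docstring could be built as in the sketch below; the property function, its name, and the commented-out constructor call are illustrative assumptions, not code from this commit.

.. code-block:: python

    import numpy as np

    # Hypothetical custom image property following the documented dict format:
    # keys 'name' (str), 'method' (Callable) and 'output_type' ('continuous'/'discrete').
    def mean_blue_intensity(images):
        # images is assumed to be a list of HxWxC uint8 arrays; return one value per image
        return [float(np.mean(img[:, :, 2])) for img in images]

    custom_properties = [
        {'name': 'Mean Blue Intensity', 'method': mean_blue_intensity, 'output_type': 'continuous'},
    ]

    # Assumed usage (parameter names may differ by version):
    # check = ImageSegmentPerformance(image_properties=custom_properties, number_of_bins=5)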
4 changes: 2 additions & 2 deletions deepchecks/vision/vision_data.py
@@ -404,8 +404,8 @@ def validate_image_data(self, batch):
sample_min = np.min(sample)
sample_max = np.max(sample)
if sample_min < 0 or sample_max > 255 or sample_max <= 1:
raise ValidationError(f'Image data found to be in range [{sample_min}, {sample_max}] instead of expected '
f'range [0, 255].')
raise ValidationError(f'Image data should be in uint8 format (integers between 0 and 255). '
f'Found values in range [{sample_min}, {sample_max}].')

def validate_get_classes(self, batch):
"""Validate that the get_classes function returns data in the correct format.
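The updated validation rejects pixel values outside [0, 255] as well as images whose maximum is at most 1, a sign they are still normalized. A minimal sketch of rescaling normalized images before returning them from ``batch_to_images``; the [0, 1] input range is an assumption, adjust it to whatever normalization your loader applies:

.. code-block:: python

    import numpy as np

    def to_uint8_images(images_in_0_1):
        """Rescale images normalized to [0, 1] back to the expected [0, 255] uint8 range."""
        return [np.clip(image * 255, 0, 255).astype('uint8') for image in images_in_0_1]

    batch = [np.random.rand(32, 32, 3)]    # float pixels in [0, 1]
    valid = to_uint8_images(batch)         # uint8 pixels in [0, 255]
    assert valid[0].dtype == np.uint8 and valid[0].max() <= 255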
70 changes: 37 additions & 33 deletions docs/source/user-guide/vision/data-classes/DetectionData.rst
@@ -4,11 +4,9 @@
The Object Detection Data Class
===============================

The DetectionData is a :doc:`data class </user-guide/vision/data-classes/index>` represents a CV object detection task in deepchecks.
It is a subclass of the :class:`~deepchecks.vision.VisionData` class and is used to load and preprocess data for an
object detection task.
The DetectionData class contains additional data and general methods intended for easy access to relevant metadata
for object detection ML models validation.
The DetectionData is a :doc:`data class </user-guide/vision/data-classes/index>` designed for object detection tasks.
It is a subclass of the :class:`~deepchecks.vision.VisionData` class and is used to help deepchecks load and interact
with object detection data using a well defined format, as required by the object detection related checks.

For more info, please visit the API reference page: :class:`~deepchecks.vision.DetectionData`

@@ -22,31 +20,34 @@ Accepted Label Format
---------------------
Deepchecks' checks use the :func:`~deepchecks.vision.DetectionData.batch_to_labels` function in order to get the labels in the correct format.
The accepted label format is a list of length N containing tensors of shape (B, 5), where N is the number
of samples, B is the number of bounding boxes in the sample and each bounding box is represented by 5 values:
``(class_id, x, y, w, h)``.
of samples within a batch, B is the number of bounding boxes in the sample and each bounding box is represented by 5 values:
``(class_id, x_min, y_min, w, h)``.

x and y are the coordinates (in pixels) of the upper left corner of the bounding box, w
x_min and y_min are the coordinates (in pixels) of the **top left corner** of the bounding box, w
and h are the width and height of the bounding box (in pixels) and class_id is the class id of the prediction.

For example, for a sample with 2 bounding boxes, the label format may be:
``[(1, 8.4, 50.2, 100, 100), (5, 26.4, 10.1, 20, 40)]``.
``tensor([[1, 8.4, 50.2, 100, 100], [5, 26.4, 10.1, 20, 40]])``.
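As an illustrative sketch (made-up values, not taken from the deepchecks source), the full ``batch_to_labels`` output for a batch of two samples, with two and one bounding boxes respectively, could look like:

.. code-block:: python

    import torch

    # Hypothetical batch_to_labels output: a list of length N (here N=2),
    # each tensor of shape (B, 5) with rows of (class_id, x_min, y_min, w, h).
    labels = [
        torch.tensor([[1, 8.4, 50.2, 100, 100],
                      [5, 26.4, 10.1, 20, 40]]),    # first sample: 2 boxes
        torch.tensor([[0, 12.0, 15.5, 30, 60]]),    # second sample: 1 box
    ]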

Accepted Prediction Format
--------------------------
Deepchecks' checks use the :func:`~deepchecks.vision.DetectionData.infer_on_batch` function in order to get the predictions of the model in the correct format.
The accepted prediction format is a list of length N containing tensors of shape (B, 6), where N is the number
of images, B is the number of bounding boxes detected in the sample and each bounding box is represented by 6
values: ``[x, y, w, h, confidence, class_id]``.
values: ``[x_min, y_min, w, h, confidence, class_id]``.

x and y are the coordinates (in pixels) of the upper left corner
of the bounding box, w and h are the width and height of the bounding box (in pixels), confidence is the
confidence of the model and class_id is the class id.
x_min, y_min, w and h represent the bounding box location as above, confidence is the confidence score given by the model
to the bounding box, and class_id is the class id predicted by the model.

For example, for a sample with 2 bounding boxes, the prediction format may be:
``[(8.4, 50.2, 100, 100, 0.9, 1), (26.4, 10.1, 20, 40, 0.8, 5)]``.
``tensor([[8.4, 50.2, 100, 100, 0.9, 1], [26.4, 10.1, 20, 40, 0.8, 5]])``.
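Detectors often output boxes as ``(x_min, y_min, x_max, y_max, confidence, class_id)``, so converting to the accepted format mostly means replacing the bottom-right corner with width and height. A small sketch of that conversion for a single image, using made-up prediction values:

.. code-block:: python

    import torch

    # Made-up raw predictions for one image: (x_min, y_min, x_max, y_max, confidence, class_id)
    raw = torch.tensor([[8.4, 50.2, 108.4, 150.2, 0.9, 1],
                        [26.4, 10.1, 46.4, 50.1, 0.8, 5]])

    converted = torch.clone(raw)
    converted[:, 2] = converted[:, 2] - converted[:, 0]   # width  = x_max - x_min
    converted[:, 3] = converted[:, 3] - converted[:, 1]   # height = y_max - y_min
    # rows are now (x_min, y_min, w, h, confidence, class_id)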

Examples
Example
--------
Assume we have implemented a torch DataLoader whose underlying __getitem__ method returns a tuple of the form:
``(images, bboxes)``. ``images`` is a tensor of shape (N, C, H, W) in which the images' pixel values are normalized to
the [0, 1] range based on the mean and std of the ImageNet dataset. ``bboxes`` is a tensor of shape (N, B, 5) in which
each box arrives in the format: ``(class_id, x_min, y_min, x_max, y_max)``. Additionally, we are using YOLO as our model.

.. code-block:: python
@@ -56,7 +57,7 @@ Examples
import numpy as np
class MyDetectionTaskData(DetectionData):
"""Implement a ClassificationData class for a classification task."""
"""A deepchecks data digestion class for object detection related checks."""
def batch_to_images(self, batch):
"""Convert a batch of images to a list of PIL images.
@@ -86,48 +87,51 @@ Examples
return imgs
def batch_to_labels(self, batch):
"""Convert a batch of labels to a tensor.
"""Convert a batch bounding boxes to the required format.
Parameters
----------
batch : torch.Tensor
The batch of labels to convert.
batch : tuple
The batch of data, containing images and bounding boxes.
Returns
-------
torch.Tensor
A tensor of shape (N,).
List
A list of size N containing tensors of shape (B,5).
"""
# each bbox in the labels is (class_id, x_min, y_min, x_max, y_max). convert to (class_id, x_min, y_min, w, h)
return [torch.stack(
[torch.cat((bbox[0], bbox[1:3], bbox[4:] - bbox[1:3]), dim=0)
for bbox in image])
for image in batch[1]]
bboxes = []
for bboxes_single_image in batch[1]:
formatted_bboxes = [torch.cat((bbox[0], bbox[1:3], bbox[4:] - bbox[1:3]), dim=0)
for bbox in bboxes_single_image]
if len(formatted_bboxes) != 0:
bboxes.append(torch.stack(formatted_bboxes))
return bboxes
def infer_on_batch(self, batch, model, device):
"""Get the predictions of the model on a batch of images.
Parameters
----------
batch : torch.Tensor
The batch of data.
batch : tuple
The batch of data, containing images and bounding boxes.
model : torch.nn.Module
The model to use for inference.
device : torch.device
The device to use for inference.
Returns
-------
torch.Tensor
A tensor of shape (N, n_classes).
List
A list of size N containing tensors of shape (B,6).
"""
# Converts a yolo prediction batch to the accepted xywh format
return_list = []
predictions = model.to(device)(batch[0])
predictions = model(batch[0])
# yolo Detections objects have List[torch.Tensor] xyxy output in .pred
# yolo Detections objects have List[torch.Tensor(B,6)] output where each bbox is
# (x_min, y_min, x_max, y_max, confidence, class_id).
for single_image_tensor in predictions.pred:
pred_modified = torch.clone(single_image_tensor)
pred_modified[:, 2] = pred_modified[:, 2] - pred_modified[:, 0]
Expand All @@ -140,5 +144,5 @@ Examples
data = MyDetectionTaskData(your_dataloader)
# And validate the implementation:
data.validate()
data.validate_format(your_model)
12 changes: 6 additions & 6 deletions tests/vision/checks/distribution/image_dataset_drift_test.py
@@ -58,11 +58,11 @@ def test_drift_grayscale(mnist_dataset_train, mnist_dataset_test, device):
result = check.run(train, test, random_state=42, device=device, n_samples=None)
# Assert
assert_that(result.value, has_entries({
'domain_classifier_auc': close_to(0.516, 0.001),
'domain_classifier_drift_score': close_to(0.033, 0.001),
'domain_classifier_auc': close_to(0.5146, 0.001),
'domain_classifier_drift_score': close_to(0.029, 0.001),
'domain_classifier_feature_importance': has_entries({
'RMS Contrast': close_to(0.965, 0.001),
'Brightness': close_to(0.034, 0.001),
'RMS Contrast': close_to(1, 0.001),
'Brightness': close_to(0, 0.001),
'Aspect Ratio': equal_to(0),
'Area': equal_to(0),
'Mean Red Relative Intensity': equal_to(0),
@@ -110,8 +110,8 @@ def batch_to_images(self, batch):
result = check.run(train, test, random_state=42, device=device)
# Assert
assert_that(result.value, has_entries({
'domain_classifier_auc': close_to(1, 0.001),
'domain_classifier_drift_score': close_to(1, 0.001),
'domain_classifier_auc': close_to(0.908, 0.001),
'domain_classifier_drift_score': close_to(0.815, 0.001),
'domain_classifier_feature_importance': has_entries({
'Brightness': close_to(1, 0.001),
'Aspect Ratio': equal_to(0),
@@ -65,9 +65,9 @@ def test_image_property_outliers_check_mnist(mnist_dataset_train, device):
assert_that(result, is_correct_image_property_outliers_result())
assert_that(result.value, has_entries({
'Brightness': has_entries({
'indices': has_length(610),
'lower_limit': close_to(6.487, .001),
'upper_limit': close_to(62.650, .001)
'indices': has_length(609),
'lower_limit': close_to(6.45, .01),
'upper_limit': close_to(62.37, .01)
}),
'Mean Red Relative Intensity': instance_of(str),
'Mean Green Relative Intensity': instance_of(str),
@@ -104,8 +104,8 @@ def test_drift_classification(mnist_dataset_train, mnist_dataset_test):
# Assert
assert_that(result.value, has_entries({
'train': has_entries({'Brightness': close_to(0.08, 0.005)}),
'test': has_entries({'Brightness': close_to(0.239, 0.001)}),
'train-test difference': has_entries({'Brightness': close_to(-0.159, 0.001)})
'test': has_entries({'Brightness': close_to(0.234, 0.001)}),
'train-test difference': has_entries({'Brightness': close_to(-0.153, 0.001)})
}))


@@ -173,8 +173,8 @@ def test_drift_classification_per_class(mnist_dataset_train, mnist_dataset_test)
# Assert
assert_that(result.value, has_entries({
'Brightness': has_entries({'train': has_entries({'1': equal_to(0)}),
'test': has_entries({'1': close_to(0.64, 0.01)}),
'train-test difference': has_entries({'1': close_to(-0.64, 0.01)})}),
'test': has_entries({'1': close_to(0.659, 0.01)}),
'train-test difference': has_entries({'1': close_to(-0.659, 0.01)})}),
}))


@@ -26,7 +26,7 @@ def test_classification(mnist_dataset_train, mock_trained_mnist, device):
device=device, n_samples=None)
# Assert
assert_that(len(result.value['feature_segments']), equal_to(2))
assert_that(result.value['feature_segments']['Brightness']['segment1']['n_samples'], equal_to(254))
assert_that(result.value['feature_segments']['Brightness']['segment1']['n_samples'], equal_to(251))


def test_detection(coco_train_visiondata, coco_test_visiondata, mock_trained_yolov5_object_detection, device):
