Split eval into per_scene and overall
lewfish committed Jan 24, 2019
1 parent 7097791 commit 0178c1c
Showing 11 changed files with 149 additions and 14 deletions.
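In brief: the eval JSON that ClassificationEvaluation.save writes changes from a bare list of per-class metrics to a dict with an 'overall' key, plus a 'per_scene' key mapping scene IDs to per-class metric lists whenever scene-level evals were recorded during merge. A minimal sketch of reading the new layout ('eval.json' is a placeholder for whatever output_uri the evaluator saved to):

    import json

    # 'eval.json' is a placeholder; substitute the evaluator's output_uri.
    with open('eval.json') as f:
        eval_json = json.load(f)

    # Overall metrics: one item per class, plus a trailing 'average' item.
    for item in eval_json['overall']:
        print(item['class_name'], item['f1'])

    # Per-scene metrics are keyed by scene ID; the key is absent when no
    # scene-level evals were merged in.
    for scene_id, items in eval_json.get('per_scene', {}).items():
        print(scene_id, [item['f1'] for item in items])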
1 change: 1 addition & 0 deletions docs/changelog.rst
@@ -6,6 +6,7 @@ Raster Vision 0.9
 
 Raster Vision 0.9.0
 ~~~~~~~~~~~~~~~~~~~
+- Include per-scene eval metrics `#641 <https://github.com/azavea/raster-vision/pull/641>`_
 - Make and save predictions and do eval chip-by-chip `#635 <https://github.com/azavea/raster-vision/pull/635>`_
 - Decrease semseg memory usage `#630 <https://github.com/azavea/raster-vision/pull/630>`_
 - Add support for vector tiles in .mbtiles files `#601 <https://github.com/azavea/raster-vision/pull/601>`_
@@ -1,4 +1,4 @@
-[
+{"overall": [
     {
         "class_id": 1,
         "recall": 1.0,
@@ -31,4 +31,4 @@
         "f1": 1.0,
         "gt_count": 6.0
     }
-]
+]}
4 changes: 2 additions & 2 deletions integration_tests/integration_tests.py
@@ -112,8 +112,8 @@ def check_eval(test, temp_dir):
     expected_eval_path = get_expected_eval_path(test)
 
     if os.path.isfile(actual_eval_path):
-        expected_eval = open_json(expected_eval_path)
-        actual_eval = open_json(actual_eval_path)
+        expected_eval = open_json(expected_eval_path)['overall']
+        actual_eval = open_json(actual_eval_path)['overall']
 
         for expected_item in expected_eval:
             class_name = expected_item['class_name']
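Note that to_json only wraps the metrics in a dict when per-scene evals were recorded, so eval files produced without scene IDs (and all pre-0.9 files) remain bare JSON lists. A sketch of a reader that tolerates both layouts; load_overall_eval is a hypothetical helper, not part of the library:

    def load_overall_eval(eval_json):
        # Hypothetical compatibility shim: older eval files are a bare
        # list, newer ones nest the same list under 'overall'.
        if isinstance(eval_json, dict):
            return eval_json['overall']
        return eval_json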
@@ -1,4 +1,4 @@
-[
+{"overall": [
     {
         "class_id": 1,
         "precision": 1.0,
@@ -26,4 +26,4 @@
         "count_error": 0.0,
         "gt_count": 7
     }
-]
+]}
@@ -1,4 +1,4 @@
-[
+{"overall": [
     {
         "count_error": 245,
         "gt_count": 90000,
@@ -26,4 +26,4 @@
         "recall": 0.9993194444444444,
         "class_name": "average"
     }
-]
+]}
15 changes: 14 additions & 1 deletion rastervision/evaluation/classification_evaluation.py
@@ -1,4 +1,5 @@
 from abc import (ABC, abstractmethod)
+import copy
 
 import json
@@ -18,6 +19,7 @@ def __init__(self):
     def clear(self):
         """Clear the Evaluation."""
         self.class_to_eval_item = {}
+        self.scene_to_eval = {}
         self.avg_item = None
 
     def set_class_to_eval_item(self, class_to_eval_item):
@@ -36,6 +38,14 @@ def to_json(self):
         for eval_item in self.class_to_eval_item.values():
             json_rep.append(eval_item.to_json())
         json_rep.append(self.avg_item.to_json())
+
+        if self.scene_to_eval:
+            json_rep = {'overall': json_rep}
+            scene_to_eval_json = {}
+            for scene_id, eval in self.scene_to_eval.items():
+                scene_to_eval_json[scene_id] = eval.to_json()
+            json_rep['per_scene'] = scene_to_eval_json
+
         return json_rep
 
     def save(self, output_uri):
@@ -47,7 +57,7 @@ def save(self, output_uri):
         json_str = json.dumps(self.to_json(), indent=4)
         str_to_file(json_str, output_uri)
 
-    def merge(self, evaluation):
+    def merge(self, evaluation, scene_id=None):
         """Merge Evaluation for another Scene into this one.
 
         This is useful for computing the average metrics of a set of scenes.
@@ -68,6 +78,9 @@
 
         self.compute_avg()
 
+        if scene_id is not None:
+            self.scene_to_eval[scene_id] = copy.deepcopy(evaluation)
+
     def compute_avg(self):
         """Compute average metrics over all keys."""
         self.avg_item = ClassEvaluationItem(class_name='average')
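The heart of the change is in merge: passing a scene_id snapshots the incoming evaluation with copy.deepcopy before its metrics are folded into the running totals, and to_json then emits the snapshots under 'per_scene'. A simplified, self-contained mimic of that flow (not the actual Raster Vision classes; per-class metric merging is reduced to list concatenation here):

    import copy

    class Evaluation:
        def __init__(self, items=None):
            self.items = items or []  # stands in for class_to_eval_item
            self.scene_to_eval = {}

        def merge(self, other, scene_id=None):
            self.items.extend(other.items)
            if scene_id is not None:
                # Deep copy so later mutation of the running eval cannot
                # bleed into the saved per-scene snapshot.
                self.scene_to_eval[scene_id] = copy.deepcopy(other)

        def to_json(self):
            json_rep = list(self.items)
            if self.scene_to_eval:
                json_rep = {
                    'overall': json_rep,
                    'per_scene': {
                        sid: e.to_json()
                        for sid, e in self.scene_to_eval.items()
                    }
                }
            return json_rep

    overall = Evaluation()
    overall.merge(Evaluation([{'class_name': 'one', 'f1': 0.5}]), scene_id='1')
    overall.merge(Evaluation([{'class_name': 'two', 'f1': 0.7}]), scene_id='2')
    print(overall.to_json())
    # -> {'overall': [...], 'per_scene': {'1': [...], '2': [...]}}

Because each snapshot's own scene_to_eval is empty, its to_json() returns a bare list, which is exactly the shape the per_scene values take in the expected-eval.json fixture below.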
3 changes: 1 addition & 2 deletions rastervision/evaluation/classification_evaluator.py
@@ -36,6 +36,5 @@ def process(self, scenes, tmp_dir):
                 predictions = predictions.filter_by_aoi(scene.aoi_polygons)
             scene_evaluation = self.create_evaluation()
             scene_evaluation.compute(ground_truth, predictions)
-            evaluation.merge(scene_evaluation)
-
+            evaluation.merge(scene_evaluation, scene_id=scene.id)
         evaluation.save(self.output_uri)
evaluation.save(self.output_uri)
4 changes: 2 additions & 2 deletions rastervision/evaluation/semantic_segmentation_evaluator.py
@@ -36,7 +36,7 @@ def process(self, scenes, tmp_dir):
                 predictions = predictions.filter_by_aoi(scene.aoi_polygons)
             scene_evaluation = self.create_evaluation()
             scene_evaluation.compute(ground_truth, predictions)
-            evaluation.merge(scene_evaluation)
+            evaluation.merge(scene_evaluation, scene_id=scene.id)
 
             if hasattr(label_source, 'source') and hasattr(
                     label_source.source, 'vector_source') and hasattr(
@@ -52,6 +52,6 @@ def process(self, scenes, tmp_dir):
                     scene_evaluation = self.create_evaluation()
                     scene_evaluation.compute_vector(
                         gt_geojson, pred_geojson_local, mode, class_id)
-                    evaluation.merge(scene_evaluation)
+                    evaluation.merge(scene_evaluation, scene_id=scene.id)
 
         evaluation.save(self.output_uri)
63 changes: 63 additions & 0 deletions tests/data-files/expected-eval.json
@@ -0,0 +1,63 @@
{ "overall": [ { "class_id": 1,
"class_name": "one",
"count_error": 50,
"f1": 0.6666666666666666,
"gt_count": 100,
"precision": 1.0,
"recall": 0.5},
{ "class_id": 2,
"class_name": "two",
"count_error": 50.0,
"f1": 0.6666666666666666,
"gt_count": 100,
"precision": 1.0,
"recall": 0.5},
{ "class_id": null,
"class_name": "average",
"count_error": 50.0,
"f1": 0.6666666666666666,
"gt_count": 200,
"precision": 1.0,
"recall": 0.5}],
"per_scene": { "1": [ { "class_id": 1,
"class_name": "one",
"count_error": 50,
"f1": 0.6666666666666666,
"gt_count": 100,
"precision": 1.0,
"recall": 0.5},
{ "class_id": 2,
"class_name": "two",
"count_error": 50,
"f1": null,
"gt_count": 0,
"precision": 0.0,
"recall": null},
{ "class_id": null,
"class_name": "average",
"count_error": 50.0,
"f1": 0.6666666666666666,
"gt_count": 100,
"precision": 1.0,
"recall": 0.5}],
"2": [ { "class_id": 1,
"class_name": "one",
"count_error": 50,
"f1": null,
"gt_count": 0,
"precision": 0.0,
"recall": null},
{ "class_id": 2,
"class_name": "two",
"count_error": 50,
"f1": 0.6666666666666666,
"gt_count": 100,
"precision": 1.0,
"recall": 0.5},
{ "class_id": null,
"class_name": "average",
"count_error": 50.0,
"f1": 0.6666666666666666,
"gt_count": 100,
"precision": 1.0,
"recall": 0.5}]}}
2 changes: 1 addition & 1 deletion tests/evaluation/test_chip_classification_evaluator.py
@@ -54,7 +54,7 @@ def test_accounts_for_aoi(self):
 
         results = None
         with open(output_uri) as f:
-            results = json.loads(f.read())
+            results = json.loads(f.read())['overall']
 
         for result in results:
             self.assertEqual(result['f1'], 1.0)
59 changes: 59 additions & 0 deletions tests/evaluation/test_semantic_segmentation_evaluator.py
@@ -0,0 +1,59 @@
import unittest
from os.path import join
import json

import numpy as np

from rastervision.core.class_map import (ClassItem, ClassMap)
from rastervision.data.label_source.semantic_segmentation_label_source import (
    SemanticSegmentationLabelSource)
from rastervision.data import Scene
from tests.mock import MockRasterSource
from rastervision.evaluation import SemanticSegmentationEvaluator
from rastervision.rv_config import RVConfig
from rastervision.utils.files import file_to_str
from tests import data_file_path


class TestSemanticSegmentationEvaluator(unittest.TestCase):
    def setUp(self):
        self.tmp_dir = RVConfig.get_tmp_dir()

    def tearDown(self):
        self.tmp_dir.cleanup()

    def get_scene(self, class_id):
        scene_id = str(class_id)
        rs = MockRasterSource(channel_order=[0, 1, 3], num_channels=3)
        rs.set_raster(np.zeros((10, 10, 3)))

        gt_rs = MockRasterSource(channel_order=[0], num_channels=1)
        gt_arr = np.full((10, 10, 1), class_id)
        gt_rs.set_raster(gt_arr)
        gt_ls = SemanticSegmentationLabelSource(source=gt_rs)

        pred_rs = MockRasterSource(channel_order=[0], num_channels=1)
        pred_arr = np.ones((10, 10, 1))
        pred_arr[5:10, :, :] = 2
        pred_rs.set_raster(pred_arr)
        pred_ls = SemanticSegmentationLabelSource(source=pred_rs)

        return Scene(scene_id, rs, gt_ls, pred_ls)

    def test_evaluator(self):
        class_map = ClassMap([
            ClassItem(id=1, name='one'),
            ClassItem(id=2, name='two'),
        ])
        output_uri = join(self.tmp_dir.name, 'out.json')
        scenes = [self.get_scene(1), self.get_scene(2)]
        evaluator = SemanticSegmentationEvaluator(class_map, output_uri)
        evaluator.process(scenes, self.tmp_dir.name)
        eval_json = json.loads(file_to_str(output_uri))
        exp_eval_json = json.loads(
            file_to_str(data_file_path('expected-eval.json')))
        self.assertDictEqual(eval_json, exp_eval_json)


if __name__ == '__main__':
    unittest.main()
