Fixed valuecount for continuous data.

geodesign · Sep 13, 2017 · b84d14d · b84d14d
1 parent b52fc67
commit b84d14d
Show file tree

Hide file tree

Showing 4 changed files with 100 additions and 11 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,6 +1,18 @@
 django-raster change log
 ========================
 
+0.5
+---
+* Added ``memory_efficient`` flag to value count aggregator. The value counts
+  are now computed after collecting the complete array of data for the value
+  count area. This might require a lot of memory; a tile-by-tile
+  computation can be activated with this flag.
+
+* Fixed bug when computing continuous histograms over multiple tiles. The
+  histogram now has consistent breaks.
+
+* A histogram breaks range can now be specified on the value count aggregation.
+
 0.4
 ---
 

diff --git a/raster/valuecount.py b/raster/valuecount.py
@@ -23,14 +23,17 @@ class Aggregator(object):
     functions on all tiles from a set of layers.
     """
 
-    def __init__(self, layer_dict, formula, zoom=None, geom=None, acres=True, grouping='auto', all_touched=True):
+    def __init__(self, layer_dict, formula, zoom=None, geom=None, acres=True,
+                 grouping='auto', all_touched=True, memory_efficient=False, hist_range=None):
         # Set defining parameter for this aggregator
         self.layer_dict = layer_dict
         self.formula = formula
         self.geom = geom
         self.acres = acres
         self.rastgeom = None
         self.all_touched = all_touched
+        self.memory_efficient = memory_efficient
+        self.hist_range = hist_range
 
         # Get layers from input dict
         self.layers = RasterLayer.objects.filter(id__in=layer_dict.values())
@@ -215,7 +218,18 @@ def value_count(self):
         results = Counter({})
         self._clear_stats()
 
-        for result_data in self.tiles():
+        if self.memory_efficient:
+            # Loop through tiles individually.
+            all_result_data = self.tiles()
+        else:
+            # Combine all tiles into one big array.
+            all_result_data = [tile for tile in self.tiles()]
+            if len(all_result_data):
+                all_result_data = (
+                    numpy.concatenate(all_result_data),
+                )
+
+        for result_data in all_result_data:
 
             if self.grouping == 'discrete':
                 # Compute unique counts for discrete input data
@@ -224,8 +238,13 @@ def value_count(self):
                 values = dict(zip(unique_counts[0], unique_counts[1]))
 
             elif self.grouping == 'continuous':
+                if self.memory_efficient and not self.hist_range:
+                    raise RasterAggregationException(
+                        'Secify a histogram range for memory efficient continuous aggregation.'
+                    )
+
                 # Handle continuous case - compute histogram on masked data
-                counts, bins = numpy.histogram(result_data)
+                counts, bins = numpy.histogram(result_data, range=self.hist_range)
 
                 # Create dictionary with bins as keys and histogram counts as values
                 values = {}
@@ -257,9 +276,8 @@ def value_count(self):
                         selector = formula_parser.evaluate({'x': result_data}, key)
                     values[key] = numpy.sum(selector)
 
-            # Add counts to results
-            results += Counter(values)
-
+            # Add counts to results.
+            results.update(Counter(values))
             # Push statistics.
             self._push_stats(result_data)
 

diff --git a/tests/raster_testcase.py b/tests/raster_testcase.py
@@ -107,6 +107,9 @@ def setUp(self):
             '(1.8, 2.7000000000000002)': 56,
             '(2.7000000000000002, 3.6000000000000001)': 4131,
             '(3.6000000000000001, 4.5)': 31490,
+            '(4.5, 5.4000000000000004)': 0,
+            '(5.4000000000000004, 6.2999999999999998)': 0,
+            '(6.2999999999999998, 7.2000000000000002)': 0,
             '(7.2000000000000002, 8.0999999999999996)': 1350,
             '(8.0999999999999996, 9.0)': 2977
         }

diff --git a/tests/test_valuecount.py b/tests/test_valuecount.py
@@ -172,21 +172,21 @@ def test_layer_with_legend_grouping(self):
         agg = Aggregator(
             layer_dict={'a': self.rasterlayer.id},
             formula='a',
-            grouping=self.legend.id
+            grouping=self.legend.id,
         )
         self.assertDictEqual(
             agg.value_count(),
-            {'2': self.expected_totals[2]}
+            {'2': self.expected_totals[2], '10': 0},
         )
         # Use a legend with formula expression
         agg = Aggregator(
             layer_dict={'a': self.rasterlayer.id},
             formula='a',
-            grouping=self.legend_with_expression.id
+            grouping=self.legend_with_expression.id,
         )
         self.assertDictEqual(
             agg.value_count(),
-            {'(x >= 2) & (x < 5)': self.expected_totals[2] + self.expected_totals[3] + self.expected_totals[4]}
+            {'(x >= 2) & (x < 5)': self.expected_totals[2] + self.expected_totals[3] + self.expected_totals[4]},
         )
 
     def test_layer_with_json_grouping(self):
@@ -198,7 +198,7 @@ def test_layer_with_json_grouping(self):
         )
         self.assertDictEqual(
             agg.value_count(),
-            {'2': self.expected_totals[2]}
+            {'2': self.expected_totals[2], '10': 0}
         )
 
     def test_layer_stats(self):
@@ -261,3 +261,59 @@ def test_full_mask_data(self):
             formula='a',
         )
         self.assertEqual((None, None, None, None), agg.statistics())
+
+    def test_histogram_range(self):
+        agg = Aggregator(
+            layer_dict={'a': self.rasterlayer.id},
+            formula='a',
+            grouping='continuous',
+            hist_range=(0, 100)
+        )
+        self.assertDictEqual(
+            agg.value_count(),
+            {
+                '(0.0, 10.0)': 62440, '(20.0, 30.0)': 0, '(70.0, 80.0)': 0,
+                '(80.0, 90.0)': 0, '(30.0, 40.0)': 0, '(10.0, 20.0)': 0,
+                '(90.0, 100.0)': 0, '(60.0, 70.0)': 0, '(50.0, 60.0)': 0,
+                '(40.0, 50.0)': 0,
+            }
+        )
+
+    def test_memory_efficient(self):
+        agg = Aggregator(
+            layer_dict={'a': self.rasterlayer.id},
+            formula='a',
+            grouping='discrete',
+            memory_efficient=True,
+        )
+        self.assertDictEqual(
+            agg.value_count(),
+            {str(k): v for k, v in self.expected_totals.items()}
+        )
+        agg = Aggregator(
+            layer_dict={'a': self.rasterlayer.id},
+            formula='a',
+            grouping='continuous',
+            memory_efficient=True,
+            hist_range=(0, 100),
+        )
+        self.assertDictEqual(
+            agg.value_count(),
+            {
+                '(10.0, 20.0)': 0, '(60.0, 70.0)': 0, '(40.0, 50.0)': 0,
+                '(90.0, 100.0)': 0, '(70.0, 80.0)': 0, '(50.0, 60.0)': 0,
+                '(30.0, 40.0)': 0, '(20.0, 30.0)': 0, '(0.0, 10.0)': 62440,
+                '(80.0, 90.0)': 0
+            },
+        )
+
+    def test_memory_efficient_error(self):
+        msg = 'Secify a histogram range for memory efficient continuous aggregation.'
+        with self.assertRaisesMessage(RasterAggregationException, msg):
+            agg = Aggregator(
+                layer_dict={'a': self.rasterlayer.id},
+                formula='a',
+                grouping='continuous',
+                memory_efficient=True,
+            )
+            agg.value_count()