Skip to content

Commit

Permalink
Fixed valuecount for continuous data.
Browse files Browse the repository at this point in the history
  • Loading branch information
yellowcap committed Sep 13, 2017
1 parent b52fc67 commit b84d14d
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 11 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.rst
@@ -1,6 +1,18 @@
django-raster change log
========================

0.5
---
* Added ``memory_efficient`` flag to value count aggregator. The value counts
are now computed after collecting the complete array of data for the value
count area. This might require a lot of memory, so a tile-by-tile
computation can be activated with this flag.

* Fixed bug when computing continuous histograms over multiple tiles. The
histogram now has consistent breaks.

* A range for the histogram breaks can now be specified on the value count
aggregation through the ``hist_range`` argument.

0.4
---

Expand Down
30 changes: 24 additions & 6 deletions raster/valuecount.py
Expand Up @@ -23,14 +23,17 @@ class Aggregator(object):
functions on all tiles from a set of layers.
"""

def __init__(self, layer_dict, formula, zoom=None, geom=None, acres=True, grouping='auto', all_touched=True):
def __init__(self, layer_dict, formula, zoom=None, geom=None, acres=True,
grouping='auto', all_touched=True, memory_efficient=False, hist_range=None):
# Set defining parameter for this aggregator
self.layer_dict = layer_dict
self.formula = formula
self.geom = geom
self.acres = acres
self.rastgeom = None
self.all_touched = all_touched
self.memory_efficient = memory_efficient
self.hist_range = hist_range

# Get layers from input dict
self.layers = RasterLayer.objects.filter(id__in=layer_dict.values())
Expand Down Expand Up @@ -215,7 +218,18 @@ def value_count(self):
results = Counter({})
self._clear_stats()

for result_data in self.tiles():
if self.memory_efficient:
# Loop through tiles individually.
all_result_data = self.tiles()
else:
# Combine all tiles into one big array.
all_result_data = [tile for tile in self.tiles()]
if len(all_result_data):
all_result_data = (
numpy.concatenate(all_result_data),
)

for result_data in all_result_data:

if self.grouping == 'discrete':
# Compute unique counts for discrete input data
Expand All @@ -224,8 +238,13 @@ def value_count(self):
values = dict(zip(unique_counts[0], unique_counts[1]))

elif self.grouping == 'continuous':
if self.memory_efficient and not self.hist_range:
raise RasterAggregationException(
'Secify a histogram range for memory efficient continuous aggregation.'
)

# Handle continuous case - compute histogram on masked data
counts, bins = numpy.histogram(result_data)
counts, bins = numpy.histogram(result_data, range=self.hist_range)

# Create dictionary with bins as keys and histogram counts as values
values = {}
Expand Down Expand Up @@ -257,9 +276,8 @@ def value_count(self):
selector = formula_parser.evaluate({'x': result_data}, key)
values[key] = numpy.sum(selector)

# Add counts to results
results += Counter(values)

# Add counts to results.
results.update(Counter(values))
# Push statistics.
self._push_stats(result_data)

Expand Down
3 changes: 3 additions & 0 deletions tests/raster_testcase.py
Expand Up @@ -107,6 +107,9 @@ def setUp(self):
'(1.8, 2.7000000000000002)': 56,
'(2.7000000000000002, 3.6000000000000001)': 4131,
'(3.6000000000000001, 4.5)': 31490,
'(4.5, 5.4000000000000004)': 0,
'(5.4000000000000004, 6.2999999999999998)': 0,
'(6.2999999999999998, 7.2000000000000002)': 0,
'(7.2000000000000002, 8.0999999999999996)': 1350,
'(8.0999999999999996, 9.0)': 2977
}
Expand Down
66 changes: 61 additions & 5 deletions tests/test_valuecount.py
Expand Up @@ -172,21 +172,21 @@ def test_layer_with_legend_grouping(self):
agg = Aggregator(
layer_dict={'a': self.rasterlayer.id},
formula='a',
grouping=self.legend.id
grouping=self.legend.id,
)
self.assertDictEqual(
agg.value_count(),
{'2': self.expected_totals[2]}
{'2': self.expected_totals[2], '10': 0},
)
# Use a legend with formula expression
agg = Aggregator(
layer_dict={'a': self.rasterlayer.id},
formula='a',
grouping=self.legend_with_expression.id
grouping=self.legend_with_expression.id,
)
self.assertDictEqual(
agg.value_count(),
{'(x >= 2) & (x < 5)': self.expected_totals[2] + self.expected_totals[3] + self.expected_totals[4]}
{'(x >= 2) & (x < 5)': self.expected_totals[2] + self.expected_totals[3] + self.expected_totals[4]},
)

def test_layer_with_json_grouping(self):
Expand All @@ -198,7 +198,7 @@ def test_layer_with_json_grouping(self):
)
self.assertDictEqual(
agg.value_count(),
{'2': self.expected_totals[2]}
{'2': self.expected_totals[2], '10': 0}
)

def test_layer_stats(self):
Expand Down Expand Up @@ -261,3 +261,59 @@ def test_full_mask_data(self):
formula='a',
)
self.assertEqual((None, None, None, None), agg.statistics())

def test_histogram_range(self):
    """
    Continuous value counts computed with an explicit histogram range of
    (0, 100) are reported in ten bins, with every pixel in the first bin.
    """
    aggregator = Aggregator(
        layer_dict={'a': self.rasterlayer.id},
        formula='a',
        grouping='continuous',
        hist_range=(0, 100)
    )
    # Expected bins listed in ascending order; dict comparison is
    # order-independent.
    expected_counts = {
        '(0.0, 10.0)': 62440,
        '(10.0, 20.0)': 0,
        '(20.0, 30.0)': 0,
        '(30.0, 40.0)': 0,
        '(40.0, 50.0)': 0,
        '(50.0, 60.0)': 0,
        '(60.0, 70.0)': 0,
        '(70.0, 80.0)': 0,
        '(80.0, 90.0)': 0,
        '(90.0, 100.0)': 0,
    }
    self.assertDictEqual(aggregator.value_count(), expected_counts)

def test_memory_efficient(self):
    """
    Value counts computed with the memory_efficient flag match the expected
    results for discrete grouping and for continuous grouping with a fixed
    histogram range.
    """
    # Discrete grouping: expected keys are the stringified keys of the
    # expected totals fixture.
    discrete_aggregator = Aggregator(
        layer_dict={'a': self.rasterlayer.id},
        formula='a',
        grouping='discrete',
        memory_efficient=True,
    )
    expected_discrete = {}
    for pixel_value, pixel_count in self.expected_totals.items():
        expected_discrete[str(pixel_value)] = pixel_count
    self.assertDictEqual(discrete_aggregator.value_count(), expected_discrete)

    # Continuous grouping: a histogram range is supplied together with the
    # memory_efficient flag.
    continuous_aggregator = Aggregator(
        layer_dict={'a': self.rasterlayer.id},
        formula='a',
        grouping='continuous',
        memory_efficient=True,
        hist_range=(0, 100),
    )
    expected_continuous = {
        '(0.0, 10.0)': 62440,
        '(10.0, 20.0)': 0,
        '(20.0, 30.0)': 0,
        '(30.0, 40.0)': 0,
        '(40.0, 50.0)': 0,
        '(50.0, 60.0)': 0,
        '(60.0, 70.0)': 0,
        '(70.0, 80.0)': 0,
        '(80.0, 90.0)': 0,
        '(90.0, 100.0)': 0,
    }
    self.assertDictEqual(continuous_aggregator.value_count(), expected_continuous)

def test_memory_efficient_error(self):
    """
    Memory efficient continuous aggregation without a histogram range raises
    a RasterAggregationException when the value count is requested.
    """
    # The text has to match the message raised by the aggregator exactly,
    # including its spelling.
    expected_message = 'Secify a histogram range for memory efficient continuous aggregation.'
    with self.assertRaisesMessage(RasterAggregationException, expected_message):
        aggregator = Aggregator(
            layer_dict={'a': self.rasterlayer.id},
            formula='a',
            grouping='continuous',
            memory_efficient=True,
        )
        aggregator.value_count()

0 comments on commit b84d14d

Please sign in to comment.