Skip to content

Commit

Permalink
refactor: deprecate outlier (#185)
Browse files Browse the repository at this point in the history
  • Loading branch information
12rambau committed Jan 30, 2024
2 parents 8154fe8 + 029a32d commit c56b1cc
Show file tree
Hide file tree
Showing 8 changed files with 270 additions and 46 deletions.
79 changes: 78 additions & 1 deletion geetools/ImageCollection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import ee_extra

from geetools.accessors import geetools_accessor
from geetools.types import number
from geetools.types import ee_list, ee_number, number


@geetools_accessor(ee.ImageCollection)
Expand Down Expand Up @@ -518,3 +518,80 @@ def computeIntegral(image, integral):
return integral.add(locIntegral).set("last", image)

return ee.Image(self._obj.iterate(computeIntegral, s))

def outliers(
    self, bands: ee_list = [], sigma: ee_number = 2, drop: bool = False
) -> ee.ImageCollection:
    """Compute the outlier for each pixel in the specified bands.

    A pixel is considered as an outlier if:

    .. code-block::

        outlier = value > mean+(sigma*stddev)
        outlier = value < mean-(sigma*stddev)

    In a 1D example (with sigma = 1) it would be:

    - values = [1, 5, 6, 4, 7, 10]
    - mean = 5.5
    - std dev = 3
    - mean + (sigma*stddev) = 8.5
    - mean - (sigma*stddev) = 2.5
    - outliers = values outside the [2.5, 8.5] interval = [1, 10]

    Here in this function an extra band is added to each image for each of the
    evaluated bands with the outlier status. The band name is the original band
    name with the suffix "_outlier". A value of 1 means that the pixel is an
    outlier, 0 means that it is not.

    Optionally users can discard this band by setting ``drop`` to ``True`` and the
    outliers will simply be masked from each image. This is useful when the outlier
    band is not needed and the user wants to save space.

    idea from: https://www.kdnuggets.com/2017/02/removing-outliers-standard-deviation-python.html

    Args:
        bands: the bands to evaluate for outliers. If empty, all the bands of the first image are evaluated
        sigma: the number of standard deviations to use to compute the outlier
        drop: whether to drop the outlier band from the images and mask the outlier pixels instead

    Returns:
        an ImageCollection with the outlier band added to each image or masked if ``drop`` is ``True``

    Examples:
        .. code-block:: python

            import ee, geetools

            collection = (
                ee.ImageCollection("LANDSAT/LC08/C01/T1_TOA")
                .filterBounds(ee.Geometry.Point(-122.262, 37.8719))
                .filterDate("2014-01-01", "2014-12-31")
            )
            outliers = collection.geetools.outliers(["B1", "B2"], 2)
            print(outliers.getInfo())
    """
    # cast parameters and compute the outlier band names
    # NOTE: the ``bands`` default list is only read, never mutated, so sharing it between calls is safe
    initBands = self._obj.first().bandNames()
    statBands = ee.List(bands) if bands else initBands
    outBands = statBands.map(lambda b: ee.String(b).cat("_outlier"))

    # compute the mean and std dev for each band over the whole collection
    statCollection = self._obj.select(statBands)
    mean = statCollection.mean()
    stdDev = statCollection.reduce(ee.Reducer.stdDev())
    minValues = mean.subtract(stdDev.multiply(sigma))
    maxValues = mean.add(stdDev.multiply(sigma))

    # compute the outlier band for each image
    def computeOutlierBands(i):
        outImage = i.select(statBands)
        outImage = outImage.gt(maxValues).Or(outImage.lt(minValues))
        return i.addBands(outImage.rename(outBands))

    ic = self._obj.map(computeOutlierBands)

    # mask the evaluated bands where they are outliers and keep only the initial bands
    def maskOutliers(i):
        maskedBands = i.select(statBands).updateMask(i.select(outBands).Not())
        return i.addBands(maskedBands, overwrite=True).select(initBands)

    # rely on truthiness so that falsy non-bool values (None, 0) do not trigger the masking
    if drop:
        ic = ic.map(maskOutliers)

    return ee.ImageCollection(ic)
48 changes: 3 additions & 45 deletions geetools/tools/_deprecated_imagecollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,52 +477,10 @@ def true():
raise ValueError("side parameter must be 'server' or 'client'")


@deprecated(version="1.0.0", reason="Use ee.ImageCollection.geetools.outliers instead")
def outliers(collection, bands, sigma=2, updateMask=False):
    """Compute outliers in the collection.

    Deprecated thin wrapper kept for backward compatibility: the computation is
    delegated to ``ee.ImageCollection.geetools.outliers``.

    Args:
        collection: the collection to evaluate, cast to ``ee.ImageCollection`` before delegating
        bands: the bands to evaluate for outliers
        sigma: the number of standard deviations used to compute the outlier thresholds
        updateMask: forwarded as the third positional argument (``drop``) of the new method

    Returns:
        the value returned by ``ee.ImageCollection.geetools.outliers``
    """
    return ee.ImageCollection(collection).geetools.outliers(bands, sigma, updateMask)


@deprecated(version="1.0.0", reason="geetools will mostly focus on server-side methods now")
Expand Down
25 changes: 25 additions & 0 deletions tests/test_ImageCollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,28 @@ def test_deprecated_integral(self, s2_sr, amazonas, data_regression):
)
ic = ee.ImageCollection([integral])
data_regression.check(reduce(ic, amazonas).getInfo())


class TestOutliers:
    """Regression tests for the ``outliers`` method and its deprecated function."""

    def test_outliers(self, s2_sr, amazonas, data_regression):
        # default arguments on the first 10 images
        subset = s2_sr.limit(10)
        result = subset.geetools.outliers()
        values = reduce(result, amazonas).getInfo()
        data_regression.check(values)

    def test_outliers_with_bands(self, s2_sr, amazonas, data_regression):
        # restrict the evaluation to an explicit list of bands
        subset = s2_sr.limit(10)
        result = subset.geetools.outliers(bands=["B4", "B2"])
        values = reduce(result, amazonas).getInfo()
        data_regression.check(values)

    def test_outliers_with_sigma(self, s2_sr, amazonas, data_regression):
        # use a non-default number of standard deviations
        subset = s2_sr.limit(10)
        result = subset.geetools.outliers(sigma=3)
        values = reduce(result, amazonas).getInfo()
        data_regression.check(values)

    def test_outliers_with_drop(self, s2_sr, amazonas, data_regression):
        # request the ``drop`` behaviour instead of the extra bands
        subset = s2_sr.limit(10)
        result = subset.geetools.outliers(drop=True)
        values = reduce(result, amazonas).getInfo()
        data_regression.check(values)

    def test_deprecated_outliers(self, s2_sr, amazonas, data_regression):
        # the legacy entry point must emit a deprecation warning
        subset = s2_sr.limit(10)
        with pytest.deprecated_call():
            result = geetools.imagecollection.outliers(subset, ["B4"])
            values = reduce(result, amazonas).getInfo()
            data_regression.check(values)
24 changes: 24 additions & 0 deletions tests/test_ImageCollection/test_deprecated_outliers.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
AOT: null
B1: null
B11: null
B12: null
B2: null
B3: null
B4: null
B4_outlier: null
B5: null
B6: null
B7: null
B8: null
B8A: null
B9: null
MSK_CLDPRB: null
MSK_SNWPRB: null
QA10: null
QA20: null
QA60: null
SCL: null
TCI_B: null
TCI_G: null
TCI_R: null
WVP: null
46 changes: 46 additions & 0 deletions tests/test_ImageCollection/test_outliers.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
AOT: null
AOT_outlier: null
B1: null
B11: null
B11_outlier: null
B12: null
B12_outlier: null
B1_outlier: null
B2: null
B2_outlier: null
B3: null
B3_outlier: null
B4: null
B4_outlier: null
B5: null
B5_outlier: null
B6: null
B6_outlier: null
B7: null
B7_outlier: null
B8: null
B8A: null
B8A_outlier: null
B8_outlier: null
B9: null
B9_outlier: null
MSK_CLDPRB: null
MSK_CLDPRB_outlier: null
MSK_SNWPRB: null
MSK_SNWPRB_outlier: null
QA10: null
QA10_outlier: null
QA20: null
QA20_outlier: null
QA60: null
QA60_outlier: null
SCL: null
SCL_outlier: null
TCI_B: null
TCI_B_outlier: null
TCI_G: null
TCI_G_outlier: null
TCI_R: null
TCI_R_outlier: null
WVP: null
WVP_outlier: null
25 changes: 25 additions & 0 deletions tests/test_ImageCollection/test_outliers_with_bands.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
AOT: null
B1: null
B11: null
B12: null
B2: null
B2_outlier: null
B3: null
B4: null
B4_outlier: null
B5: null
B6: null
B7: null
B8: null
B8A: null
B9: null
MSK_CLDPRB: null
MSK_SNWPRB: null
QA10: null
QA20: null
QA60: null
SCL: null
TCI_B: null
TCI_G: null
TCI_R: null
WVP: null
23 changes: 23 additions & 0 deletions tests/test_ImageCollection/test_outliers_with_drop.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
AOT: null
B1: null
B11: null
B12: null
B2: null
B3: null
B4: null
B5: null
B6: null
B7: null
B8: null
B8A: null
B9: null
MSK_CLDPRB: null
MSK_SNWPRB: null
QA10: null
QA20: null
QA60: null
SCL: null
TCI_B: null
TCI_G: null
TCI_R: null
WVP: null
46 changes: 46 additions & 0 deletions tests/test_ImageCollection/test_outliers_with_sigma.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
AOT: null
AOT_outlier: null
B1: null
B11: null
B11_outlier: null
B12: null
B12_outlier: null
B1_outlier: null
B2: null
B2_outlier: null
B3: null
B3_outlier: null
B4: null
B4_outlier: null
B5: null
B5_outlier: null
B6: null
B6_outlier: null
B7: null
B7_outlier: null
B8: null
B8A: null
B8A_outlier: null
B8_outlier: null
B9: null
B9_outlier: null
MSK_CLDPRB: null
MSK_CLDPRB_outlier: null
MSK_SNWPRB: null
MSK_SNWPRB_outlier: null
QA10: null
QA10_outlier: null
QA20: null
QA20_outlier: null
QA60: null
QA60_outlier: null
SCL: null
SCL_outlier: null
TCI_B: null
TCI_B_outlier: null
TCI_G: null
TCI_G_outlier: null
TCI_R: null
TCI_R_outlier: null
WVP: null
WVP_outlier: null

0 comments on commit c56b1cc

Please sign in to comment.