Standard features using NaN (#48)

* refactor: attributes -> attrs
* NaN features from spike, rate of change, and morello
* style: PEP8 on rate_of_change.py
* fix: Correct relative path to fuzzy
* refactor: cumulative rate of change with NaN

  The Cumulative Rate of Change procedure now returns NaN features when values are invalid, and a few more improvements, including improvements in the validation tests.
* Global range using NaN instead of masked array
* Rate of Change, feature with NaN
castelao · Oct 27, 2020 · 551c924 · 551c924
1 parent 24c8dad
commit 551c924
Show file tree

Hide file tree

Showing 9 changed files with 127 additions and 45 deletions.
diff --git a/cotede/qctests/constant_cluster_size.py b/cotede/qctests/constant_cluster_size.py
@@ -19,7 +19,7 @@ def constant_cluster_size(x, tol=0):
     tol = tol + 1e-5 * tol
 
     ivalid = np.nonzero(~ma.getmaskarray(ma.fix_invalid(x)))[0]
-    dx = np.diff(x[ivalid])
+    dx = np.diff(np.atleast_1d(x)[ivalid])
 
     cluster_size = np.zeros(np.shape(x), dtype='i')
     for i, iv in enumerate(ivalid):
@@ -37,6 +37,9 @@ def constant_cluster_size(x, tol=0):
 
 
 class ConstantClusterSize(QCCheckVar):
+    """
+       Need to implement a check on time. TSG specifies constant value during 6 hrs.
+    """
     def set_features(self):
         cluster_size = constant_cluster_size(self.data[self.varname])
         N = ma.compressed(self.data[self.varname]).size

diff --git a/cotede/qctests/cum_rate_of_change.py b/cotede/qctests/cum_rate_of_change.py
@@ -1,4 +1,6 @@
 # -*- coding: utf-8 -*-
+# Licensed under a 3-clause BSD style license - see LICENSE.rst
+
 
 """
 
@@ -23,14 +25,21 @@
 
 import numpy as np
 from numpy import ma
+import logging
 
 from .qctests import QCCheckVar
 
+module_logger = logging.getLogger(__name__)
 
 def cum_rate_of_change(x, memory):
+    """Cummulative rate of change
+    """
+    if isinstance(x, ma.MaskedArray):
+        x[x.mask] = np.nan
+        x = x.data
 
-    y = ma.fix_invalid(np.ones_like(x) * np.nan)
-    y[1:] = ma.absolute(ma.diff(x))
+    y = np.nan * np.ones_like(x)
+    y[1:] = np.absolute(np.diff(x))
 
     for i in range(2, y.size):
         if y[i] < y[i - 1]:
@@ -41,6 +50,7 @@ def cum_rate_of_change(x, memory):
 
 class CumRateOfChange(QCCheckVar):
     def set_features(self):
+        module_logger.debug("Feature: cummulative rate of change")
         self.features = {
             "cum_rate_of_change": cum_rate_of_change(
                 self.data[self.varname], self.cfg["memory"]
@@ -51,19 +61,18 @@ def test(self):
         self.flags = {}
         try:
             threshold = self.cfg["threshold"]
-        except:
+        except KeyError:
             print("Deprecated cfg format. It should contain a threshold item.")
             threshold = self.cfg
 
-        assert (
-            (np.size(threshold) == 1)
-            and (threshold is not None)
-            and (np.isfinite(threshold))
-        )
+        assert np.size(threshold) == 1, "Threshold should be a single value"
+        assert threshold is not None, "Threshold can't be None"
+        assert np.isfinite(threshold), "Threshold must be a valid number"
 
         flag = np.zeros(self.data[self.varname].shape, dtype="i1")
-        feature = ma.absolute(self.features["cum_rate_of_change"])
+        feature = np.absolute(self.features["cum_rate_of_change"])
         flag[np.nonzero(feature > threshold)] = self.flag_bad
         flag[np.nonzero(feature <= threshold)] = self.flag_good
-        flag[ma.getmaskarray(self.data[self.varname])] = 9
+        x = self.data[self.varname]
+        flag[ma.getmaskarray(x) | ~np.isfinite(x)] = 9
         self.flags["cum_rate_of_change"] = flag
diff --git a/cotede/qctests/global_range.py b/cotede/qctests/global_range.py
@@ -38,6 +38,9 @@ def test(self):
         maxval = self.cfg["maxval"]
 
         feature = self.data[self.varname]
+        if isinstance(feature, ma.MaskedArray):
+            feature[feature.mask] = np.nan
+            feature = feature.data
 
         flag = np.zeros(feature.shape, dtype="i1")
         flag[np.nonzero(feature < minval)] = self.flag_bad

diff --git a/cotede/qctests/morello2014.py b/cotede/qctests/morello2014.py
@@ -6,7 +6,8 @@
 
 
 import numpy as np
-from cotede.fuzzy import fuzzyfy
+from numpy import ma
+from ..fuzzy import fuzzyfy
 
 
 def morello2014(features, cfg):
@@ -34,6 +35,12 @@ def morello2014(features, cfg):
 
     f = fuzzyfy(features, cfg)
 
+    for level in f:
+        if isinstance(f[level], ma.MaskedArray):
+            mask = f[level].mask
+            f[level] = f[level].data
+            f[level][mask] = np.nan
+
     # This is how Timms and Morello defined the Fuzzy Logic approach
     # flag = np.zeros(N, dtype='i1')
     # Flag must be np.array, not a ma.array.

diff --git a/cotede/qctests/rate_of_change.py b/cotede/qctests/rate_of_change.py
@@ -22,33 +22,41 @@
 
 
 def rate_of_change(x):
-    y = ma.fix_invalid(np.ones_like(x) * np.nan)
-    y[1:] = ma.diff(x)
+    if isinstance(x, ma.MaskedArray):
+        x[x.mask] = np.nan
+        x = x.data
+
+    y = x * np.nan
+    y[1:] = np.diff(x)
 
     return y
 
 
 class RateOfChange(QCCheckVar):
     def set_features(self):
-        self.features = {
-                'rate_of_change': rate_of_change(self.data[self.varname])}
+        self.features = {"rate_of_change": rate_of_change(self.data[self.varname])}
 
     def test(self):
         self.flags = {}
         try:
-            threshold = self.cfg['threshold']
+            threshold = self.cfg["threshold"]
         except KeyError:
             print("Deprecated cfg format. It should contain a threshold item.")
             threshold = self.cfg
 
-        assert (np.size(threshold) == 1) \
-            and (threshold is not None) \
+        assert (
+            (np.size(threshold) == 1)
+            and (threshold is not None)
             and (np.isfinite(threshold))
+        )
+
+        feature = np.absolute(self.features["rate_of_change"])
+        if ("sd_scale" in self.cfg) and self.cfg["sd_scale"]:
+            feature /= feature.std()
 
-        flag = np.zeros(self.data[self.varname].shape, dtype='i1')
-        feature = ma.absolute(self.features['rate_of_change'])
+        flag = np.zeros(self.data[self.varname].shape, dtype="i1")
         flag[np.nonzero(feature > threshold)] = self.flag_bad
         flag[np.nonzero(feature <= threshold)] = self.flag_good
         x = self.data[self.varname]
         flag[ma.getmaskarray(x) | ~np.isfinite(x)] = 9
-        self.flags['rate_of_change'] = flag
+        self.flags["rate_of_change"] = flag
diff --git a/cotede/qctests/spike.py b/cotede/qctests/spike.py
@@ -4,6 +4,11 @@
 
 """
 
+Threshold - |median(v0..v4)| + |sigma(v0..v4)|
+y = ma.masked_all_like(x)
+yy = np.stack([x[:-4], x[1:-3], x[2:-2], x[3:-1], x[4:]])
+y[2:-2] = np.median(yy, axis=0) + yy.std(axis=0)
+y = np.stack([x[:-4], x[1:-3], x[2:-2], x[3:-1], x[4:]])
 """
 
 import logging
@@ -20,14 +25,19 @@
 def spike(x):
     """ Spike
     """
-    y = ma.fix_invalid(np.ones_like(x) * np.nan)
+    if isinstance(x, ma.MaskedArray):
+        mask = x.mask
+        x = x.data
+        x[mask] = np.nan
+
+    y = np.nan * x
     y[1:-1] = np.abs(x[1:-1] - (x[:-2] + x[2:]) / 2.0) - np.abs((x[2:] - x[:-2]) / 2.0)
     return y
 
 
 class Spike(QCCheckVar):
     def set_features(self):
-        self.features = {'spike': spike(self.data[self.varname])}
+        self.features = {"spike": spike(self.data[self.varname])}
 
     def test(self):
         self.flags = {}
@@ -46,7 +56,7 @@ def test(self):
         )
 
         flag = np.zeros(self.data[self.varname].shape, dtype="i1")
-        feature = self.features["spike"]
+        feature = np.absolute(self.features["spike"])
         flag[np.nonzero(feature > threshold)] = self.flag_bad
         flag[np.nonzero(feature <= threshold)] = self.flag_good
         # Flag as 9 any masked input value

diff --git a/cotede/qctests/tukey53H.py b/cotede/qctests/tukey53H.py
@@ -1,6 +1,22 @@
 # -*- coding: utf-8 -*-
 
 """
+
+
+Shall I use a decorator??
+
+DATA = [25.32, 25.34, 25.34, 25.31, 24.99, 23.46, 21.85, 17.95, 15.39, 11.08, 6.93, 7.93, 5.71, 3.58, np.nan, 1, 1]
+
+
+tukey53H(np.array, np.maskedArray, pd.Series, xr.DataArray)
+
+
+    delta = tukey53H(x)
+
+    w = np.hamming(l)
+    sigma = (ma.convolve(x, w, mode="same") / w.sum()).std()
+
+    return delta / sigma
 """
 
 import logging

diff --git a/tests/qctests/test_qc_cum_rate_of_change.py b/tests/qctests/test_qc_cum_rate_of_change.py
@@ -1,30 +1,56 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-""" Check cummulative Rate of Change QC test
+""" Verify the Cummulative Rate of Change QC test
 """
 
-from numpy import ma
+import numpy as np
 from cotede.qctests import CumRateOfChange, cum_rate_of_change
 from data import DummyData
 
 
-def test():
-    profile = DummyData()
+def test_cum_rate_of_change():
+    x = [1, -1, 2, 2, 3, 2, 4]
+    memory = 0.8
+    y = cum_rate_of_change(x, memory)
+
+    output = [np.nan, 2.0, 3.0, 2.4, 2.12, 1.896, 2.0]
 
-    dummy_output = ma.masked_array([0, 5.43, 4.93, 14.68],
-            mask=[True, False, False, False])
+    assert isinstance(y, np.ndarray)
+    assert np.allclose(y, output, equal_nan=True)
 
-    cfg = {
-            'memory': 0.8,
-            'threshold': 4,
-            'flag_good': 1,
-            'flag_bad': 4
-            }
 
-    y = CumRateOfChange(profile, 'TEMP', cfg)
-    assert type(y.features) is dict
+def test_standard_dataset():
+    """Test CumRateOfChange with a standard dataset
+    """
+    profile = DummyData()
 
-    x = cum_rate_of_change(profile['TEMP'], cfg['memory'])
-    assert type(x) is ma.MaskedArray
-    # assert ma.allclose(x, dummy_output)
+    features = {
+        "cum_rate_of_change": [
+            np.nan,
+            0.02,
+            0.016,
+            0.03,
+            0.32,
+            1.53,
+            1.61,
+            3.9,
+            3.632,
+            4.31,
+            4.278,
+            3.6224,
+            3.34192,
+            3.099536,
+            np.nan,
+        ]
+    }
+    flags = {"cum_rate_of_change": [0, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 9]}
+
+    cfg = {"memory": 0.8, "threshold": 4, "flag_good": 1, "flag_bad": 4}
+
+    y = CumRateOfChange(profile, "TEMP", cfg)
+
+    for f in features:
+        assert np.allclose(y.features[f], features[f], equal_nan=True)
+    for f in flags:
+        assert np.allclose(y.flags[f], flags[f], equal_nan=True)
diff --git a/tests/test_pqc.py b/tests/test_pqc.py
@@ -29,9 +29,9 @@ def test():
         assert v in pqc.keys()
         assert np.allclose(profile[v], pqc[v])
 
-    for a in profile.attributes:
-        assert a in pqc.attributes
-        assert profile.attributes[a] == pqc.attributes[a]
+    for a in profile.attrs:
+        assert a in pqc.attrs
+        assert profile.attrs[a] == pqc.attrs[a]
 
     assert hasattr(pqc, 'flags')
     assert type(pqc.flags) is dict