Skip to content

Commit

Permalink
Standard features using NaN (#48)
Browse files Browse the repository at this point in the history
* refactor: attributes -> attrs

* NaN features from spike, rate of change, and morello

* style: PEP8 on rate_of_change.py

* fix: Correct relative path to fuzzy

* refactor: cumulative rate of change with NaN

The Cumulative Rate of Change procedure now returns NaN features when
values are invalid. This commit also includes a few other improvements,
among them better validation tests.

* Global range using NaN instead of masked array

* Rate of Change, feature with NaN
  • Loading branch information
castelao committed Oct 27, 2020
1 parent 24c8dad commit 551c924
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 45 deletions.
5 changes: 4 additions & 1 deletion cotede/qctests/constant_cluster_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def constant_cluster_size(x, tol=0):
tol = tol + 1e-5 * tol

ivalid = np.nonzero(~ma.getmaskarray(ma.fix_invalid(x)))[0]
dx = np.diff(x[ivalid])
dx = np.diff(np.atleast_1d(x)[ivalid])

cluster_size = np.zeros(np.shape(x), dtype='i')
for i, iv in enumerate(ivalid):
Expand All @@ -37,6 +37,9 @@ def constant_cluster_size(x, tol=0):


class ConstantClusterSize(QCCheckVar):
"""
Need to implement a check on time. TSG specifies constant value during 6 hrs.
"""
def set_features(self):
cluster_size = constant_cluster_size(self.data[self.varname])
N = ma.compressed(self.data[self.varname]).size
Expand Down
29 changes: 19 additions & 10 deletions cotede/qctests/cum_rate_of_change.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
# Licensed under a 3-clause BSD style license - see LICENSE.rst


"""
Expand All @@ -23,14 +25,21 @@

import numpy as np
from numpy import ma
import logging

from .qctests import QCCheckVar

module_logger = logging.getLogger(__name__)

def cum_rate_of_change(x, memory):
"""Cumulative rate of change
"""
if isinstance(x, ma.MaskedArray):
x[x.mask] = np.nan
x = x.data

y = ma.fix_invalid(np.ones_like(x) * np.nan)
y[1:] = ma.absolute(ma.diff(x))
y = np.nan * np.ones_like(x)
y[1:] = np.absolute(np.diff(x))

for i in range(2, y.size):
if y[i] < y[i - 1]:
Expand All @@ -41,6 +50,7 @@ def cum_rate_of_change(x, memory):

class CumRateOfChange(QCCheckVar):
def set_features(self):
module_logger.debug("Feature: cummulative rate of change")
self.features = {
"cum_rate_of_change": cum_rate_of_change(
self.data[self.varname], self.cfg["memory"]
Expand All @@ -51,19 +61,18 @@ def test(self):
self.flags = {}
try:
threshold = self.cfg["threshold"]
except:
except KeyError:
print("Deprecated cfg format. It should contain a threshold item.")
threshold = self.cfg

assert (
(np.size(threshold) == 1)
and (threshold is not None)
and (np.isfinite(threshold))
)
assert np.size(threshold) == 1, "Threshold should be a single value"
assert threshold is not None, "Threshold can't be None"
assert np.isfinite(threshold), "Threshold must be a valid number"

flag = np.zeros(self.data[self.varname].shape, dtype="i1")
feature = ma.absolute(self.features["cum_rate_of_change"])
feature = np.absolute(self.features["cum_rate_of_change"])
flag[np.nonzero(feature > threshold)] = self.flag_bad
flag[np.nonzero(feature <= threshold)] = self.flag_good
flag[ma.getmaskarray(self.data[self.varname])] = 9
x = self.data[self.varname]
flag[ma.getmaskarray(x) | ~np.isfinite(x)] = 9
self.flags["cum_rate_of_change"] = flag
3 changes: 3 additions & 0 deletions cotede/qctests/global_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ def test(self):
maxval = self.cfg["maxval"]

feature = self.data[self.varname]
if isinstance(feature, ma.MaskedArray):
feature[feature.mask] = np.nan
feature = feature.data

flag = np.zeros(feature.shape, dtype="i1")
flag[np.nonzero(feature < minval)] = self.flag_bad
Expand Down
9 changes: 8 additions & 1 deletion cotede/qctests/morello2014.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@


import numpy as np
from cotede.fuzzy import fuzzyfy
from numpy import ma
from ..fuzzy import fuzzyfy


def morello2014(features, cfg):
Expand Down Expand Up @@ -34,6 +35,12 @@ def morello2014(features, cfg):

f = fuzzyfy(features, cfg)

for level in f:
if isinstance(f[level], ma.MaskedArray):
mask = f[level].mask
f[level] = f[level].data
f[level][mask] = np.nan

# This is how Timms and Morello defined the Fuzzy Logic approach
# flag = np.zeros(N, dtype='i1')
# Flag must be np.array, not a ma.array.
Expand Down
28 changes: 18 additions & 10 deletions cotede/qctests/rate_of_change.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,33 +22,41 @@


def rate_of_change(x):
y = ma.fix_invalid(np.ones_like(x) * np.nan)
y[1:] = ma.diff(x)
if isinstance(x, ma.MaskedArray):
x[x.mask] = np.nan
x = x.data

y = x * np.nan
y[1:] = np.diff(x)

return y


class RateOfChange(QCCheckVar):
def set_features(self):
self.features = {
'rate_of_change': rate_of_change(self.data[self.varname])}
self.features = {"rate_of_change": rate_of_change(self.data[self.varname])}

def test(self):
self.flags = {}
try:
threshold = self.cfg['threshold']
threshold = self.cfg["threshold"]
except KeyError:
print("Deprecated cfg format. It should contain a threshold item.")
threshold = self.cfg

assert (np.size(threshold) == 1) \
and (threshold is not None) \
assert (
(np.size(threshold) == 1)
and (threshold is not None)
and (np.isfinite(threshold))
)

feature = np.absolute(self.features["rate_of_change"])
if ("sd_scale" in self.cfg) and self.cfg["sd_scale"]:
feature /= feature.std()

flag = np.zeros(self.data[self.varname].shape, dtype='i1')
feature = ma.absolute(self.features['rate_of_change'])
flag = np.zeros(self.data[self.varname].shape, dtype="i1")
flag[np.nonzero(feature > threshold)] = self.flag_bad
flag[np.nonzero(feature <= threshold)] = self.flag_good
x = self.data[self.varname]
flag[ma.getmaskarray(x) | ~np.isfinite(x)] = 9
self.flags['rate_of_change'] = flag
self.flags["rate_of_change"] = flag
16 changes: 13 additions & 3 deletions cotede/qctests/spike.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

"""
Threshold - |median(v0..v4)| + |sigma(v0..v4)|
y = ma.masked_all_like(x)
yy = np.stack([x[:-4], x[1:-3], x[2:-2], x[3:-1], x[4:]])
y[2:-2] = np.median(yy, axis=0) + yy.std(axis=0)
y = np.stack([x[:-4], x[1:-3], x[2:-2], x[3:-1], x[4:]])
"""

import logging
Expand All @@ -20,14 +25,19 @@
def spike(x):
""" Spike
"""
y = ma.fix_invalid(np.ones_like(x) * np.nan)
if isinstance(x, ma.MaskedArray):
mask = x.mask
x = x.data
x[mask] = np.nan

y = np.nan * x
y[1:-1] = np.abs(x[1:-1] - (x[:-2] + x[2:]) / 2.0) - np.abs((x[2:] - x[:-2]) / 2.0)
return y


class Spike(QCCheckVar):
def set_features(self):
self.features = {'spike': spike(self.data[self.varname])}
self.features = {"spike": spike(self.data[self.varname])}

def test(self):
self.flags = {}
Expand All @@ -46,7 +56,7 @@ def test(self):
)

flag = np.zeros(self.data[self.varname].shape, dtype="i1")
feature = self.features["spike"]
feature = np.absolute(self.features["spike"])
flag[np.nonzero(feature > threshold)] = self.flag_bad
flag[np.nonzero(feature <= threshold)] = self.flag_good
# Flag as 9 any masked input value
Expand Down
16 changes: 16 additions & 0 deletions cotede/qctests/tukey53H.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
# -*- coding: utf-8 -*-

"""
Shall I use a decorator??
DATA = [25.32, 25.34, 25.34, 25.31, 24.99, 23.46, 21.85, 17.95, 15.39, 11.08, 6.93, 7.93, 5.71, 3.58, np.nan, 1, 1]
tukey53H(np.array, np.maskedArray, pd.Series, xr.DataArray)
delta = tukey53H(x)
w = np.hamming(l)
sigma = (ma.convolve(x, w, mode="same") / w.sum()).std()
return delta / sigma
"""

import logging
Expand Down
60 changes: 43 additions & 17 deletions tests/qctests/test_qc_cum_rate_of_change.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,56 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

""" Check cumulative Rate of Change QC test
""" Verify the Cumulative Rate of Change QC test
"""

from numpy import ma
import numpy as np
from cotede.qctests import CumRateOfChange, cum_rate_of_change
from data import DummyData


def test():
profile = DummyData()
def test_cum_rate_of_change():
x = [1, -1, 2, 2, 3, 2, 4]
memory = 0.8
y = cum_rate_of_change(x, memory)

output = [np.nan, 2.0, 3.0, 2.4, 2.12, 1.896, 2.0]

dummy_output = ma.masked_array([0, 5.43, 4.93, 14.68],
mask=[True, False, False, False])
assert isinstance(y, np.ndarray)
assert np.allclose(y, output, equal_nan=True)

cfg = {
'memory': 0.8,
'threshold': 4,
'flag_good': 1,
'flag_bad': 4
}

y = CumRateOfChange(profile, 'TEMP', cfg)
assert type(y.features) is dict
def test_standard_dataset():
"""Test CumRateOfChange with a standard dataset
"""
profile = DummyData()

x = cum_rate_of_change(profile['TEMP'], cfg['memory'])
assert type(x) is ma.MaskedArray
# assert ma.allclose(x, dummy_output)
features = {
"cum_rate_of_change": [
np.nan,
0.02,
0.016,
0.03,
0.32,
1.53,
1.61,
3.9,
3.632,
4.31,
4.278,
3.6224,
3.34192,
3.099536,
np.nan,
]
}
flags = {"cum_rate_of_change": [0, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 9]}

cfg = {"memory": 0.8, "threshold": 4, "flag_good": 1, "flag_bad": 4}

y = CumRateOfChange(profile, "TEMP", cfg)

for f in features:
assert np.allclose(y.features[f], features[f], equal_nan=True)
for f in flags:
assert np.allclose(y.flags[f], flags[f], equal_nan=True)
6 changes: 3 additions & 3 deletions tests/test_pqc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def test():
assert v in pqc.keys()
assert np.allclose(profile[v], pqc[v])

for a in profile.attributes:
assert a in pqc.attributes
assert profile.attributes[a] == pqc.attributes[a]
for a in profile.attrs:
assert a in pqc.attrs
assert profile.attrs[a] == pqc.attrs[a]

assert hasattr(pqc, 'flags')
assert type(pqc.flags) is dict
Expand Down

0 comments on commit 551c924

Please sign in to comment.