Skip to content

Commit

Permalink
feat: CARS comparison using flexible coordinates (#58)
Browse files Browse the repository at this point in the history
* feat: CARS comparison using flexible coordinates

Upgrading CARS to be closer to WOA, thus also taking advantage of
extracting coordinates from multiple standards.

* Bump version: 0.23.3 → 0.23.4
  • Loading branch information
castelao committed Jan 6, 2021
1 parent de016af commit 2a8796d
Show file tree
Hide file tree
Showing 6 changed files with 189 additions and 88 deletions.
2 changes: 1 addition & 1 deletion cotede/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

__author__ = 'Guilherme Castelao'
__email__ = 'guilherme@castelao.net'
__version__ = '0.23.3'
__version__ = '0.23.4'

from cotede import qc
from cotede.qc import ProfileQC, ProfileQCed
2 changes: 1 addition & 1 deletion cotede/qctests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .fuzzylogic import fuzzylogic

from .bin_spike import Bin_Spike, bin_spike
from .cars_normbias import CARS_NormBias
from .cars_normbias import CARS_NormBias, cars_normbias
from .constant_cluster_size import ConstantClusterSize, constant_cluster_size
from .cum_rate_of_change import CumRateOfChange, cum_rate_of_change
from .deepest_pressure import DeepestPressure
Expand Down
220 changes: 137 additions & 83 deletions cotede/qctests/cars_normbias.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,134 @@
from numpy import ma
from oceansdb import CARS

from .qctests import QCCheckVar
from . import QCCheckVar
from ..utils import extract_coordinates, extract_time, day_of_year, extract_depth


module_logger = logging.getLogger(__name__)


def cars_normbias(data, varname, attrs=None, use_standard_error=False):
    """Compare a measured variable against the CARS climatology

    Extracts time, position, and depth from ``data`` (falling back on
    ``attrs``), queries the CARS climatology at those coordinates, and
    returns the climatological mean and standard deviation together with
    the (normalized) bias of the measurements.

    Parameters
    ----------
    data :
        Data object providing the measurements (``data[varname]``) and
        the metadata required by the coordinate extraction helpers.
    varname : str
        Variable to evaluate (e.g. "TEMP"). A trailing "2" - a legacy
        Seabird CTD convention - is stripped before querying CARS.
    attrs : dict, optional
        Metadata to use instead of ``data``'s own attributes when
        extracting time and position.
    use_standard_error : bool, optional
        If True, only the bias beyond the climatology standard error is
        considered. This requires the number of observations, which is
        not currently extracted from CARS, so this option is not yet
        functional and fails explicitly (see Raises).

    Returns
    -------
    dict
        Keys "cars_mean", "cars_std", "cars_bias", and "cars_normbias",
        each a plain ndarray aligned with ``data[varname]``.

    Raises
    ------
    LookupError
        If time or geolocation (lat/lon) cannot be extracted.
    IndexError
        If there is no valid (finite, non-negative) depth level.
    NotImplementedError
        If ``use_standard_error`` is True (not supported yet).

    Notes
    -----
    - Include arguments to overwrite target variable (timename=None, latname=None, lonname=None)
    """
    try:
        doy = day_of_year(extract_time(data, attrs))
    except LookupError:
        module_logger.error("Missing time")
        raise

    try:
        # Note that QCCheck fallback to self.data.attrs if attrs not given
        lat, lon = extract_coordinates(data, attrs)
    except LookupError:
        module_logger.error("Missing geolocation (lat/lon)")
        raise

    kwargs = {"lat": lat, "lon": lon}

    if (np.size(lat) > 1) | (np.size(lon) > 1):
        dLmax = max(np.max(lat) - np.min(lat), np.max(lon) - np.min(lon))
        if dLmax >= 0.01:
            # Coordinates are spread enough to compare along the track.
            mode = "track"
            # kwargs["alongtrack_axis"] = ['lat', 'lon']
        else:
            mode = "profile"
            kwargs = {
                "lat": np.mean(lat),
                "lon": np.mean(lon),
            }
            module_logger.warning(
                "Multiple lat/lon positions but too close to each other so it will be considered a single position for the WOA comparison. lat: {}, lon: {}".format(
                    kwargs["lat"], kwargs["lon"]
                )
            )
    else:
        mode = "profile"

    depth = extract_depth(data)

    db = CARS()
    # This must go away. This was a trick to handle Seabird CTDs, but
    # now that seabird is a different package it should be handled there.
    if isinstance(varname, str) and (varname[-1] == "2"):
        vtype = varname[:-1]
    else:
        vtype = varname

    cars_vars = [
        "mean",
        # "standard_deviation",
        "std_dev",
        # "number_of_observations",
    ]

    # Eventually the case of some invalid depth levels will be handled by
    # OceansDB and the following steps will be simplified.
    valid_depth = depth
    if np.size(depth) > 0:
        idx = ~ma.getmaskarray(depth) & (np.array(depth) >= 0) & np.isfinite(depth)
        if not idx.any():
            module_logger.error(
                "Invalid depth(s) for CARS comparison: {}".format(depth)
            )
            raise IndexError("No valid depth for CARS comparison")
        elif not idx.all():
            valid_depth = depth[idx]
    if mode == "track":
        cars = db[vtype].track(var=cars_vars, doy=doy, depth=valid_depth, **kwargs)
    else:
        cars = db[vtype].extract(var=cars_vars, doy=doy, depth=valid_depth, **kwargs)

    if not np.all(depth == valid_depth):
        # Re-expand onto the original depth axis, masking the levels that
        # were dropped as invalid.
        for v in cars.keys():
            tmp = ma.masked_all(depth.shape, dtype=cars[v].dtype)
            tmp[idx] = cars[v]
            cars[v] = tmp

    features = {
        "cars_mean": cars["mean"],
        "cars_std": cars["std_dev"],
        # "cars_nsamples": cars["number_of_observations"],
    }

    features["cars_bias"] = data[varname] - features["cars_mean"]

    # Replace masked values by a plain sentinel so downstream users get
    # regular ndarrays instead of masked arrays.
    for v in features:
        mask = ma.getmaskarray(features[v])
        if mask.any():
            if v == "cars_nsamples":
                missing_value = -1
            else:
                missing_value = np.nan
            features[v][mask] = missing_value
        features[v] = np.array(features[v])

    # if use_standard_error = True, the comparison with the climatology
    # considers the standard error, i.e. the bias will be only the
    # amount above the standard error range.
    if use_standard_error is True:
        if "cars_nsamples" not in features:
            # The number of observations is not extracted from CARS (see
            # cars_vars above), so the standard error can't be estimated.
            # Fail fast with a clear message instead of an obscure
            # KeyError further down.
            raise NotImplementedError(
                "use_standard_error requires the number of observations, "
                "which is not currently extracted from CARS"
            )
        standard_error = features["cars_std"]
        idx = features["cars_nsamples"] > 0
        standard_error[~idx] = np.nan
        standard_error[idx] /= features["cars_nsamples"][idx] ** 0.5

        idx = np.absolute(features["cars_bias"]) <= standard_error
        features["cars_bias"][idx] = 0
        idx = np.absolute(features["cars_bias"]) > standard_error
        features["cars_bias"][idx] -= (
            np.sign(features["cars_bias"][idx]) * standard_error[idx]
        )

    features["cars_normbias"] = features["cars_bias"] / features["cars_std"]

    return features


class CARS_NormBias(QCCheckVar):
"""Compares measuremnts with CARS climatology
Expand All @@ -29,107 +151,39 @@ class CARS_NormBias(QCCheckVar):

flag_bad = 3
use_standard_error = False
    # 3 is the possible minimum to estimate the std, but I should use higher.
min_samples = 3

def __init__(self, data, varname, cfg=None, autoflag=True):
try:
self.use_standard_error = cfg["use_standard_error"]
except (KeyError, TypeError):
module_logger.debug("use_standard_error undefined. Using default value")
super().__init__(data, varname, cfg, autoflag)
try:
self.min_samples = cfg["min_samples"]
except (KeyError, TypeError):
module_logger.debug("min_samples undefined. Using default value")

def keys(self):
return self.features.keys() + ["flag_%s" % f for f in self.flags.keys()]
super().__init__(data, varname, cfg, autoflag)

def set_features(self):

if ("LATITUDE" in self.data.attributes.keys()) and (
"LONGITUDE" in self.data.attributes.keys()
):
kwargs = {
"lat": self.data.attributes["LATITUDE"],
"lon": self.data.attributes["LONGITUDE"],
}

if ("LATITUDE" in self.data.keys()) and ("LONGITUDE" in self.data.keys()):
dLmax = max(
data["LATITUDE"].max() - data["LATITUDE"].min(),
data["LONGITUDE"].max() - data["LONGITUDE"].min(),
)
# Only use each measurement coordinate if it is spread.
if dLmax >= 0.01:
kwargs = {
"lat": self.data["LATITUDE"],
"lon": self.data["LONGITUDE"],
"alongtrack_axis": ["lat", "lon"],
}

if "DEPTH" in self.data.keys():
depth = self.data["DEPTH"]
elif "PRES" in self.data.keys():
depth = self.data["PRES"]

try:
doy = int(self.data.attributes["date"].strftime("%j"))
except:
doy = int(self.data.attributes["datetime"].strftime("%j"))

db = CARS()
if self.varname[-1] == "2":
vtype = self.varname[:-1]
else:
vtype = self.varname

idx = ~ma.getmaskarray(depth) & np.array(depth >= 0)
cars = db[vtype].extract(
var=["mn", "std_dev"], doy=doy, depth=depth[idx], **kwargs
)

if idx.all() is not True:
for v in cars.keys():
tmp = ma.masked_all(depth.shape, dtype=cars[v].dtype)
tmp[idx] = cars[v]
cars[v] = tmp

self.features = {"cars_mean": cars["mn"], "cars_std": cars["std_dev"]}

self.features["cars_bias"] = (
self.data[self.varname] - self.features["cars_mean"]
)

# if use_standard_error = True, the comparison with the climatology
# considers the standard error, i.e. the bias will be only the
            # amount above the standard error range.
assert not self.use_standard_error, "Sorry, I'm not ready for that"
if self.use_standard_error is True:
standard_error = (
self.features["cars_std"] / self.features["cars_nsamples"] ** 0.5
)
idx = np.absolute(self.features["cars_bias"]) <= standard_error
self.features["cars_bias"][idx] = 0
idx = np.absolute(self.features["cars_bias"]) > standard_error
self.features["cars_bias"][idx] -= (
np.sign(self.features["cars_bias"][idx]) * standard_error[idx]
)

self.features["cars_normbias"] = (
self.features["cars_bias"] / self.features["cars_std"]
)
self.features = cars_normbias(self.data, self.varname, self.attrs)
except LookupError:
self.features = {}

def test(self):

        # 3 is the possible minimum to estimate the std, but I should use higher.
try:
min_samples = self.cfg["min_samples"]
except KeyError:
min_samples = 3

self.flags = {}

threshold = self.cfg["threshold"]
assert (np.size(threshold) == 1) and (threshold is not None)

flag = np.zeros(self.data[self.varname].shape, dtype="i1")

if "cars_normbias" not in self.features:
self.flags["cars_normbias"] = flag
return

normbias_abs = np.absolute(self.features["cars_normbias"])
ind = np.nonzero(normbias_abs <= threshold)
flag[ind] = self.flag_good
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.23.3
current_version = 0.23.4
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

setup(
name='cotede',
version='0.23.3',
version='0.23.4',
description='Quality Control of Oceanographic Data',
long_description=readme + '\n\n' + history,
author='Guilherme Castelão',
Expand Down
49 changes: 48 additions & 1 deletion tests/qctests/test_qc_cars_normbias.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,58 @@
from datetime import datetime
import numpy as np

from cotede.qctests import cars_normbias
from cotede.qctests import CARS_NormBias, cars_normbias
from cotede.qc import ProfileQC
from ..data import DummyData


def test_cars_normbias_standard_dataset():
    """All expected features are estimated for the reference dummy profile"""
    profile = DummyData()
    features = cars_normbias(profile, "TEMP")

    expected_features = (
        "cars_mean",
        "cars_std",
        # "cars_nsamples",
        "cars_bias",
        "cars_normbias",
    )
    for feature_name in expected_features:
        assert feature_name in features


def test_cars_normbias_invalid_position():
    """A position off the CARS grid must yield all-NaN features"""
    profile = DummyData()
    assert "LONGITUDE" in profile.attrs
    profile.attrs["LONGITUDE"] = 38

    features = cars_normbias(profile, "TEMP")

    expected_features = (
        "cars_mean",
        "cars_std",
        # "cars_nsamples",
        "cars_bias",
        "cars_normbias",
    )
    for feature_name in expected_features:
        assert np.isnan(features[feature_name]).all()


def test_standard_dataset():
    """Test CARS_NormBias with a standard dataset
    """
    expected_flags = {
        "cars_normbias": np.array(
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 9], dtype="i1"
        )
    }

    profile = DummyData()
    y = CARS_NormBias(profile, "TEMP", {"threshold": 3}, autoflag=True)

    assert len(y.features) > 0
    for flag_name in y.flags:
        assert np.allclose(
            y.flags[flag_name], expected_flags[flag_name], equal_nan=True
        )


def test():
"""
"""
Expand Down

0 comments on commit 2a8796d

Please sign in to comment.