Skip to content

Commit

Permalink
feat: CARS comparison using flexible coordinates (#58)
Browse files Browse the repository at this point in the history
* feat: CARS comparison using flexible coordinates

Upgrading CARS to be closer to WOA, thus also taking advantage of
extracting coordinates from multiple standards.

* Bump version: 0.23.3 → 0.23.4
  • Loading branch information
castelao committed Jan 6, 2021
1 parent de016af commit 2a8796d
Show file tree
Hide file tree
Showing 6 changed files with 189 additions and 88 deletions.
2 changes: 1 addition & 1 deletion cotede/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

__author__ = 'Guilherme Castelao'
__email__ = 'guilherme@castelao.net'
__version__ = '0.23.3'
__version__ = '0.23.4'

from cotede import qc
from cotede.qc import ProfileQC, ProfileQCed
2 changes: 1 addition & 1 deletion cotede/qctests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .fuzzylogic import fuzzylogic

from .bin_spike import Bin_Spike, bin_spike
from .cars_normbias import CARS_NormBias
from .cars_normbias import CARS_NormBias, cars_normbias
from .constant_cluster_size import ConstantClusterSize, constant_cluster_size
from .cum_rate_of_change import CumRateOfChange, cum_rate_of_change
from .deepest_pressure import DeepestPressure
Expand Down
220 changes: 137 additions & 83 deletions cotede/qctests/cars_normbias.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,134 @@
from numpy import ma
from oceansdb import CARS

from .qctests import QCCheckVar
from . import QCCheckVar
from ..utils import extract_coordinates, extract_time, day_of_year, extract_depth


module_logger = logging.getLogger(__name__)


def cars_normbias(data, varname, attrs=None, use_standard_error=False):
    """Compare a measured variable against the CARS climatology

    Extracts time, position, and depth from ``data`` (falling back on
    ``attrs``), queries the CARS climatology at those coordinates, and
    returns the climatological mean and standard deviation together with
    the (normalized) bias of the measurements.

    Parameters
    ----------
    data :
        Data object providing the measurements (``data[varname]``) and
        the metadata required by the coordinate extraction helpers.
    varname : str
        Variable to evaluate (e.g. "TEMP"). A trailing "2" - a legacy
        Seabird CTD convention - is stripped before querying CARS.
    attrs : dict, optional
        Metadata to use instead of ``data``'s own attributes when
        extracting time and position.
    use_standard_error : bool, optional
        If True, only the bias beyond the climatology standard error is
        considered. This requires the number of observations, which is
        not currently extracted from CARS, so this option is not yet
        functional and fails explicitly (see Raises).

    Returns
    -------
    dict
        Keys "cars_mean", "cars_std", "cars_bias", and "cars_normbias",
        each a plain ndarray aligned with ``data[varname]``.

    Raises
    ------
    LookupError
        If time or geolocation (lat/lon) cannot be extracted.
    IndexError
        If there is no valid (finite, non-negative) depth level.
    NotImplementedError
        If ``use_standard_error`` is True (not supported yet).

    Notes
    -----
    - Include arguments to overwrite target variable (timename=None, latname=None, lonname=None)
    """
    try:
        doy = day_of_year(extract_time(data, attrs))
    except LookupError:
        module_logger.error("Missing time")
        raise

    try:
        # Note that QCCheck fallback to self.data.attrs if attrs not given
        lat, lon = extract_coordinates(data, attrs)
    except LookupError:
        module_logger.error("Missing geolocation (lat/lon)")
        raise

    kwargs = {"lat": lat, "lon": lon}

    if (np.size(lat) > 1) | (np.size(lon) > 1):
        dLmax = max(np.max(lat) - np.min(lat), np.max(lon) - np.min(lon))
        if dLmax >= 0.01:
            # Coordinates are spread enough to compare along the track.
            mode = "track"
            # kwargs["alongtrack_axis"] = ['lat', 'lon']
        else:
            mode = "profile"
            kwargs = {
                "lat": np.mean(lat),
                "lon": np.mean(lon),
            }
            module_logger.warning(
                "Multiple lat/lon positions but too close to each other so it will be considered a single position for the WOA comparison. lat: {}, lon: {}".format(
                    kwargs["lat"], kwargs["lon"]
                )
            )
    else:
        mode = "profile"

    depth = extract_depth(data)

    db = CARS()
    # This must go away. This was a trick to handle Seabird CTDs, but
    # now that seabird is a different package it should be handled there.
    if isinstance(varname, str) and (varname[-1] == "2"):
        vtype = varname[:-1]
    else:
        vtype = varname

    cars_vars = [
        "mean",
        # "standard_deviation",
        "std_dev",
        # "number_of_observations",
    ]

    # Eventually the case of some invalid depth levels will be handled by
    # OceansDB and the following steps will be simplified.
    valid_depth = depth
    if np.size(depth) > 0:
        idx = ~ma.getmaskarray(depth) & (np.array(depth) >= 0) & np.isfinite(depth)
        if not idx.any():
            module_logger.error(
                "Invalid depth(s) for CARS comparison: {}".format(depth)
            )
            raise IndexError("No valid depth for CARS comparison")
        elif not idx.all():
            valid_depth = depth[idx]
    if mode == "track":
        cars = db[vtype].track(var=cars_vars, doy=doy, depth=valid_depth, **kwargs)
    else:
        cars = db[vtype].extract(var=cars_vars, doy=doy, depth=valid_depth, **kwargs)

    if not np.all(depth == valid_depth):
        # Re-expand onto the original depth axis, masking the levels that
        # were dropped as invalid.
        for v in cars.keys():
            tmp = ma.masked_all(depth.shape, dtype=cars[v].dtype)
            tmp[idx] = cars[v]
            cars[v] = tmp

    features = {
        "cars_mean": cars["mean"],
        "cars_std": cars["std_dev"],
        # "cars_nsamples": cars["number_of_observations"],
    }

    features["cars_bias"] = data[varname] - features["cars_mean"]

    # Replace masked values by a plain sentinel so downstream users get
    # regular ndarrays instead of masked arrays.
    for v in features:
        mask = ma.getmaskarray(features[v])
        if mask.any():
            if v == "cars_nsamples":
                missing_value = -1
            else:
                missing_value = np.nan
            features[v][mask] = missing_value
        features[v] = np.array(features[v])

    # if use_standard_error = True, the comparison with the climatology
    # considers the standard error, i.e. the bias will be only the
    # amount above the standard error range.
    if use_standard_error is True:
        if "cars_nsamples" not in features:
            # The number of observations is not extracted from CARS (see
            # cars_vars above), so the standard error can't be estimated.
            # Fail fast with a clear message instead of an obscure
            # KeyError further down.
            raise NotImplementedError(
                "use_standard_error requires the number of observations, "
                "which is not currently extracted from CARS"
            )
        standard_error = features["cars_std"]
        idx = features["cars_nsamples"] > 0
        standard_error[~idx] = np.nan
        standard_error[idx] /= features["cars_nsamples"][idx] ** 0.5

        idx = np.absolute(features["cars_bias"]) <= standard_error
        features["cars_bias"][idx] = 0
        idx = np.absolute(features["cars_bias"]) > standard_error
        features["cars_bias"][idx] -= (
            np.sign(features["cars_bias"][idx]) * standard_error[idx]
        )

    features["cars_normbias"] = features["cars_bias"] / features["cars_std"]

    return features


class CARS_NormBias(QCCheckVar):
"""Compares measuremnts with CARS climatology
Expand All @@ -29,107 +151,39 @@ class CARS_NormBias(QCCheckVar):

flag_bad = 3
use_standard_error = False
    # 3 is the possible minimum to estimate the std, but I should use higher.
min_samples = 3

def __init__(self, data, varname, cfg=None, autoflag=True):
try:
self.use_standard_error = cfg["use_standard_error"]
except (KeyError, TypeError):
module_logger.debug("use_standard_error undefined. Using default value")
super().__init__(data, varname, cfg, autoflag)
try:
self.min_samples = cfg["min_samples"]
except (KeyError, TypeError):
module_logger.debug("min_samples undefined. Using default value")

def keys(self):
return self.features.keys() + ["flag_%s" % f for f in self.flags.keys()]
super().__init__(data, varname, cfg, autoflag)

def set_features(self):

if ("LATITUDE" in self.data.attributes.keys()) and (
"LONGITUDE" in self.data.attributes.keys()
):
kwargs = {
"lat": self.data.attributes["LATITUDE"],
"lon": self.data.attributes["LONGITUDE"],
}

if ("LATITUDE" in self.data.keys()) and ("LONGITUDE" in self.data.keys()):
dLmax = max(
data["LATITUDE"].max() - data["LATITUDE"].min(),
data["LONGITUDE"].max() - data["LONGITUDE"].min(),
)
# Only use each measurement coordinate if it is spread.
if dLmax >= 0.01:
kwargs = {
"lat": self.data["LATITUDE"],
"lon": self.data["LONGITUDE"],
"alongtrack_axis": ["lat", "lon"],
}

if "DEPTH" in self.data.keys():
depth = self.data["DEPTH"]
elif "PRES" in self.data.keys():
depth = self.data["PRES"]

try:
doy = int(self.data.attributes["date"].strftime("%j"))
except:
doy = int(self.data.attributes["datetime"].strftime("%j"))

db = CARS()
if self.varname[-1] == "2":
vtype = self.varname[:-1]
else:
vtype = self.varname

idx = ~ma.getmaskarray(depth) & np.array(depth >= 0)
cars = db[vtype].extract(
var=["mn", "std_dev"], doy=doy, depth=depth[idx], **kwargs
)

if idx.all() is not True:
for v in cars.keys():
tmp = ma.masked_all(depth.shape, dtype=cars[v].dtype)
tmp[idx] = cars[v]
cars[v] = tmp

self.features = {"cars_mean": cars["mn"], "cars_std": cars["std_dev"]}

self.features["cars_bias"] = (
self.data[self.varname] - self.features["cars_mean"]
)

# if use_standard_error = True, the comparison with the climatology
# considers the standard error, i.e. the bias will be only the
            # amount above the standard error range.
assert not self.use_standard_error, "Sorry, I'm not ready for that"
if self.use_standard_error is True:
standard_error = (
self.features["cars_std"] / self.features["cars_nsamples"] ** 0.5
)
idx = np.absolute(self.features["cars_bias"]) <= standard_error
self.features["cars_bias"][idx] = 0
idx = np.absolute(self.features["cars_bias"]) > standard_error
self.features["cars_bias"][idx] -= (
np.sign(self.features["cars_bias"][idx]) * standard_error[idx]
)

self.features["cars_normbias"] = (
self.features["cars_bias"] / self.features["cars_std"]
)
self.features = cars_normbias(self.data, self.varname, self.attrs)
except LookupError:
self.features = {}

def test(self):

        # 3 is the possible minimum to estimate the std, but I should use higher.
try:
min_samples = self.cfg["min_samples"]
except KeyError:
min_samples = 3

self.flags = {}

threshold = self.cfg["threshold"]
assert (np.size(threshold) == 1) and (threshold is not None)

flag = np.zeros(self.data[self.varname].shape, dtype="i1")

if "cars_normbias" not in self.features:
self.flags["cars_normbias"] = flag
return

normbias_abs = np.absolute(self.features["cars_normbias"])
ind = np.nonzero(normbias_abs <= threshold)
flag[ind] = self.flag_good
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.23.3
current_version = 0.23.4
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

setup(
name='cotede',
version='0.23.3',
version='0.23.4',
description='Quality Control of Oceanographic Data',
long_description=readme + '\n\n' + history,
author='Guilherme Castelão',
Expand Down
49 changes: 48 additions & 1 deletion tests/qctests/test_qc_cars_normbias.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,58 @@
from datetime import datetime
import numpy as np

from cotede.qctests import cars_normbias
from cotede.qctests import CARS_NormBias, cars_normbias
from cotede.qc import ProfileQC
from ..data import DummyData


def test_cars_normbias_standard_dataset():
    """All expected features are estimated for the reference dummy profile"""
    profile = DummyData()
    features = cars_normbias(profile, "TEMP")

    expected_features = (
        "cars_mean",
        "cars_std",
        # "cars_nsamples",
        "cars_bias",
        "cars_normbias",
    )
    for feature_name in expected_features:
        assert feature_name in features


def test_cars_normbias_invalid_position():
    """A position off the CARS grid must yield all-NaN features"""
    profile = DummyData()
    assert "LONGITUDE" in profile.attrs
    profile.attrs["LONGITUDE"] = 38

    features = cars_normbias(profile, "TEMP")

    expected_features = (
        "cars_mean",
        "cars_std",
        # "cars_nsamples",
        "cars_bias",
        "cars_normbias",
    )
    for feature_name in expected_features:
        assert np.isnan(features[feature_name]).all()


def test_standard_dataset():
    """Test CARS_NormBias with a standard dataset
    """
    expected_flags = {
        "cars_normbias": np.array(
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 9], dtype="i1"
        )
    }

    profile = DummyData()
    y = CARS_NormBias(profile, "TEMP", {"threshold": 3}, autoflag=True)

    assert len(y.features) > 0
    for flag_name in y.flags:
        assert np.allclose(
            y.flags[flag_name], expected_flags[flag_name], equal_nan=True
        )


def test():
"""
"""
Expand Down

0 comments on commit 2a8796d

Please sign in to comment.