From a5acf1ef3a44d26481160f9f65ec9d5ee7469beb Mon Sep 17 00:00:00 2001
From: Maarten Sebregts
Date: Tue, 9 Jul 2024 13:55:07 +0200
Subject: [PATCH 01/42] Implement `DBEntry.get_sample` in IMASPy

---
 imaspy/backends/db_entry_impl.py            |  32 +-
 imaspy/backends/imas_core/al_context.py     |  36 ++
 imaspy/backends/imas_core/db_entry_al.py    |  27 +-
 imaspy/backends/imas_core/imas_interface.py |   7 +
 imaspy/db_entry.py                          | 130 ++++++-
 imaspy/test/test_get_sample.py              | 393 ++++++++++++++++++++
 6 files changed, 603 insertions(+), 22 deletions(-)
 create mode 100644 imaspy/test/test_get_sample.py
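A quick sketch of how the three read modes map onto the new parameter objects
introduced below (illustrative only, not part of the patch; `LINEAR_INTERP`
comes from `imaspy.ids_defs`):

    import numpy
    from imaspy import ids_defs
    from imaspy.backends.db_entry_impl import GetSampleParameters, GetSliceParameters

    # DBEntry.get() passes parameters=None (read the full IDS).
    # DBEntry.get_slice() wraps its arguments in GetSliceParameters:
    slice_params = GetSliceParameters(
        time_requested=0.3, interpolation_method=ids_defs.LINEAR_INTERP
    )
    # DBEntry.get_sample() wraps its arguments in GetSampleParameters.
    # Mode 1 (no interpolation): dtime and interpolation_method are None:
    range_params = GetSampleParameters(
        tmin=0.0, tmax=1.0, dtime=None, interpolation_method=None
    )
    # Modes 2/3 (interpolation): dtime is always a 1D numpy array here,
    # since DBEntry.get_sample() calls numpy.atleast_1d() on its argument:
    interp_params = GetSampleParameters(
        tmin=0.0, tmax=1.0, dtime=numpy.array([0.5]),
        interpolation_method=ids_defs.LINEAR_INTERP,
    )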
diff --git a/imaspy/backends/db_entry_impl.py b/imaspy/backends/db_entry_impl.py
index 7f86e622..dbbb1329 100644
--- a/imaspy/backends/db_entry_impl.py
+++ b/imaspy/backends/db_entry_impl.py
@@ -2,13 +2,34 @@
 # You should have received the IMASPy LICENSE file with this project.
 
 from abc import ABC, abstractmethod
-from typing import Any, List, Optional
+from dataclasses import dataclass
+from typing import Any, List, Optional, Union
+
+import numpy
 
 from imaspy.ids_convert import NBCPathMap
 from imaspy.ids_factory import IDSFactory
 from imaspy.ids_toplevel import IDSToplevel
 
 
+@dataclass
+class GetSliceParameters:
+    """Helper class to store parameters to get_slice."""
+
+    time_requested: float
+    interpolation_method: int
+
+
+@dataclass
+class GetSampleParameters:
+    """Helper class to store parameters to get_sample."""
+
+    tmin: float
+    tmax: float
+    dtime: Optional[numpy.ndarray]
+    interpolation_method: Optional[int]
+
+
 class DBEntryImpl(ABC):
     """Interface for DBEntry implementations."""
 
@@ -47,20 +68,17 @@ def get(
         self,
         ids_name: str,
         occurrence: int,
-        time_requested: Optional[float],
-        interpolation_method: int,
+        parameters: Union[None, GetSliceParameters, GetSampleParameters],
         destination: IDSToplevel,
         lazy: bool,
         nbc_map: Optional[NBCPathMap],
     ) -> None:
-        """Implement DBEntry.get()/get_slice(). Load data from the data source.
+        """Implement DBEntry.get/get_slice/get_sample. Load data from the data source.
 
         Args:
             ids_name: Name of the IDS to load.
             occurrence: Which occurence of the IDS to load.
-            time_requested: None for get(), requested time slice for get_slice().
-            interpolation_method: Requested interpolation method (ignore when
-                time_requested is None).
+            parameters: Additional parameters for a get_slice/get_sample call.
             destination: IDS object to store data in.
             lazy: Use lazy loading.
             nbc_map: NBCPathMap to use for implicit conversion. When None, no implicit
diff --git a/imaspy/backends/imas_core/al_context.py b/imaspy/backends/imas_core/al_context.py
index 07f37dec..d14f6bfd 100644
--- a/imaspy/backends/imas_core/al_context.py
+++ b/imaspy/backends/imas_core/al_context.py
@@ -8,6 +8,8 @@
 from contextlib import contextmanager
 from typing import TYPE_CHECKING, Any, Callable, Iterator, List, Optional, Tuple
 
+import numpy
+
 from imaspy.backends.imas_core.imas_interface import ll_interface
 from imaspy.exception import LowlevelError
 from imaspy.ids_defs import (
@@ -105,6 +107,21 @@ def slice_action(
             raise LowlevelError("slice_action", status)
         return ALContext(ctx)
 
+    def timerange_action(
+        self,
+        path: str,
+        rwmode: int,
+        tmin: float,
+        tmax: float,
+        dtime: Optional[numpy.ndarray],
+        interpolation_method: int,
+    ) -> "ALContext":
+        """Begin a new timerange action for use in a ``with`` context."""
+        ctx = ll_interface.begin_timerange_action(
+            self.ctx, path, rwmode, tmin, tmax, dtime, interpolation_method
+        )
+        return ALContext(ctx)
+
     def arraystruct_action(
         self, path: str, timebase: str, size: int
     ) -> "ALArrayStructContext":
@@ -317,6 +334,25 @@ def slice_action(
             (path, rwmode, time_requested, interpolation_method),
         )
 
+    @contextmanager
+    def timerange_action(
+        self,
+        path: str,
+        rwmode: int,
+        tmin: float,
+        tmax: float,
+        dtime: Optional[numpy.ndarray],
+        interpolation_method: int,
+    ) -> Iterator["LazyALContext"]:
+        """Lazily start a lowlevel timerange action, see
+        :meth:`ALContext.timerange_action`.
+        """
+        yield LazyALContext(
+            self,
+            ALContext.timerange_action,
+            (path, rwmode, tmin, tmax, dtime, interpolation_method),
+        )
+
     def arraystruct_action(
         self, path: str, timebase: str, size: int
     ) -> "LazyALArrayStructContext":
diff --git a/imaspy/backends/imas_core/db_entry_al.py b/imaspy/backends/imas_core/db_entry_al.py
index e126bf9b..89cf3625 100644
--- a/imaspy/backends/imas_core/db_entry_al.py
+++ b/imaspy/backends/imas_core/db_entry_al.py
@@ -5,9 +5,10 @@
 import logging
 import os
 from collections import deque
-from typing import Any, Deque, List, Optional
+from typing import Any, Deque, List, Optional, Union
 from urllib.parse import urlparse
 
+from imaspy.backends.db_entry_impl import GetSampleParameters, GetSliceParameters
 from imaspy.db_entry import DBEntryImpl
 from imaspy.exception import DataEntryException, LowlevelError
 from imaspy.ids_convert import NBCPathMap, dd_version_map_from_factories
@@ -216,8 +217,7 @@ def get(
         self,
         ids_name: str,
         occurrence: int,
-        time_requested: Optional[float],
-        interpolation_method: int,
+        parameters: Union[None, GetSliceParameters, GetSampleParameters],
         destination: IDSToplevel,
         lazy: bool,
         nbc_map: Optional[NBCPathMap],
@@ -245,13 +245,28 @@ def get(
         else:
             context = self._db_ctx
         # Now fill the IDSToplevel
-        if time_requested is None or destination.metadata.type is IDSType.CONSTANT:
+        if parameters is None or destination.metadata.type is IDSType.CONSTANT:
             # called from get(), or when the IDS is constant (see IMAS-3330)
             manager = context.global_action(ll_path, READ_OP)
-        else:  # get_slice
+        elif isinstance(parameters, GetSliceParameters):
             manager = context.slice_action(
-                ll_path, READ_OP, time_requested, interpolation_method
+                ll_path,
+                READ_OP,
+                parameters.time_requested,
+                parameters.interpolation_method,
             )
+        elif isinstance(parameters, GetSampleParameters):
+            manager = context.timerange_action(
+                ll_path,
+                READ_OP,
+                parameters.tmin,
+                parameters.tmax,
+                parameters.dtime,
+                parameters.interpolation_method,
+            )
+        else:
+            raise TypeError(f"Incorrect type for parameters: {type(parameters)}.")
+
         with manager as read_ctx:
             if lazy:
                 destination._set_lazy_context(read_ctx)
diff --git a/imaspy/backends/imas_core/imas_interface.py b/imaspy/backends/imas_core/imas_interface.py
index 07f4783e..cca7d42f 100644
--- a/imaspy/backends/imas_core/imas_interface.py
+++ b/imaspy/backends/imas_core/imas_interface.py
@@ -215,6 +215,13 @@ def get_occurrences(self, ctx, ids_name):
     def get_al_version(self):
         return self._al_version_str
 
+    # New methods added in AL 5.3
+
+    def begin_timerange_action(
+        self, ctx, path, rwmode, tmin, tmax, dtime, interpolation_method
+    ):
+        raise self._minimal_version("5.3")
+
 
 # Dummy documentation for interface:
 for funcname in dir(LowlevelInterface):
diff --git a/imaspy/db_entry.py b/imaspy/db_entry.py
index 9ca826b7..ba5bcac6 100644
--- a/imaspy/db_entry.py
+++ b/imaspy/db_entry.py
@@ -5,10 +5,16 @@
 import logging
 import os
-from typing import Any, List, Optional, Tuple, Type, overload
+from typing import Any, List, Optional, Tuple, Type, Union, overload
+
+import numpy
 
 import imaspy
-from imaspy.backends.db_entry_impl import DBEntryImpl
+from imaspy.backends.db_entry_impl import (
+    DBEntryImpl,
+    GetSampleParameters,
+    GetSliceParameters,
+)
 from imaspy.dd_zip import dd_xml_versions
 from imaspy.exception import IDSNameError, UnknownDDVersion, ValidationError
 from imaspy.ids_base import IDSBase
@@ -347,7 +353,6 @@ def get(
             ids_name,
             occurrence,
             None,
-            0,
             destination,
             lazy,
             autoconvert,
@@ -416,8 +421,117 @@ def get_slice(
         return self._get(
             ids_name,
             occurrence,
-            time_requested,
-            interpolation_method,
+            GetSliceParameters(time_requested, interpolation_method),
             destination,
             lazy,
             autoconvert,
             ignore_unknown_dd_version,
         )
 
+    def get_sample(
+        self,
+        ids_name: str,
+        tmin: float,
+        tmax: float,
+        dtime: Optional[Union[float, numpy.ndarray]] = None,
+        interpolation_method: Optional[int] = None,
+        occurrence: int = 0,
+        *,
+        lazy: bool = False,
+        autoconvert: bool = True,
+        ignore_unknown_dd_version: bool = False,
+        destination: Optional[IDSToplevel] = None,
+    ) -> IDSToplevel:
+        """Read a range of time slices from an IDS in this Database Entry.
+
+        This method has three different modes, depending on the provided arguments:
+
+        1. No interpolation. This mode is selected when :param:`dtime` and
+           :param:`interpolation_method` are not provided.
+
+           This mode returns an IDS object with all constant/static data filled. The
+           dynamic data is retrieved for the provided time range [tmin, tmax].
+
+        2. Interpolate dynamic data on a uniform time base. This mode is selected
+           when :param:`dtime` and :param:`interpolation_method` are provided.
+           :param:`dtime` must be a number or a numpy array of size 1.
+
+           This mode will generate an IDS with a homogeneous time vector ``[tmin, tmin
+           + dtime, tmin + 2*dtime, ...`` up to ``tmax``. The returned IDS always has
+           ``ids_properties.homogeneous_time = 1``.
+
+        3. Interpolate dynamic data on an explicit time base. This mode is selected
+           when :param:`dtime` and :param:`interpolation_method` are provided.
+           :param:`dtime` must be a numpy array of size larger than 1.
+
+           This mode will generate an IDS with a homogeneous time vector equal to
+           :param:`dtime`. :param:`tmin` and :param:`tmax` are ignored in this mode.
+           The returned IDS always has ``ids_properties.homogeneous_time = 1``.
+
+        Args:
+            ids_name: Name of the IDS to read from the backend
+            tmin: Lower bound of the requested time range
+            tmax: Upper bound of the requested time range, must be larger than or
+                equal to :param:`tmin`
+            dtime: Interval to use when interpolating, must be positive, or numpy
+                array containing an explicit time base to interpolate.
+            interpolation_method: Interpolation method to use. Available options:
+
+                - :const:`~imaspy.ids_defs.CLOSEST_INTERP`
+                - :const:`~imaspy.ids_defs.PREVIOUS_INTERP`
+                - :const:`~imaspy.ids_defs.LINEAR_INTERP`
+
+            occurrence: Which occurrence of the IDS to read.
+
+        Keyword Args:
+            lazy: When set to ``True``, values in this IDS will be retrieved only when
+                needed (instead of getting the full IDS immediately). See :ref:`Lazy
+                loading` for more details.
+            autoconvert: Automatically convert IDSs.
+
+                If enabled (default), a call to ``get_sample()`` will return
+                an IDS from the Data Dictionary version attached to this Data Entry.
+                Data is automatically converted between the on-disk version and the
+                in-memory version.
+
+                When set to ``False``, the IDS will be returned in the DD version it
+                was stored in.
+            ignore_unknown_dd_version: When an IDS is stored with an unknown DD
+                version, do not attempt automatic conversion and fetch the data in
+                the Data Dictionary version attached to this Data Entry.
+            destination: Populate this IDSToplevel instead of creating an empty one.
+
+        Returns:
+            The loaded IDS.
+
+        Example:
+            .. code-block:: python
+
+                import imaspy
+                import numpy
+                from imaspy import ids_defs
+
+                imas_entry = imaspy.DBEntry(
+                    "imas:mdsplus?user=public;pulse=131024;run=41;database=ITER", "r")
+
+                # All time slices between t=200 and t=370
+                core_profiles = imas_entry.get_sample("core_profiles", 200, 370)
+
+                # Closest points to [0, 100, 200, ..., 1000]
+                core_profiles_interp = imas_entry.get_sample(
+                    "core_profiles", 0, 1000, 100, ids_defs.CLOSEST_INTERP)
+
+                # Linear interpolation for [10, 11, 12, 14, 16, 20, 30, 40, 50]
+                times = numpy.array([10, 11, 12, 14, 16, 20, 30, 40, 50])
+                core_profiles_interp = imas_entry.get_sample(
+                    "core_profiles", 0, 0, times, ids_defs.LINEAR_INTERP)
+        """
+        if dtime is not None:
+            dtime = numpy.atleast_1d(dtime)  # Convert floats and 0D arrays to 1D array
+        return self._get(
+            ids_name,
+            occurrence,
+            GetSampleParameters(tmin, tmax, dtime, interpolation_method),
             destination,
             lazy,
             autoconvert,
@@ -428,8 +542,7 @@ def _get(
         self,
         ids_name: str,
         occurrence: int,
-        time_requested: Optional[float],
-        interpolation_method: int,
+        parameters: Union[None, GetSliceParameters, GetSampleParameters],
         destination: Optional[IDSToplevel],
         lazy: bool,
         autoconvert: bool,
@@ -492,8 +605,7 @@ def _get(
         return self._dbe_impl.get(
             ids_name,
             occurrence,
-            time_requested,
-            interpolation_method,
+            parameters,
             destination,
             lazy,
             nbc_map,
diff --git a/imaspy/test/test_get_sample.py b/imaspy/test/test_get_sample.py
new file mode 100644
index 00000000..7c3b210f
--- /dev/null
+++ b/imaspy/test/test_get_sample.py
@@ -0,0 +1,393 @@
+import numpy as np
+import pytest
+
+import imaspy
+from imaspy.backends.imas_core.imas_interface import lowlevel
+from imaspy.exception import DataEntryException
+from imaspy.ids_defs import (
+    CLOSEST_INTERP,
+    EMPTY_FLOAT,
+    HDF5_BACKEND,
+    IDS_TIME_MODE_HETEROGENEOUS,
+    IDS_TIME_MODE_HOMOGENEOUS,
+    LINEAR_INTERP,
+    MDSPLUS_BACKEND,
+    PREVIOUS_INTERP,
+)
+
+
+@pytest.fixture()
+def test_db_uri(backend, worker_id, tmp_path_factory):
+    # Check if begin_timerange_action is available in imas_core
+    if not hasattr(lowlevel, "al_begin_timerange_action"):
+        pytest.skip("imas_core version doesn't support begin_timerange_action.")
+
+    if backend not in [HDF5_BACKEND, MDSPLUS_BACKEND]:
+        pytest.skip("Backend doesn't support time range operations.")
+
+    tmp_path = tmp_path_factory.mktemp(f"testdb.{worker_id}")
+    backend_str = {HDF5_BACKEND: "hdf5", MDSPLUS_BACKEND: "mdsplus"}[backend]
+    uri = f"imas:{backend_str}?path={tmp_path}"
+    entry = imaspy.DBEntry(uri, "x")
+
+    # Homogeneous core profiles:
+    cp = entry.factory.core_profiles()
+    cp.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    N_time = 32
+    cp.time = np.linspace(0, 1, N_time)
+    cp.profiles_1d.resize(N_time)
+    for i in range(N_time):
+        # FLT_1D:
+        cp.profiles_1d[i].grid.rho_tor_norm = np.array([0.0, 1.0])
+        cp.profiles_1d[i].t_i_average = np.array([2.0, 1.0]) * (i + 1)
+        cp.profiles_1d[i].ion.resize(1)
+        # STR_0D:
+        cp.profiles_1d[i].ion[0].label = "D"
+        # FLT_0D
+        cp.profiles_1d[i].ion[0].z_ion = 1.0
+        cp.profiles_1d[i].ion[0].temperature = cp.profiles_1d[i].t_i_average
+        # INT_0D
+        cp.profiles_1d[i].ion[0].temperature_validity = 0
+    cp.global_quantities.ip = (2 - cp.time) ** 0.5
+    entry.put(cp)
+
+    # Inhomogeneous equilibrium
+    eq = entry.factory.equilibrium()
+    eq.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS
+    eq.time = np.linspace(0, 2, 512)
+    # GGD Grid with 1 time slice
+    eq.grids_ggd.resize(1)
+    eq.grids_ggd[0].time = 0.0
+    eq.grids_ggd[0].grid.resize(1)
+    eq.grids_ggd[0].grid[0].path = "wall:0/description_ggd(1)/grid_ggd"
+    # multiple time slices with data
+    N_time = 6
+    eq.time_slice.resize(N_time)
+    for i in range(N_time):
+        # FLT_0D
+        eq.time_slice[i].time = i / 5.0
+        eq.time_slice[i].profiles_2d.resize(1)
+        # FLT_1D
+        eq.time_slice[i].profiles_2d[0].grid.dim1 = np.array([0.0, 1.0])
+        eq.time_slice[i].profiles_2d[0].grid.dim2 = np.array([3.0, 4.0])
+        # STR_0D
+        eq.time_slice[i].profiles_2d[0].grid_type.name = f"test {i}"
+        eq.time_slice[i].profiles_2d[0].grid_type.description = "test description"
+        # INT_0D
+        eq.time_slice[i].profiles_2d[0].grid_type.index = -1
+        # FLT_2D
+        eq.time_slice[i].profiles_2d[0].r = np.array([[0.0, 0.0], [1.0, 1.0]])
+        eq.time_slice[i].profiles_2d[0].z = np.array([[3.0, 4.0], [3.0, 4.0]])
+        eq.time_slice[i].profiles_2d[0].psi = (
+            eq.time_slice[i].profiles_2d[0].r - eq.time_slice[i].profiles_2d[0].z
+        ) * (1 + eq.time_slice[i].time) ** 2
+    entry.put(eq)
+
+    # Equilibrium only has dynamic AOS and no other non-homogeneous time nodes
+    # Use magnetics to test that case:
+    mag = entry.factory.magnetics()
+    mag.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS
+    mag.time = np.array([0.0])
+    mag.flux_loop.resize(3)
+    for i in range(3):
+        mag.flux_loop[i].flux.time = np.linspace(0.0123, 1, 5 + i)
+        mag.flux_loop[i].flux.data = 2 + 2 * mag.flux_loop[i].flux.time
+        mag.flux_loop[i].voltage.time = np.linspace(0.0123, 1, 8 + i)
+        mag.flux_loop[i].voltage.data = 2 - 5 * mag.flux_loop[i].voltage.time
+    entry.put(mag)
+
+    entry.close()
+    return uri
+
+
+def test_invalid_arguments(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
+    with pytest.raises(ValueError):
+        entry.get_sample("core_profiles", 0.3, 0.2)  # tmin > tmax
+    with pytest.raises(DataEntryException):
+        entry.get_sample("core_profiles", 0.1, 0.2, occurrence="invalid")
+    with pytest.raises(ValueError):
+        entry.get_sample("core_profiles", 0.1, 0.2, 0.05)  # no interpolation method
+
+
+def test_get_sample_homogeneous(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
entry.get_sample("core_profiles", 0.3, 14 / 31) + assert np.array_equal(cp.time, np.linspace(0, 1, 32)[10:15]) + + for i, p1d in enumerate(cp.profiles_1d): + assert np.array_equal(p1d.grid.rho_tor_norm, [0.0, 1.0]) + assert np.array_equal(p1d.t_i_average, np.array([2.0, 1.0]) * (i + 11)) + assert len(p1d.ion) == 1 + assert p1d.ion[0].label == "D" + assert p1d.ion[0].z_ion == 1 + assert np.array_equal(p1d.ion[0].temperature, p1d.t_i_average) + assert p1d.ion[0].temperature_validity == 0 + + assert np.array_equal(cp.global_quantities.ip, (2 - cp.time) ** 0.5) + + +def test_get_sample_heterogeneous(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + eq = entry.get_sample("equilibrium", -1.0, 0.2) + # Main time array + assert np.array_equal(eq.time, np.linspace(0, 2, 512)[:52]) + # grids_ggd AoS + assert len(eq.grids_ggd) == 1 + assert eq.grids_ggd[0].time == 0.0 + assert eq.grids_ggd[0].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" + # time_slice AoS + assert len(eq.time_slice) == 2 + assert eq.time_slice[0].time == 0.0 + assert eq.time_slice[1].time == 0.2 + + for i in range(2): + p2d = eq.time_slice[i].profiles_2d[0] + assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) + assert np.array_equal(p2d.grid.dim2, [3.0, 4.0]) + assert p2d.grid_type.name == f"test {i}" + assert p2d.grid_type.index == -1 + assert np.array_equal(p2d.r, [[0.0, 0.0], [1.0, 1.0]]) + assert np.array_equal(p2d.z, [[3.0, 4.0], [3.0, 4.0]]) + expected_psi = (p2d.r - p2d.z) * (1 + eq.time_slice[i].time) ** 2 + assert np.array_equal(p2d.psi, expected_psi) + + mag = entry.get_sample("magnetics", 0.25, 0.75) + assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS + assert len(mag.time) == 0 + assert len(mag.flux_loop) == 3 + for i in range(3): + fl = mag.flux_loop[i] + + flux_time = np.linspace(0.0123, 1, 5 + i) + flux_time = flux_time[0.25 <= flux_time] + flux_time = flux_time[flux_time <= 0.75] + assert np.array_equal(fl.flux.time, flux_time) + assert np.array_equal(fl.flux.data, 2 + 2 * flux_time) + + voltage_time = np.linspace(0.0123, 1, 8 + i) + voltage_time = voltage_time[0.25 <= voltage_time] + voltage_time = voltage_time[voltage_time <= 0.75] + assert np.array_equal(fl.voltage.time, voltage_time) + assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) + + +def test_get_sample_homogeneous_linear_interp(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + # Note requesting 0.401 and not 0.4, since + # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17 + cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, LINEAR_INTERP) + assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0) + + assert len(cp.profiles_1d) == 6 + # Check some interpolated values + for i in range(6): + # Check rho_tor_norm + rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm + assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0])) + # Check t_i_average + expected = np.array([2.0, 1.0]) * (1 + 31 * cp.time[i]) + t_i_average = cp.profiles_1d[i].t_i_average + assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0) + + +def test_get_sample_homogeneous_explicit_timebase(test_db_uri): + entry = imaspy.DBEntry(test_db_uri, "r") + times = [0.1, 0.2345, 0.5, np.sqrt(2) / 2] + cp = entry.get_sample("core_profiles", 0, 0, times, LINEAR_INTERP) + assert np.allclose(cp.time, times, rtol=1e-14, atol=0) + + assert len(cp.profiles_1d) == 4 + # Check some interpolated values + for i in range(4): + # Check rho_tor_norm + rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm + assert 
+        assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0]))
+        # Check t_i_average
+        expected = np.array([2.0, 1.0]) * (1 + 31 * cp.time[i])
+        t_i_average = cp.profiles_1d[i].t_i_average
+        assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
+
+
+def test_get_sample_homogeneous_previous_interp(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
+    # Note requesting 0.401 and not 0.4, since
+    # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17
+    cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, PREVIOUS_INTERP)
+    assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0)
+
+    assert len(cp.profiles_1d) == 6
+    # Check some interpolated values
+    for i in range(6):
+        # Check rho_tor_norm
+        rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm
+        assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0]))
+        # Check t_i_average
+        expected = np.array([2.0, 1.0]) * [10, 10, 11, 12, 12, 13][i]
+        t_i_average = cp.profiles_1d[i].t_i_average
+        assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
+
+
+def test_get_sample_homogeneous_closest_interp(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
+    # Note requesting 0.401 and not 0.4, since
+    # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17
+    cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, CLOSEST_INTERP)
+    assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0)
+
+    assert len(cp.profiles_1d) == 6
+    # Check some interpolated values
+    for i in range(6):
+        # Check rho_tor_norm
+        rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm
+        assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0]))
+        # Check t_i_average
+        expected = np.array([2.0, 1.0]) * [10, 11, 12, 12, 13, 13][i]
+        t_i_average = cp.profiles_1d[i].t_i_average
+        assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
+
+
+def test_get_sample_heterogeneous_linear_interp(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
+    eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, LINEAR_INTERP)
+    N_samples = 7
+    # IDS becomes homogeneous after resampling
+    assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples))
+    assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+
+    # Check interpolated grids_ggd
+    assert len(eq.grids_ggd) == N_samples
+    for i in range(N_samples):
+        assert eq.grids_ggd[i].time == EMPTY_FLOAT
+        assert len(eq.grids_ggd[i].grid) == 1
+        assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd"
+
+    # Check interpolated time_slice
+    assert len(eq.time_slice) == N_samples
+    for i in range(N_samples):
+        assert eq.time_slice[i].time == EMPTY_FLOAT
+        assert len(eq.time_slice[i].profiles_2d) == 1
+        p2d = eq.time_slice[i].profiles_2d[0]
+        assert np.array_equal(p2d.grid.dim1, [0.0, 1.0])
+        assert np.array_equal(p2d.grid.dim2, [3.0, 4.0])
+
+        # Determine the data as we have stored it in test_db_uri()
+        time = eq.time[i]
+        original_times = [0, 0.2, 0.4, 0.6, 0.8, 1.0]
+        index = np.searchsorted(original_times, time)
+        prevtime = original_times[index - 1]
+        nexttime = original_times[index]
+        prevpsi = (p2d.r - p2d.z) * (1 + prevtime) ** 2
+        nextpsi = (p2d.r - p2d.z) * (1 + nexttime) ** 2
+        # Linear interpolation
+        expected_psi = (nextpsi * (time - prevtime) + prevpsi * (nexttime - time)) / (
+            nexttime - prevtime
+        )
+        assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0)
+
+    mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, LINEAR_INTERP)
+    assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+    assert np.allclose(mag.time, np.linspace(0.2, 0.5, N_samples))
+
+    assert len(mag.flux_loop) == 3
+    for i in range(3):
+        fl = mag.flux_loop[i]
+        assert np.allclose(fl.flux.data, 2 + 2 * mag.time, rtol=1e-14, atol=0)
+        assert np.allclose(fl.voltage.data, 2 - 5 * mag.time, rtol=1e-14, atol=2e-16)
+
+
+def test_get_sample_heterogeneous_previous_interp(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
+    eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, PREVIOUS_INTERP)
+    N_samples = 7
+    # IDS becomes homogeneous after resampling
+    assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples))
+    assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+
+    # Check interpolated grids_ggd
+    assert len(eq.grids_ggd) == N_samples
+    for i in range(N_samples):
+        assert eq.grids_ggd[i].time == EMPTY_FLOAT
+        assert len(eq.grids_ggd[i].grid) == 1
+        assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd"
+
+    # Check interpolated time_slice
+    assert len(eq.time_slice) == N_samples
+    for i in range(N_samples):
+        assert eq.time_slice[i].time == EMPTY_FLOAT
+        assert len(eq.time_slice[i].profiles_2d) == 1
+        p2d = eq.time_slice[i].profiles_2d[0]
+        assert np.array_equal(p2d.grid.dim1, [0.0, 1.0])
+        assert np.array_equal(p2d.grid.dim2, [3.0, 4.0])
+
+        origtime = [0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.4][i]
+        expected_psi = (p2d.r - p2d.z) * (1 + origtime) ** 2
+        assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0)
+
+    mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, PREVIOUS_INTERP)
+    assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+    assert np.allclose(mag.time, np.linspace(0.2, 0.5, N_samples))
+
+    assert len(mag.flux_loop) == 3
+    for i in range(3):
+        fl = mag.flux_loop[i]
+
+        flux_time = np.linspace(0.0123, 1, 5 + i)
+        flux_time = flux_time[np.searchsorted(flux_time, mag.time, side="right") - 1]
+        assert np.array_equal(fl.flux.data, 2 + 2 * flux_time)
+
+        voltage_time = np.linspace(0.0123, 1, 8 + i)
+        voltage_time = voltage_time[
+            np.searchsorted(voltage_time, mag.time, side="right") - 1
+        ]
+        assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time)
+
+
+def test_get_sample_heterogeneous_closest_interp(test_db_uri):
+    entry = imaspy.DBEntry(test_db_uri, "r")
+    eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, CLOSEST_INTERP)
+    N_samples = 7
+    # IDS becomes homogeneous after resampling
+    assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples))
+    assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+
+    # Check interpolated grids_ggd
+    assert len(eq.grids_ggd) == N_samples
+    for i in range(N_samples):
+        assert eq.grids_ggd[i].time == EMPTY_FLOAT
+        assert len(eq.grids_ggd[i].grid) == 1
+        assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd"
+
+    # Check interpolated time_slice
+    assert len(eq.time_slice) == N_samples
+    for i in range(N_samples):
+        assert eq.time_slice[i].time == EMPTY_FLOAT
+        assert len(eq.time_slice[i].profiles_2d) == 1
+        p2d = eq.time_slice[i].profiles_2d[0]
+        assert np.array_equal(p2d.grid.dim1, [0.0, 1.0])
+        assert np.array_equal(p2d.grid.dim2, [3.0, 4.0])
+
+        # Note: CLOSEST appears to round up: 0.4 is closer to 0.3 than 0.2
+        origtime = [0.2, 0.2, 0.4, 0.4, 0.4, 0.4, 0.6][i]
+        expected_psi = (p2d.r - p2d.z) * (1 + origtime) ** 2
+        assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0)
+
+    mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, CLOSEST_INTERP)
+    assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+    assert np.allclose(mag.time, np.linspace(0.2, 0.5, N_samples))
+
+    assert len(mag.flux_loop) == 3
+    for i in range(3):
+        fl = mag.flux_loop[i]
+
+        flux_time = np.linspace(0.0123, 1, 5 + i)
+        flux_time = flux_time[
+            np.argmin(np.abs(flux_time[None, :] - mag.time[:, None]), axis=1)
+        ]
+        assert np.array_equal(fl.flux.data, 2 + 2 * flux_time)
+
+        voltage_time = np.linspace(0.0123, 1, 8 + i)
+        voltage_time = voltage_time[
+            np.argmin(np.abs(voltage_time[None, :] - mag.time[:, None]), axis=1)
+        ]
+        assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time)

From fd49e02171ab03cb997303bb31eb9543db2ae251 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts
Date: Tue, 9 Jul 2024 14:16:14 +0200
Subject: [PATCH 02/42] Add docstrings for GetSliceParameters /
 GetSampleParameters

---
 imaspy/backends/db_entry_impl.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/imaspy/backends/db_entry_impl.py b/imaspy/backends/db_entry_impl.py
index dbbb1329..9fa42bd8 100644
--- a/imaspy/backends/db_entry_impl.py
+++ b/imaspy/backends/db_entry_impl.py
@@ -17,7 +17,9 @@ class GetSliceParameters:
     """Helper class to store parameters to get_slice."""
 
     time_requested: float
+    """See :param:`imaspy.db_entry.DBEntry.get_slice.time_requested`."""
     interpolation_method: int
+    """See :param:`imaspy.db_entry.DBEntry.get_slice.interpolation_method`."""
 
 
 @dataclass
@@ -25,9 +27,13 @@ class GetSampleParameters:
     """Helper class to store parameters to get_sample."""
 
     tmin: float
+    """See :param:`imaspy.db_entry.DBEntry.get_sample.tmin`."""
     tmax: float
+    """See :param:`imaspy.db_entry.DBEntry.get_sample.tmax`."""
     dtime: Optional[numpy.ndarray]
+    """See :param:`imaspy.db_entry.DBEntry.get_sample.dtime`."""
     interpolation_method: Optional[int]
+    """See :param:`imaspy.db_entry.DBEntry.get_sample.interpolation_method`."""
 
 
 class DBEntryImpl(ABC):

From 73268bb212c06c1dc23223a24e31537d8cffbee6 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts
Date: Mon, 11 Nov 2024 10:11:44 +0100
Subject: [PATCH 03/42] Update tests for DD 4.0.0

---
 imaspy/test/test_get_sample.py | 54 +++++++++++++++-------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/imaspy/test/test_get_sample.py b/imaspy/test/test_get_sample.py
index 7c3b210f..beffe52d 100644
--- a/imaspy/test/test_get_sample.py
+++ b/imaspy/test/test_get_sample.py
@@ -6,7 +6,6 @@
 from imaspy.exception import DataEntryException
 from imaspy.ids_defs import (
     CLOSEST_INTERP,
-    EMPTY_FLOAT,
     HDF5_BACKEND,
     IDS_TIME_MODE_HETEROGENEOUS,
     IDS_TIME_MODE_HOMOGENEOUS,
     LINEAR_INTERP,
     MDSPLUS_BACKEND,
     PREVIOUS_INTERP,
 )
@@ -28,7 +27,7 @@ def test_db_uri(backend, worker_id, tmp_path_factory):
     tmp_path = tmp_path_factory.mktemp(f"testdb.{worker_id}")
     backend_str = {HDF5_BACKEND: "hdf5", MDSPLUS_BACKEND: "mdsplus"}[backend]
     uri = f"imas:{backend_str}?path={tmp_path}"
-    entry = imaspy.DBEntry(uri, "x")
+    entry = imaspy.DBEntry(uri, "x", dd_version="4.0.0")
 
     # Homogeneous core profiles:
     cp = entry.factory.core_profiles()
@@ -42,7 +41,7 @@ def test_db_uri(backend, worker_id, tmp_path_factory):
         cp.profiles_1d[i].t_i_average = np.array([2.0, 1.0]) * (i + 1)
         cp.profiles_1d[i].ion.resize(1)
         # STR_0D:
-        cp.profiles_1d[i].ion[0].label = "D"
+        cp.profiles_1d[i].ion[0].name = "D"
         # FLT_0D
         cp.profiles_1d[i].ion[0].z_ion = 1.0
         cp.profiles_1d[i].ion[0].temperature = cp.profiles_1d[i].t_i_average
@@ -100,8 +99,12 @@ def test_db_uri(backend, worker_id, tmp_path_factory):
     return uri
 
 
+@pytest.fixture()
+def entry(test_db_uri):
+    return imaspy.DBEntry(test_db_uri, "r", dd_version="4.0.0")
+
+
-def test_invalid_arguments(test_db_uri):
-    entry = imaspy.DBEntry(test_db_uri, "r")
+def test_invalid_arguments(entry):
     with pytest.raises(ValueError):
         entry.get_sample("core_profiles", 0.3, 0.2)  # tmin > tmax
     with pytest.raises(DataEntryException):
@@ -110,8 +113,7 @@
         entry.get_sample("core_profiles", 0.1, 0.2, occurrence="invalid")
     with pytest.raises(ValueError):
         entry.get_sample("core_profiles", 0.1, 0.2, 0.05)  # no interpolation method
 
 
-def test_get_sample_homogeneous(test_db_uri):
-    entry = imaspy.DBEntry(test_db_uri, "r")
+def test_get_sample_homogeneous(entry):
     cp = entry.get_sample("core_profiles", 0.3, 14 / 31)
     assert np.array_equal(cp.time, np.linspace(0, 1, 32)[10:15])
 
@@ -119,7 +121,7 @@ def test_get_sample_homogeneous(test_db_uri):
         assert np.array_equal(p1d.grid.rho_tor_norm, [0.0, 1.0])
         assert np.array_equal(p1d.t_i_average, np.array([2.0, 1.0]) * (i + 11))
         assert len(p1d.ion) == 1
-        assert p1d.ion[0].label == "D"
+        assert p1d.ion[0].name == "D"
         assert p1d.ion[0].z_ion == 1
         assert np.array_equal(p1d.ion[0].temperature, p1d.t_i_average)
         assert p1d.ion[0].temperature_validity == 0
@@ -127,8 +129,7 @@ def test_get_sample_homogeneous(test_db_uri):
     assert np.array_equal(cp.global_quantities.ip, (2 - cp.time) ** 0.5)
 
 
-def test_get_sample_heterogeneous(test_db_uri):
-    entry = imaspy.DBEntry(test_db_uri, "r")
+def test_get_sample_heterogeneous(entry):
     eq = entry.get_sample("equilibrium", -1.0, 0.2)
     # Main time array
     assert np.array_equal(eq.time, np.linspace(0, 2, 512)[:52])
@@ -172,8 +173,7 @@ def test_get_sample_heterogeneous(test_db_uri):
     assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time)
 
 
-def test_get_sample_homogeneous_linear_interp(test_db_uri):
-    entry = imaspy.DBEntry(test_db_uri, "r")
+def test_get_sample_homogeneous_linear_interp(entry):
     # Note requesting 0.401 and not 0.4, since
     # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17
     cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, LINEAR_INTERP)
     assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0)
@@ -191,8 +191,7 @@ def test_get_sample_homogeneous_linear_interp(test_db_uri):
     assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
 
 
-def test_get_sample_homogeneous_explicit_timebase(test_db_uri):
-    entry = imaspy.DBEntry(test_db_uri, "r")
+def test_get_sample_homogeneous_explicit_timebase(entry):
     times = [0.1, 0.2345, 0.5, np.sqrt(2) / 2]
     cp = entry.get_sample("core_profiles", 0, 0, times, LINEAR_INTERP)
     assert np.allclose(cp.time, times, rtol=1e-14, atol=0)
@@ -209,8 +208,7 @@ def test_get_sample_homogeneous_explicit_timebase(test_db_uri):
     assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
 
 
-def test_get_sample_homogeneous_previous_interp(test_db_uri):
-    entry = imaspy.DBEntry(test_db_uri, "r")
+def test_get_sample_homogeneous_previous_interp(entry):
     # Note requesting 0.401 and not 0.4, since
     # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17
     cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, PREVIOUS_INTERP)
     assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0)
@@ -228,8 +226,7 @@ def test_get_sample_homogeneous_previous_interp(test_db_uri):
     assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
 
 
-def test_get_sample_homogeneous_closest_interp(test_db_uri):
-    entry = imaspy.DBEntry(test_db_uri, "r")
+def test_get_sample_homogeneous_closest_interp(entry):
     # Note requesting 0.401 and not 0.4, since
     # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17
     cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, CLOSEST_INTERP)
     assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0)
@@ -247,8 +244,7 @@ def test_get_sample_homogeneous_closest_interp(test_db_uri):
     assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
 
 
-def test_get_sample_heterogeneous_linear_interp(test_db_uri):
-    entry = imaspy.DBEntry(test_db_uri, "r")
+def test_get_sample_heterogeneous_linear_interp(entry):
entry.get_sample("equilibrium", 0.2, 0.501, 0.05, LINEAR_INTERP) N_samples = 7 # IDS becomes homogeneous after resampling @@ -258,14 +254,14 @@ def test_get_sample_heterogeneous_linear_interp(test_db_uri): # Check interpolated grids_ggd assert len(eq.grids_ggd) == N_samples for i in range(N_samples): - assert eq.grids_ggd[i].time == EMPTY_FLOAT + # assert eq.grids_ggd[i].time == EMPTY_FLOAT assert len(eq.grids_ggd[i].grid) == 1 assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" # Check interpolated time_slice assert len(eq.time_slice) == N_samples for i in range(N_samples): - assert eq.time_slice[i].time == EMPTY_FLOAT + # assert eq.time_slice[i].time == EMPTY_FLOAT assert len(eq.time_slice[i].profiles_2d) == 1 p2d = eq.time_slice[i].profiles_2d[0] assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) @@ -296,8 +292,7 @@ def test_get_sample_heterogeneous_linear_interp(test_db_uri): assert np.allclose(fl.voltage.data, 2 - 5 * mag.time, rtol=1e-14, atol=2e-16) -def test_get_sample_heterogeneous_previous_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_heterogeneous_previous_interp(entry): eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, PREVIOUS_INTERP) N_samples = 7 # IDS becomes homogeneous after resampling @@ -307,14 +302,14 @@ def test_get_sample_heterogeneous_previous_interp(test_db_uri): # Check interpolated grids_ggd assert len(eq.grids_ggd) == N_samples for i in range(N_samples): - assert eq.grids_ggd[i].time == EMPTY_FLOAT + # assert eq.grids_ggd[i].time == EMPTY_FLOAT assert len(eq.grids_ggd[i].grid) == 1 assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" # Check interpolated time_slice assert len(eq.time_slice) == N_samples for i in range(N_samples): - assert eq.time_slice[i].time == EMPTY_FLOAT + # assert eq.time_slice[i].time == EMPTY_FLOAT assert len(eq.time_slice[i].profiles_2d) == 1 p2d = eq.time_slice[i].profiles_2d[0] assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) @@ -343,8 +338,7 @@ def test_get_sample_heterogeneous_previous_interp(test_db_uri): assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time) -def test_get_sample_heterogeneous_closest_interp(test_db_uri): - entry = imaspy.DBEntry(test_db_uri, "r") +def test_get_sample_heterogeneous_closest_interp(entry): eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, CLOSEST_INTERP) N_samples = 7 # IDS becomes homogeneous after resampling @@ -354,14 +348,14 @@ def test_get_sample_heterogeneous_closest_interp(test_db_uri): # Check interpolated grids_ggd assert len(eq.grids_ggd) == N_samples for i in range(N_samples): - assert eq.grids_ggd[i].time == EMPTY_FLOAT + # assert eq.grids_ggd[i].time == EMPTY_FLOAT assert len(eq.grids_ggd[i].grid) == 1 assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd" # Check interpolated time_slice assert len(eq.time_slice) == N_samples for i in range(N_samples): - assert eq.time_slice[i].time == EMPTY_FLOAT + # assert eq.time_slice[i].time == EMPTY_FLOAT assert len(eq.time_slice[i].profiles_2d) == 1 p2d = eq.time_slice[i].profiles_2d[0] assert np.array_equal(p2d.grid.dim1, [0.0, 1.0]) From bc9ea191da4626ec92905b68352d3e12d2a10c08 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 11 Nov 2024 10:47:46 +0100 Subject: [PATCH 04/42] Update NCDBEntryImpl for get_sample and raise NotImplementedError --- imaspy/backends/netcdf/db_entry_nc.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py 
diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py
index ba7334fc..f04630db 100644
--- a/imaspy/backends/netcdf/db_entry_nc.py
+++ b/imaspy/backends/netcdf/db_entry_nc.py
@@ -1,9 +1,13 @@
 """DBEntry implementation using NetCDF as a backend."""
 
 import logging
-from typing import List
+from typing import List, Optional, Union
 
-from imaspy.backends.db_entry_impl import DBEntryImpl
+from imaspy.backends.db_entry_impl import (
+    DBEntryImpl,
+    GetSampleParameters,
+    GetSliceParameters,
+)
 from imaspy.backends.netcdf.ids2nc import IDS2NC
 from imaspy.backends.netcdf.nc2ids import nc2ids
 from imaspy.exception import DataEntryException
@@ -74,15 +78,18 @@ def get(
         self,
         ids_name: str,
         occurrence: int,
-        time_requested: float | None,
-        interpolation_method: int,
+        parameters: Union[None, GetSliceParameters, GetSampleParameters],
         destination: IDSToplevel,
         lazy: bool,
-        nbc_map: NBCPathMap | None,
+        nbc_map: Optional[NBCPathMap],
     ) -> None:
         # Feature compatibility checks
-        if time_requested is not None:
-            raise NotImplementedError("`get_slice` is not available for netCDF files.")
+        if parameters is not None:
+            if isinstance(parameters, GetSliceParameters):
+                func = "get_slice"
+            else:
+                func = "get_sample"
+            raise NotImplementedError(f"`{func}` is not available for netCDF files.")
         if lazy:
             raise NotImplementedError(
                 "Lazy loading is not implemented for netCDF files."

From 674460bbece63ccbd51f03e1d644a7916d250a6f Mon Sep 17 00:00:00 2001
From: Maarten Sebregts
Date: Mon, 11 Nov 2024 10:49:14 +0100
Subject: [PATCH 05/42] Set `begin_timerange_action` as available since AL
 core 5.4

---
 imaspy/backends/imas_core/imas_interface.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/imaspy/backends/imas_core/imas_interface.py b/imaspy/backends/imas_core/imas_interface.py
index cca7d42f..b92438b1 100644
--- a/imaspy/backends/imas_core/imas_interface.py
+++ b/imaspy/backends/imas_core/imas_interface.py
@@ -215,12 +215,12 @@ def get_occurrences(self, ctx, ids_name):
     def get_al_version(self):
         return self._al_version_str
 
-    # New methods added in AL 5.3
+    # New methods added in AL 5.4
 
     def begin_timerange_action(
         self, ctx, path, rwmode, tmin, tmax, dtime, interpolation_method
     ):
-        raise self._minimal_version("5.3")
+        raise self._minimal_version("5.4")
 
 
 # Dummy documentation for interface:

From 0a47f94e97467d6e57c65e335590daab25f021b0 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts
Date: Thu, 14 Nov 2024 10:17:49 +0100
Subject: [PATCH 06/42] Make the NC2IDS reader class-based

In preparation for validating the NC data

---
 imaspy/backends/netcdf/nc2ids.py | 157 ++++++++++++++++++-------------
 1 file changed, 89 insertions(+), 68 deletions(-)

diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py
index d071a3ba..2877b297 100644
--- a/imaspy/backends/netcdf/nc2ids.py
+++ b/imaspy/backends/netcdf/nc2ids.py
@@ -54,10 +54,98 @@ def _tree_iter(
             yield from _tree_iter(node, paths, curindex + (i,))
 
 
+class NC2IDS:
+    """Class responsible for reading an IDS from a NetCDF group."""
+
+    def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None:
+        """Initialize NC2IDS converter.
+
+        Args:
+            group: NetCDF group that stores the IDS data.
+            ids: Corresponding IDS toplevel to store the data in.
+        """
+        self.group = group
+        """NetCDF Group that the IDS is stored in."""
+        self.ids = ids
+        """IDS to store the data in."""
+
+        self.ncmeta = NCMetadata(ids.metadata)
+        """NetCDF related metadata."""
+        self.variables = list(group.variables)
+        """List of variable names stored in the netCDF group."""
+        # TODO: validate ids_properties.homogeneous_time
+        self.homogeneous_time = (
+            group["ids_properties.homogeneous_time"][()] == IDS_TIME_MODE_HOMOGENEOUS
+        )
+        """True iff the IDS time mode is homogeneous."""
+
+        # Don't use masked arrays: they're slow and we'll handle most of the unset
+        # values through the `:shape` arrays
+        self.group.set_auto_mask(False)
+
+    def run(self) -> None:
+        # FIXME: ensure that var_names are sorted properly
+        # Current assumption is that creation-order is fine
+        for var_name in self.variables:
+            if var_name.endswith(":shape"):
+                continue  # TODO: validate that this is used
+
+            # FIXME: error handling:
+            metadata = self.ids.metadata[var_name]
+
+            # TODO: validate metadata (data type, units, etc.) conforms to DD
+
+            if metadata.data_type is IDSDataType.STRUCTURE:
+                continue  # This only contains DD metadata we already know
+
+            var = self.group[var_name]
+            if metadata.data_type is IDSDataType.STRUCT_ARRAY:
+                if "sparse" in var.ncattrs():
+                    shapes = self.group[var_name + ":shape"][()]
+                    for index, node in tree_iter(self.ids, metadata):
+                        node.resize(shapes[index][0])
+
+                else:
+                    # FIXME: extract dimension name from nc file?
+                    dim = self.ncmeta.get_dimensions(
+                        metadata.path_string, self.homogeneous_time
+                    )[-1]
+                    size = self.group.dimensions[dim].size
+                    for _, node in tree_iter(self.ids, metadata):
+                        node.resize(size)
+
+                continue
+
+            # FIXME: this may be a gigantic array, not required for sparse data
+            var = self.group[var_name]
+            data = var[()]
+
+            if "sparse" in var.ncattrs():
+                if metadata.ndim:
+                    shapes = self.group[var_name + ":shape"][()]
+                    for index, node in tree_iter(self.ids, metadata):
+                        shape = shapes[index]
+                        if shape.all():
+                            node.value = data[index + tuple(map(slice, shapes[index]))]
+                else:
+                    for index, node in tree_iter(self.ids, metadata):
+                        value = data[index]
+                        if value != getattr(var, "_FillValue", None):
+                            node.value = data[index]
+
+            elif metadata.path_string not in self.ncmeta.aos:
+                # Shortcut for assigning untensorized data
+                self.ids[metadata.path] = data
+
+            else:
+                for index, node in tree_iter(self.ids, metadata):
+                    node.value = data[index]
+
+
 def nc2ids(group: netCDF4.Group, ids: IDSToplevel):
     """Get data from the netCDF group and store it in the provided IDS."""
     try:
-        _nc2ids(group, ids)
+        NC2IDS(group, ids).run()
     except Exception as exc:
         raise RuntimeError(
             "An error occurred while reading data from the netCDF file "
@@ -66,70 +154,3 @@ def nc2ids(group: netCDF4.Group, ids: IDSToplevel):
             "may cause errors in IMASPy. A more robust mechanism to load IDS data from "
             "netCDF files will be included in the next release of IMASPy."
         ) from exc
-
-
-def _nc2ids(group: netCDF4.Group, ids: IDSToplevel):
-    var_names = list(group.variables)
-    # FIXME: ensure that var_names are sorted properly
-    # Current assumption is that creation-order is fine
-    homogeneous_time = (
-        group["ids_properties.homogeneous_time"][()] == IDS_TIME_MODE_HOMOGENEOUS
-    )
-    ncmeta = NCMetadata(ids.metadata)
-
-    # Never return masked arrays, they're slow and we'll handle most of the unset values
-    # through the `:shape` arrays
-    group.set_auto_mask(False)
-
-    for var_name in var_names:
-        if var_name.endswith(":shape"):
-            continue  # TODO: validate that this is used
-
-        # FIXME: error handling:
-        metadata = ids.metadata[var_name]
-
-        # TODO: validate metadata (data type, units, etc.) conforms to DD
-
-        if metadata.data_type is IDSDataType.STRUCTURE:
-            continue  # This only contains DD metadata we already know
-
-        var = group[var_name]
-        if metadata.data_type is IDSDataType.STRUCT_ARRAY:
-            if "sparse" in var.ncattrs():
-                shapes = group[var_name + ":shape"][()]
-                for index, node in tree_iter(ids, metadata):
-                    node.resize(shapes[index][0])
-
-            else:
-                # FIXME: extract dimension name from nc file?
-                dim = ncmeta.get_dimensions(metadata.path_string, homogeneous_time)[-1]
-                size = group.dimensions[dim].size
-                for _, node in tree_iter(ids, metadata):
-                    node.resize(size)
-
-            continue
-
-        # FIXME: this may be a gigantic array, not required for sparse data
-        var = group[var_name]
-        data = var[()]
-
-        if "sparse" in var.ncattrs():
-            if metadata.ndim:
-                shapes = group[var_name + ":shape"][()]
-                for index, node in tree_iter(ids, metadata):
-                    shape = shapes[index]
-                    if shape.all():
-                        node.value = data[index + tuple(map(slice, shapes[index]))]
-            else:
-                for index, node in tree_iter(ids, metadata):
-                    value = data[index]
-                    if value != getattr(var, "_FillValue", None):
-                        node.value = data[index]
-
-        elif metadata.path_string not in ncmeta.aos:
-            # Shortcut for assigning untensorized data
-            ids[metadata.path] = data
-
-        else:
-            for index, node in tree_iter(ids, metadata):
-                node.value = data[index]

From d59fcabc29d5691004649bcf48016255010d7fa7 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts
Date: Thu, 14 Nov 2024 14:35:30 +0100
Subject: [PATCH 07/42] Add missing docstring

---
 imaspy/backends/netcdf/nc2ids.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py
index 2877b297..cc3ebc25 100644
--- a/imaspy/backends/netcdf/nc2ids.py
+++ b/imaspy/backends/netcdf/nc2ids.py
@@ -84,6 +84,7 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None:
         self.group.set_auto_mask(False)
 
     def run(self) -> None:
+        """Load the data from the netCDF group into the IDS."""
         # FIXME: ensure that var_names are sorted properly
         # Current assumption is that creation-order is fine
         for var_name in self.variables:

From ee385b736cb3f10a7378343f2de59e46815b26b7 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts
Date: Fri, 15 Nov 2024 11:46:33 +0100
Subject: [PATCH 08/42] Disable MDSplus backend tests for get_sample

Feature not yet implemented, see IMAS-5593

---
 imaspy/test/test_get_sample.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/imaspy/test/test_get_sample.py b/imaspy/test/test_get_sample.py
index beffe52d..0f5fed3e 100644
--- a/imaspy/test/test_get_sample.py
+++ b/imaspy/test/test_get_sample.py
@@ -21,7 +21,8 @@ def test_db_uri(backend, worker_id, tmp_path_factory):
     # Check if begin_timerange_action is available in imas_core
     if not hasattr(lowlevel, "al_begin_timerange_action"):
         pytest.skip("imas_core version doesn't support begin_timerange_action.")
 
-    if backend not in [HDF5_BACKEND, MDSPLUS_BACKEND]:
+    # TODO: add MDSPLUS_BACKEND once implemented, see IMAS-5593
+    if backend not in [HDF5_BACKEND]:
         pytest.skip("Backend doesn't support time range operations.")
 
     tmp_path = tmp_path_factory.mktemp(f"testdb.{worker_id}")

From b007316a44e07a80a8ccad67f62b017a537b2332 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts
Date: Wed, 13 Nov 2024 16:32:25 +0100
Subject: [PATCH 09/42] Add validation for the ids_properties.homogeneous_time
 variable in netCDF IDSs.

---
 imaspy/backends/netcdf/ids2nc.py  |  8 +++---
 imaspy/backends/netcdf/nc2ids.py  | 47 +++++++++++++++++++++++++++----
 imaspy/exception.py               |  4 +++
 imaspy/test/test_nc_validation.py | 36 +++++++++++++++++++++++
 4 files changed, 85 insertions(+), 10 deletions(-)
 create mode 100644 imaspy/test/test_nc_validation.py

diff --git a/imaspy/backends/netcdf/ids2nc.py b/imaspy/backends/netcdf/ids2nc.py
index 9fad4044..34e63101 100644
--- a/imaspy/backends/netcdf/ids2nc.py
+++ b/imaspy/backends/netcdf/ids2nc.py
@@ -23,10 +23,10 @@
     IDSDataType.CPX: netCDF4.default_fillvals["f8"] * (1 + 1j),
 }
 dtypes = {
-    IDSDataType.INT: numpy.int32,
+    IDSDataType.INT: numpy.dtype(numpy.int32),
     IDSDataType.STR: str,
-    IDSDataType.FLT: numpy.float64,
-    IDSDataType.CPX: numpy.complex128,
+    IDSDataType.FLT: numpy.dtype(numpy.float64),
+    IDSDataType.CPX: numpy.dtype(numpy.complex128),
 }
 SHAPE_DTYPE = numpy.int32
 
@@ -188,7 +188,7 @@ def create_variables(self) -> None:
             kwargs = {}
             if dtype is not str:  # Enable compression:
                 kwargs.update(compression="zlib", complevel=1)
-            if dtype is not numpy.complex128:  # Set fillvalue
+            if dtype is not dtypes[IDSDataType.CPX]:  # Set fillvalue
                 kwargs.update(fill_value=default_fillvals[metadata.data_type])
             # Create variable
             dimensions = get_dimensions(path, self.homogeneous_time)
diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py
index cc3ebc25..24cbc7b3 100644
--- a/imaspy/backends/netcdf/nc2ids.py
+++ b/imaspy/backends/netcdf/nc2ids.py
@@ -2,7 +2,9 @@
 
 import netCDF4
 
+from imaspy.backends.netcdf import ids2nc
 from imaspy.backends.netcdf.nc_metadata import NCMetadata
+from imaspy.exception import InvalidNetCDFEntry
 from imaspy.ids_base import IDSBase
 from imaspy.ids_data_type import IDSDataType
 from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
@@ -73,16 +75,27 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None:
         """NetCDF related metadata."""
         self.variables = list(group.variables)
         """List of variable names stored in the netCDF group."""
-        # TODO: validate ids_properties.homogeneous_time
-        self.homogeneous_time = (
-            group["ids_properties.homogeneous_time"][()] == IDS_TIME_MODE_HOMOGENEOUS
-        )
-        """True iff the IDS time mode is homogeneous."""
-
         # Don't use masked arrays: they're slow and we'll handle most of the unset
         # values through the `:shape` arrays
         self.group.set_auto_mask(False)
 
+        # Validate and get value of ids_properties.homogeneous_time
+        self.homogeneous_time = True  # Must be initialized for self._validate_variable
+        """True iff the IDS time mode is homogeneous."""
+
+        if "ids_properties.homogeneous_time" not in self.variables:
+            raise InvalidNetCDFEntry(
+                "Mandatory variable `ids_properties.homogeneous_time` does not exist."
+            )
+        var = group["ids_properties.homogeneous_time"]
+        self._validate_variable(var, ids.ids_properties.homogeneous_time.metadata)
+        if var[()] not in [0, 1, 2]:
+            raise InvalidNetCDFEntry(
+                f"Invalid value for ids_properties.homogeneous_time: {var[()]}. "
+                "Was expecting: 0, 1 or 2."
+            )
+        self.homogeneous_time = var[()] == IDS_TIME_MODE_HOMOGENEOUS
+
     def run(self) -> None:
         """Load the data from the netCDF group into the IDS."""
         # FIXME: ensure that var_names are sorted properly
@@ -142,6 +155,28 @@ def run(self) -> None:
                 for index, node in tree_iter(self.ids, metadata):
                     node.value = data[index]
 
+    def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None:
+        """Validate that the variable has correct metadata, raise an exception if not.
+
+        Args:
+            var: NetCDF variable
+            metadata: IDSMetadata of the corresponding IDS object
+        """
+        if var.dtype != ids2nc.dtypes[metadata.data_type]:
+            raise InvalidNetCDFEntry(
+                f"Variable {var.name} has incorrect data type: {var.dtype}. "
+                f"Was expecting: {ids2nc.dtypes[metadata.data_type]}."
+            )
+        # Dimensions
+        expected_dims = self.ncmeta.get_dimensions(
+            metadata.path_string, self.homogeneous_time
+        )
+        if var.dimensions != expected_dims:
+            raise InvalidNetCDFEntry(
+                f"Variable {var.name} has incorrect dimensions: {var.dimensions}. "
+                f"Was expecting: {expected_dims}."
+            )
+
 
 def nc2ids(group: netCDF4.Group, ids: IDSToplevel):
     """Get data from the netCDF group and store it in the provided IDS."""
diff --git a/imaspy/exception.py b/imaspy/exception.py
index 8377d13b..550ce2ed 100644
--- a/imaspy/exception.py
+++ b/imaspy/exception.py
@@ -101,3 +101,7 @@ def __init__(self, node, dimension, expected_size, coor_path):
         super().__init__(
             f"Element `{node._path}` has incorrect shape {node.shape}: {details}"
         )
+
+
+class InvalidNetCDFEntry(Exception):
+    """Error raised when loading an IDS from a NetCDF file that fails validation."""
diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py
new file mode 100644
index 00000000..dc5309f0
--- /dev/null
+++ b/imaspy/test/test_nc_validation.py
@@ -0,0 +1,36 @@
+import netCDF4
+import pytest
+
+from imaspy.backends.netcdf.nc2ids import NC2IDS
+from imaspy.exception import InvalidNetCDFEntry
+from imaspy.ids_factory import IDSFactory
+
+
+@pytest.fixture()
+def memfile():
+    with netCDF4.Dataset("-", "w", diskless=True) as memfile:
+        yield memfile
+
+
+def test_invalid_homogeneous_time(memfile):
+    empty_group = memfile.createGroup("empty_group")
+    # Invalid dtype
+    invalid_dtype = memfile.createGroup("invalid_dtype")
+    invalid_dtype.createVariable("ids_properties.homogeneous_time", float, ())[()] = 0
+    # Invalid shape: 1D instead of 0D
+    invalid_shape = memfile.createGroup("invalid_shape")
+    invalid_shape.createDimension("dim")
+    invalid_shape.createVariable("ids_properties.homogeneous_time", "i4", ("dim",))
+    # Invalid value: not 0, 1 or 2
+    invalid_value = memfile.createGroup("invalid_value")
+    invalid_value.createVariable("ids_properties.homogeneous_time", "i4", ())
+
+    ids = IDSFactory().core_profiles()
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(empty_group, ids)  # ids_properties.homogeneous_time does not exist
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(invalid_dtype, ids)
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(invalid_shape, ids)
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(invalid_value, ids)

From f7be3845994c6510be29a35d47b58b7504fbaa7a Mon Sep 17 00:00:00 2001
From: Maarten Sebregts
Date: Fri, 15 Nov 2024 16:05:27 +0100
Subject: [PATCH 10/42] Additional validation checks and tests

---
 imaspy/backends/netcdf/nc2ids.py  | 109 ++++++++++++++++++++++++++----
 imaspy/test/test_nc_validation.py |  69 ++++++++++++++++++-
 2 files changed, 164 insertions(+), 14 deletions(-)
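To illustrate what the expanded checks catch, here is a minimal sketch
mirroring the tests in this patch (an in-memory netCDF file whose `units`
attribute is deliberately wrong; all names are taken from the test code below):

    import netCDF4

    from imaspy.backends.netcdf.ids2nc import IDS2NC
    from imaspy.backends.netcdf.nc2ids import NC2IDS
    from imaspy.exception import InvalidNetCDFEntry
    from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
    from imaspy.ids_factory import IDSFactory

    factory = IDSFactory("4.0.0")
    ids = factory.core_profiles()
    ids.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
    ids.time = [1.0, 2.0, 3.0]

    with netCDF4.Dataset("-", "w", diskless=True) as memfile:
        IDS2NC(ids, memfile).run()       # store the IDS in the in-memory file
        memfile["time"].units = "hours"  # the Data Dictionary expects "s"
        try:
            NC2IDS(memfile, factory.core_profiles()).run()
        except InvalidNetCDFEntry as exc:
            print(exc)  # reports the units mismatch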
diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py
index 24cbc7b3..e2cf65b3 100644
--- a/imaspy/backends/netcdf/nc2ids.py
+++ b/imaspy/backends/netcdf/nc2ids.py
@@ -1,3 +1,4 @@
+import logging
 from typing import Iterator, List, Tuple
 
 import netCDF4
@@ -12,6 +13,15 @@
 from imaspy.ids_structure import IDSStructure
 from imaspy.ids_toplevel import IDSToplevel
 
+logger = logging.getLogger(__name__)
+
+
+def variable_error(var, issue, value, expected=None) -> InvalidNetCDFEntry:
+    return InvalidNetCDFEntry(
+        f"Variable `{var.name}` has incorrect {issue}: `{value}`."
+        + (f" Was expecting `{expected}`." if expected is not None else "")
+    )
+
 
 def split_on_aos(metadata: IDSMetadata):
     paths = []
@@ -98,6 +108,7 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None:
 
     def run(self) -> None:
         """Load the data from the netCDF group into the IDS."""
+        self._validate_variables()
         # FIXME: ensure that var_names are sorted properly
         # Current assumption is that creation-order is fine
         for var_name in self.variables:
@@ -155,6 +166,42 @@ def run(self) -> None:
                 for index, node in tree_iter(self.ids, metadata):
                     node.value = data[index]
 
+    def _validate_variables(self) -> None:
+        """Validate that all variables in the netCDF Group exist and match the DD."""
+        self.variables.sort()
+        for var_name in self.variables:
+            if var_name.endswith(":shape"):
+                # Check that there is a corresponding variable
+                data_var = var_name.rpartition(":shape")[0]
+                if data_var not in self.variables:
+                    raise InvalidNetCDFEntry(
+                        f"Invalid netCDF variable: {var_name}. "
+                        f"Shape information provided for non-existing {data_var}."
+                    )
+                # Corresponding variable must be sparse
+                if "sparse" not in self.group[data_var].ncattrs():
+                    raise InvalidNetCDFEntry(
+                        f"Shape information provided for {data_var}, but this variable "
+                        "is not sparse."
+                    )
+                # That's all for :shape arrays
+                continue
+
+            # Check that the DD defines this variable, and validate its metadata
+            var = self.group[var_name]
+            try:
+                metadata = self.ids.metadata[var_name]
+            except KeyError:
+                raise InvalidNetCDFEntry(
+                    f"Invalid variable {var_name}: no such variable exists in the "
+                    f"{self.ids.metadata.name} IDS."
+                )
+            self._validate_variable(var, metadata)
+
+            # Validate sparsity metadata
+            if "sparse" in var.ncattrs():
+                ...  # TODO
+
     def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None:
         """Validate that the variable has correct metadata, raise an exception if not.
 
@@ -162,20 +209,58 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None:
             var: NetCDF variable
             metadata: IDSMetadata of the corresponding IDS object
         """
-        if var.dtype != ids2nc.dtypes[metadata.data_type]:
-            raise InvalidNetCDFEntry(
-                f"Variable {var.name} has incorrect data type: {var.dtype}. "
-                f"Was expecting: {ids2nc.dtypes[metadata.data_type]}."
+        attrs: dict = vars(var).copy()
+        attrs.pop("_FillValue", None)
+        if metadata.data_type not in [IDSDataType.STRUCTURE, IDSDataType.STRUCT_ARRAY]:
+            # Data type
+            expected_dtype = ids2nc.dtypes[metadata.data_type]
+            if var.dtype != expected_dtype:
+                raise variable_error(var, "data type", var.dtype, expected_dtype)
+
+            # Dimensions
+            expected_dims = self.ncmeta.get_dimensions(
+                metadata.path_string, self.homogeneous_time
             )
-        # Dimensions
-        expected_dims = self.ncmeta.get_dimensions(
-            metadata.path_string, self.homogeneous_time
-        )
-        if var.dimensions != expected_dims:
-            raise InvalidNetCDFEntry(
-                f"Variable {var.name} has incorrect dimensions: {var.dimensions}. "
-                f"Was expecting: {expected_dims}."
+ if var.dimensions != expected_dims: + raise variable_error(var, "dimensions", var.dimensions, expected_dims) + + # Coordinates + coordinates = str(attrs.pop("coordinates", "")) + expected_coordinates = self.ncmeta.get_coordinates( + metadata.path_string, self.homogeneous_time ) + if any(coord not in expected_coordinates for coord in coordinates.split()): + raise variable_error( + var, "coordinates", coordinates, " ".join(expected_coordinates) + ) + + # Ancillary variables + ancvar = attrs.pop("ancillary_variables", None) + if ancvar: + allowed_ancvar = [f"{var.name}_error_upper", f"{var.name}_error_lower"] + if any(var not in allowed_ancvar for var in ancvar.split()): + raise variable_error( + var, "ancillary_variables", ancvar, " ".join(allowed_ancvar) + ) + + # Units + units = attrs.pop("units", None) + if metadata.units and metadata.units != units: + raise variable_error(var, "units", units, metadata.units) + + # Sparse + sparse = attrs.pop("sparse", None) + if sparse is not None: + ... # TODO + + # Documentation + doc = attrs.pop("documentation", None) + if metadata.documentation != doc: + logger.warning("Documentation of variable %s differs from the DD", var.name) + + # Unknown attrs + if attrs: + raise variable_error(var, "attributes", list(attrs.keys())) def nc2ids(group: netCDF4.Group, ids: IDSToplevel): diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py index dc5309f0..d3bf8c09 100644 --- a/imaspy/test/test_nc_validation.py +++ b/imaspy/test/test_nc_validation.py @@ -1,8 +1,10 @@ import netCDF4 import pytest +from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import NC2IDS from imaspy.exception import InvalidNetCDFEntry +from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS from imaspy.ids_factory import IDSFactory @@ -12,7 +14,26 @@ def memfile(): yield memfile -def test_invalid_homogeneous_time(memfile): +@pytest.fixture() +def factory(): + return IDSFactory("4.0.0") + + +@pytest.fixture() +def memfile_with_ids(memfile, factory): + ids = factory.core_profiles() + ids.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + ids.time = [1.0, 2.0, 3.0] + ids.profiles_1d.resize(2) + for i in range(2): + ids.profiles_1d[i].grid.rho_tor_norm = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0] + IDS2NC(ids, memfile).run() + # This one is valid: + NC2IDS(memfile, factory.core_profiles()).run() + return memfile + + +def test_invalid_homogeneous_time(memfile, factory): empty_group = memfile.createGroup("empty_group") # Invalid dtype invalid_dtype = memfile.createGroup("invalid_dtype") @@ -25,7 +46,7 @@ def test_invalid_homogeneous_time(memfile): invalid_value = memfile.createGroup("invalid_value") invalid_value.createVariable("ids_properties.homogeneous_time", "i4", ()) - ids = IDSFactory().core_profiles() + ids = factory.core_profiles() with pytest.raises(InvalidNetCDFEntry): NC2IDS(empty_group, ids) # ids_properties.homogeneous_time does not exist with pytest.raises(InvalidNetCDFEntry): @@ -34,3 +55,47 @@ def test_invalid_homogeneous_time(memfile): NC2IDS(invalid_shape, ids) with pytest.raises(InvalidNetCDFEntry): NC2IDS(invalid_value, ids) + + +def test_invalid_units(memfile_with_ids, factory): + memfile_with_ids["time"].units = "hours" + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_invalid_documentation(memfile_with_ids, factory, caplog): + with caplog.at_level("WARNING"): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + assert not caplog.records + # 
Invalid docstring logs a warning + memfile_with_ids["time"].documentation = "https://en.wikipedia.org/wiki/Time" + with caplog.at_level("WARNING"): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + assert len(caplog.records) == 1 + + +def test_invalid_dimension_name(memfile_with_ids, factory): + memfile_with_ids.renameDimension("time", "T") + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_invalid_coordinates(memfile_with_ids, factory): + memfile_with_ids["profiles_1d.grid.rho_tor_norm"].coordinates = "xyz" + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_invalid_ancillary_variables(memfile_with_ids, factory): + memfile_with_ids["time"].ancillary_variables = "xyz" + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_extra_attributes(memfile_with_ids, factory): + memfile_with_ids["time"].new_attribute = [1, 2, 3] + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +# TODO: tests for sparsity information From e5246464d588069af3f0f25e5a0e00d41d7fd4ef Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 10:21:18 +0100 Subject: [PATCH 11/42] Fix a bug with lazy loading multiple IDSs from the same HDF5 DBEntry Ensure lazy contexts belonging to a different IDS are always closed. See IMAS-5603 for more details. --- imaspy/backends/imas_core/al_context.py | 4 ++++ imaspy/test/test_lazy_loading.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/imaspy/backends/imas_core/al_context.py b/imaspy/backends/imas_core/al_context.py index 07f37dec..10c0bf45 100644 --- a/imaspy/backends/imas_core/al_context.py +++ b/imaspy/backends/imas_core/al_context.py @@ -299,6 +299,10 @@ def get_context(self) -> ALContext: # from the cache else: + # Purge the cache to close open contexts from other IDSs (IMAS-5603) + cache = self.dbentry._lazy_ctx_cache + while cache: + cache.pop().close() return self.dbentry_ctx @contextmanager diff --git a/imaspy/test/test_lazy_loading.py b/imaspy/test/test_lazy_loading.py index 8c3b2fef..c0e54aad 100644 --- a/imaspy/test/test_lazy_loading.py +++ b/imaspy/test/test_lazy_loading.py @@ -163,3 +163,23 @@ def test_lazy_load_with_new_aos(requires_imas): assert len(lazy_et.model[0].ggd[0].electrons.particles.d_radial) == 0 dbentry.close() + + +def test_lazy_load_multiple_ids(backend, worker_id, tmp_path): + if backend == ASCII_BACKEND: + pytest.skip("Lazy loading is not supported by the ASCII backend.") + + with open_dbentry(backend, "w", worker_id, tmp_path) as dbentry: + cp = dbentry.factory.core_profiles() + cp.ids_properties.homogeneous_time = 1 + cp.time = [0.0, 1.0] + dbentry.put(cp) + eq = dbentry.factory.equilibrium() + eq.ids_properties.homogeneous_time = 1 + eq.time = [1.0, 2.0] + dbentry.put(eq) + + lazy_cp = dbentry.get("core_profiles", lazy=True) + lazy_eq = dbentry.get("equilibrium", lazy=True) + assert all(cp.time - eq.time == -1) + assert all(lazy_cp.time - lazy_eq.time == -1) From d47566baca31d9b53ca46ed0dfd06dde70030211 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 13:26:29 +0100 Subject: [PATCH 12/42] Validate netCDF sparsity metadata --- imaspy/backends/netcdf/nc2ids.py | 60 +++++++++++++++++++++++-------- imaspy/test/test_nc_validation.py | 51 ++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 18 deletions(-) diff --git 
a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index e2cf65b3..3666d49e 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -1,5 +1,5 @@ import logging -from typing import Iterator, List, Tuple +from typing import Iterator, List, Optional, Tuple import netCDF4 @@ -108,18 +108,13 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: def run(self) -> None: """Load the data from the netCDF group into the IDS.""" + self.variables.sort() self._validate_variables() - # FIXME: ensure that var_names are sorted properly - # Current assumption is that creation-order is fine for var_name in self.variables: if var_name.endswith(":shape"): - continue # TODO: validate that this is used - - # FIXME: error handling: + continue metadata = self.ids.metadata[var_name] - # TODO: validate metadata (data type, units, etc.) conforms to DD - if metadata.data_type is IDSDataType.STRUCTURE: continue # This only contains DD metadata we already know @@ -168,7 +163,6 @@ def run(self) -> None: def _validate_variables(self) -> None: """Validate that all variables in the netCDF Group exist and match the DD.""" - self.variables.sort() for var_name in self.variables: if var_name.endswith(":shape"): # Check that there is a corresponding variable @@ -184,7 +178,8 @@ def _validate_variables(self) -> None: f"Shape information provided for {data_var}, but this variable " "is not sparse." ) - # That's all for :shape arrays + # That's all for :shape arrays here, rest is checked in + # _validate_variable (which defers to _validate_sparsity) continue # Check that the DD defines this variable, and validate its metadata @@ -198,10 +193,6 @@ def _validate_variables(self) -> None: ) self._validate_variable(var, metadata) - # Validate sparsity metadata - if "sparse" in var.ncattrs(): - ... # TODO - def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None: """Validate that the variable has correct metadata, raise an exception if not. @@ -251,7 +242,9 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> No # Sparse sparse = attrs.pop("sparse", None) if sparse is not None: - ... # TODO + shape_name = f"{var.name}:shape" + shape_var = self.group[shape_name] if shape_name in self.variables else None + self._validate_sparsity(var, shape_var, metadata) # Documentation doc = attrs.pop("documentation", None) @@ -262,6 +255,43 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> No if attrs: raise variable_error(var, "attributes", list(attrs.keys())) + def _validate_sparsity( + self, + var: netCDF4.Variable, + shape_var: Optional[netCDF4.Variable], + metadata: IDSMetadata, + ) -> None: + """Validate that the variable has correct sparsity. 
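+
+        A variable marked as ``sparse`` does not fill its full rectangular
+        bounds: 0D data signals missing values via ``_FillValue``, while for
+        arrays the actual shapes are recorded in a companion ``<name>:shape``
+        variable.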
+ + Args: + var: Variable with a "sparse" attribute + shape_var: Corresponding shape array (if it exists in the NC group) + metadata: IDSMetadata of the corresponding IDS object + """ + if metadata.ndim == 0: + return # Sparsity is stored with _Fillvalue, nothing to validate + + # Dimensions + aos_dimensions = self.ncmeta.get_dimensions( + self.ncmeta.aos.get(metadata.path_string), self.homogeneous_time + ) + shape_dimensions = shape_var.dimensions + if ( + len(shape_dimensions) != len(aos_dimensions) + 1 + or shape_dimensions[:-1] != aos_dimensions + or self.group.dimensions[shape_dimensions[-1]].size != metadata.ndim + ): + expected_dims = aos_dimensions + (f"{metadata.ndim}D",) + raise variable_error( + shape_var, "dimensions", shape_dimensions, expected_dims + ) + + # Data type + if shape_var.dtype.kind not in "ui": # should be (un)signed integer + raise variable_error( + shape_var, "dtype", shape_var.dtype, "any integer type" + ) + def nc2ids(group: netCDF4.Group, ids: IDSToplevel): """Get data from the netCDF group and store it in the provided IDS.""" diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py index d3bf8c09..f7cc029f 100644 --- a/imaspy/test/test_nc_validation.py +++ b/imaspy/test/test_nc_validation.py @@ -1,4 +1,5 @@ import netCDF4 +import numpy as np import pytest from imaspy.backends.netcdf.ids2nc import IDS2NC @@ -24,9 +25,10 @@ def memfile_with_ids(memfile, factory): ids = factory.core_profiles() ids.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS ids.time = [1.0, 2.0, 3.0] - ids.profiles_1d.resize(2) - for i in range(2): + ids.profiles_1d.resize(3) + for i in range(3): ids.profiles_1d[i].grid.rho_tor_norm = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0] + ids.profiles_1d[0].zeff = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] IDS2NC(ids, memfile).run() # This one is valid: NC2IDS(memfile, factory.core_profiles()).run() @@ -98,4 +100,47 @@ def test_extra_attributes(memfile_with_ids, factory): NC2IDS(memfile_with_ids, factory.core_profiles()).run() -# TODO: tests for sparsity information +def test_shape_array_without_data(memfile_with_ids, factory): + memfile_with_ids.createVariable("profiles_1d.t_i_average:shape", int, ()) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_shape_array_without_sparse_data(memfile_with_ids, factory): + memfile_with_ids.createVariable("profiles_1d.grid.rho_tor_norm:shape", int, ()) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, factory.core_profiles()).run() + + +def test_shape_array_with_invalid_dimensions(memfile_with_ids, factory): + cp = factory.core_profiles() + t_i_average_meta = cp.metadata["profiles_1d.t_i_average"] + t_i_average = memfile_with_ids.createVariable( + "profiles_1d.t_i_average", float, ("time", "profiles_1d.grid.rho_tor_norm:i") + ) + t_i_average.units = t_i_average_meta.units + t_i_average.documentation = t_i_average_meta.documentation + t_i_average.sparse = "Contents don't matter" + memfile_with_ids.createVariable( + "profiles_1d.t_i_average:shape", + np.int32, + ("time", "profiles_1d.grid.rho_tor_norm:i"), + ) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, cp).run() + + +def test_shape_array_with_invalid_dtype(memfile_with_ids, factory): + cp = factory.core_profiles() + t_i_average_meta = cp.metadata["profiles_1d.t_i_average"] + t_i_average = memfile_with_ids.createVariable( + "profiles_1d.t_i_average", float, ("time", "profiles_1d.grid.rho_tor_norm:i") + ) + t_i_average.units = 
t_i_average_meta.units + t_i_average.documentation = t_i_average_meta.documentation + t_i_average.sparse = "Contents don't matter" + memfile_with_ids.createVariable( + "profiles_1d.t_i_average:shape", float, ("time", "1D") + ) + with pytest.raises(InvalidNetCDFEntry): + NC2IDS(memfile_with_ids, cp).run() From 54d78d6e08fd44343578bb0ae13c8404f7951de2 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 13:49:33 +0100 Subject: [PATCH 13/42] Add environment variable to disable netCDF file validation. --- docs/source/configuring.rst | 7 +++++++ imaspy/backends/netcdf/nc2ids.py | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/docs/source/configuring.rst b/docs/source/configuring.rst index 07073faf..dae11b6f 100644 --- a/docs/source/configuring.rst +++ b/docs/source/configuring.rst @@ -29,6 +29,13 @@ This page provides an overview of available variables. you can use :external:py:meth:`logging.getLogger("imaspy").setLevel(...) ` to change the log level programmatically. + +``IMASPY_DISABLE_NC_VALIDATE`` + Disables validation of netCDF files when loading an IDS from an IMAS netCDF file. + + .. caution:: + Disabling the validation may lead to errors when reading data from an IMAS netCDF file. + ``IMAS_VERSION`` Sets :ref:`The default Data Dictionary version` to use. diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index 3666d49e..0a69f964 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -1,4 +1,5 @@ import logging +import os from typing import Iterator, List, Optional, Tuple import netCDF4 @@ -163,6 +164,14 @@ def run(self) -> None: def _validate_variables(self) -> None: """Validate that all variables in the netCDF Group exist and match the DD.""" + disable_validate = os.environ.get("IMASPY_DISABLE_NC_VALIDATE") + if disable_validate and disable_validate != "0": + logger.info( + "NetCDF file validation disabled: " + "This may lead to errors when reading data!" + ) + return # validation checks are disabled + for var_name in self.variables: if var_name.endswith(":shape"): # Check that there is a corresponding variable From 5ccae5dec8e4994748a42f0e7d970049504cda95 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 19 Nov 2024 15:50:32 +0100 Subject: [PATCH 14/42] Eliminate nc2ids function --- imaspy/backends/netcdf/db_entry_nc.py | 6 +++--- imaspy/backends/netcdf/nc2ids.py | 14 -------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index ba7334fc..9a0bf9c9 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -5,7 +5,7 @@ from imaspy.backends.db_entry_impl import DBEntryImpl from imaspy.backends.netcdf.ids2nc import IDS2NC -from imaspy.backends.netcdf.nc2ids import nc2ids +from imaspy.backends.netcdf.nc2ids import NC2IDS from imaspy.exception import DataEntryException from imaspy.ids_convert import NBCPathMap, convert_ids from imaspy.ids_factory import IDSFactory @@ -98,13 +98,13 @@ def get( # Load data into the destination IDS if self._ds_factory.dd_version == destination._dd_version: - nc2ids(group, destination) + NC2IDS(group, destination).run() else: # FIXME: implement automatic conversion using nbc_map # As a work-around: do an explicit conversion, but automatic conversion # will also be needed to implement lazy loading. 
ids = self._ds_factory.new(ids_name) - nc2ids(group, ids) + NC2IDS(group, ids).run() convert_ids(ids, None, target=destination) return destination diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index 0a69f964..b74b4676 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -300,17 +300,3 @@ def _validate_sparsity( raise variable_error( shape_var, "dtype", shape_var.dtype, "any integer type" ) - - -def nc2ids(group: netCDF4.Group, ids: IDSToplevel): - """Get data from the netCDF group and store it in the provided IDS.""" - try: - NC2IDS(group, ids).run() - except Exception as exc: - raise RuntimeError( - "An error occurred while reading data from the netCDF file " - f"'{group.filepath()}'. The netCDF functionality is currently in " - "preview status. Unexpected data in an otherwise valid netCDF file " - "may cause errors in IMASPy. A more robust mechanism to load IDS data from " - "netCDF files will be included in the next release of IMASPy." - ) from exc From afb8c292c6527f03354ac35fa43dfaebd8fb4ef3 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 20 Nov 2024 10:50:59 +0100 Subject: [PATCH 15/42] Update `get_sample` docstring to clarify that the interpolation mode has no effect on the `ids.time` vector. See also https://git.iter.org/projects/IMAS/repos/al-matlab/pull-requests/29/overview?commentId=48957 --- imaspy/db_entry.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/imaspy/db_entry.py b/imaspy/db_entry.py index cb948fea..3834655d 100644 --- a/imaspy/db_entry.py +++ b/imaspy/db_entry.py @@ -459,7 +459,9 @@ def get_sample( :param:`dtime` must be a number or a numpy array of size 1. This mode will generate an IDS with a homogeneous time vector ``[tmin, tmin - + dtime, tmin + 2*dtime, ...`` up to ``tmax``. The returned IDS always has + + dtime, tmin + 2*dtime, ...`` up to ``tmax``. The chosen interpolation + method will have no effect on the time vector, but may have an impact on the + other dynamic values. The returned IDS always has ``ids_properties.homogeneous_time = 1``. 3. Interpolate dynamic data on an explicit time base. This method is selected @@ -468,7 +470,9 @@ def get_sample( This mode will generate an IDS with a homogeneous time vector equal to :param:`dtime`. :param:`tmin` and :param:`tmax` are ignored in this mode. - The returned IDS always has ``ids_properties.homogeneous_time = 1``. + The chosen interpolation method will have no effect on the time vector, but + may have an impact on the other dynamic values. The returned IDS always has + ``ids_properties.homogeneous_time = 1``. Args: ids_name: Name of the IDS to read from the backend From 6cd1e44f2ca06d9746a6e0722cf216da2e6fed62 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 22 Nov 2024 15:05:37 +0100 Subject: [PATCH 16/42] Add `validate_nc` command to imaspy CLI Also fix a couple of bugs when opening invalid netCDF files and raise a proper exception. 
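For illustration, a minimal sketch of how the new validation hooks can be
called (the module, function and exception names are the ones introduced in
this patch; the file name "example.nc" is hypothetical):

    # Sketch: programmatic validation of an IMAS netCDF file.
    from imaspy.backends.netcdf.nc_validate import validate_netcdf_file
    from imaspy.exception import InvalidNetCDFEntry

    try:
        validate_netcdf_file("example.nc")
    except InvalidNetCDFEntry as exc:
        print(f"Not a valid IMAS netCDF file: {exc}")

The equivalent command-line invocation is `imaspy validate_nc example.nc`.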
--- imaspy/backends/netcdf/db_entry_nc.py | 8 ++-- imaspy/backends/netcdf/nc2ids.py | 4 +- imaspy/backends/netcdf/nc_validate.py | 53 +++++++++++++++++++++++++++ imaspy/command/cli.py | 15 ++++++++ 4 files changed, 75 insertions(+), 5 deletions(-) create mode 100644 imaspy/backends/netcdf/nc_validate.py diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index 9a0bf9c9..3725c5a9 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -6,7 +6,7 @@ from imaspy.backends.db_entry_impl import DBEntryImpl from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import NC2IDS -from imaspy.exception import DataEntryException +from imaspy.exception import DataEntryException, InvalidNetCDFEntry from imaspy.ids_convert import NBCPathMap, convert_ids from imaspy.ids_factory import IDSFactory from imaspy.ids_toplevel import IDSToplevel @@ -45,14 +45,16 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None: # Check if there is already data in this dataset: if self._dataset.dimensions or self._dataset.variables or self._dataset.groups: if "data_dictionary_version" not in self._dataset.ncattrs(): - raise RuntimeError( + raise InvalidNetCDFEntry( "Invalid netCDF file: `data_dictionary_version` missing" ) dataset_dd_version = self._dataset.data_dictionary_version if dataset_dd_version != factory.dd_version: self._ds_factory = IDSFactory(dataset_dd_version) - # TODO: [validate] that the data contained in this file adheres to the DD + elif mode not in ["w", "r+", "a"]: + # Reading an empty file... + raise InvalidNetCDFEntry(f"Invalid netCDF file: `{fname}` is empty.") else: # This is an empty netCDF dataset: set global attributes self._dataset.Conventions = "IMAS" diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index b74b4676..50905ba8 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -110,7 +110,7 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: def run(self) -> None: """Load the data from the netCDF group into the IDS.""" self.variables.sort() - self._validate_variables() + self.validate_variables() for var_name in self.variables: if var_name.endswith(":shape"): continue @@ -162,7 +162,7 @@ def run(self) -> None: for index, node in tree_iter(self.ids, metadata): node.value = data[index] - def _validate_variables(self) -> None: + def validate_variables(self) -> None: """Validate that all variables in the netCDF Group exist and match the DD.""" disable_validate = os.environ.get("IMASPY_DISABLE_NC_VALIDATE") if disable_validate and disable_validate != "0": diff --git a/imaspy/backends/netcdf/nc_validate.py b/imaspy/backends/netcdf/nc_validate.py new file mode 100644 index 00000000..7b6a1eac --- /dev/null +++ b/imaspy/backends/netcdf/nc_validate.py @@ -0,0 +1,53 @@ +from imaspy.backends.netcdf.db_entry_nc import NCDBEntryImpl +from imaspy.backends.netcdf.nc2ids import NC2IDS +from imaspy.db_entry import DBEntry +from imaspy.exception import InvalidNetCDFEntry + + +def validate_netcdf_file(filename: str) -> None: + """Validate if the provided netCDF file adheres to the IMAS conventions.""" + if not filename.endswith(".nc"): + raise InvalidNetCDFEntry( + f"Invalid filename `{filename}` provided: " + "an IMAS netCDF file should end with `.nc`" + ) + + entry = DBEntry(filename, "r") + entry_impl: NCDBEntryImpl = entry._dbe_impl + dataset = entry_impl._dataset + factory = entry_impl._ds_factory + + 
ids_names = factory.ids_names() + + # Check that groups in the dataset correspond to an IDS/occurrence and no additional + # variables are smuggled inside: + groups = [dataset] + [dataset[group] for group in dataset.groups] + for group in groups: + if group.variables or group.dimensions: + raise InvalidNetCDFEntry( + "NetCDF file should not have variables or dimensions in the " + f"{group.name} group." + ) + if group is dataset: + continue + if group.name not in ids_names: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}: there is no IDS with this name." + ) + for subgroup in group.groups: + try: + int(subgroup) + except ValueError: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}/{subgroup}: " + f"{subgroup} is not a valid occurrence number." + ) + + for ids_name in ids_names: + for occurrence in entry.list_all_occurrences(ids_name): + group = dataset[f"{ids_name}/{occurrence}"] + try: + NC2IDS(group, factory.new(ids_name)).validate_variables() + except InvalidNetCDFEntry as exc: + occ = f":{occurrence}" if occurrence else "" + raise InvalidNetCDFEntry(f"Invalid IDS {ids_name}{occ}: {exc}") diff --git a/imaspy/command/cli.py b/imaspy/command/cli.py index 246922ce..f894f02d 100644 --- a/imaspy/command/cli.py +++ b/imaspy/command/cli.py @@ -218,5 +218,20 @@ def convert_ids( console.Console().print(timer.get_table("Time required per IDS")) +@cli.command("validate_nc", no_args_is_help=True) +@click.argument("filename", type=click.Path(exists=True, dir_okay=False)) +def validate_nc(filename): + """Validate if the provided netCDF file adheres to the IMAS conventions.""" + from imaspy.backends.netcdf.nc_validate import validate_netcdf_file + + try: + validate_netcdf_file(filename) + except Exception as exc: + click.echo(f"File `{filename}` does not adhere to the IMAS conventions:") + click.echo(exc) + sys.exit(1) + click.echo(f"File `{filename}` is a valid IMAS netCDF file.") + + if __name__ == "__main__": cli() From 6ec21c71dbda710bb9230525e2b2bb830b645d6b Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Nov 2024 09:50:07 +0100 Subject: [PATCH 17/42] Fix incorrect exception when using mode="x" for netCDF files --- imaspy/backends/netcdf/db_entry_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index 3725c5a9..da239745 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -52,7 +52,7 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None: if dataset_dd_version != factory.dd_version: self._ds_factory = IDSFactory(dataset_dd_version) - elif mode not in ["w", "r+", "a"]: + elif mode not in ["w", "x", "r+", "a"]: # Reading an empty file... 
raise InvalidNetCDFEntry(f"Invalid netCDF file: `{fname}` is empty.") else: From 1f6c6fe9730dc063443408cca81fc5416d34a184 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Nov 2024 15:06:38 +0100 Subject: [PATCH 18/42] Close netCDF datasets when an exception is raised --- imaspy/backends/netcdf/db_entry_nc.py | 10 +++- imaspy/backends/netcdf/nc_validate.py | 68 +++++++++++++-------------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index da239745..a66154f1 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -39,9 +39,17 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None: """NetCDF4 dataset.""" self._factory = factory """Factory (DD version) that the user wishes to use.""" - self._ds_factory = factory # Overwritten if data exists, see below + self._ds_factory = factory # Overwritten if data exists, see _init_dd_version """Factory (DD version) that the data is stored in.""" + try: + self._init_dd_version(fname, mode, factory) + except Exception: + self._dataset.close() + raise + + def _init_dd_version(self, fname: str, mode: str, factory: IDSFactory) -> None: + """Check or setup data dictionary version.""" # Check if there is already data in this dataset: if self._dataset.dimensions or self._dataset.variables or self._dataset.groups: if "data_dictionary_version" not in self._dataset.ncattrs(): diff --git a/imaspy/backends/netcdf/nc_validate.py b/imaspy/backends/netcdf/nc_validate.py index 7b6a1eac..49a14283 100644 --- a/imaspy/backends/netcdf/nc_validate.py +++ b/imaspy/backends/netcdf/nc_validate.py @@ -12,42 +12,42 @@ def validate_netcdf_file(filename: str) -> None: "an IMAS netCDF file should end with `.nc`" ) - entry = DBEntry(filename, "r") - entry_impl: NCDBEntryImpl = entry._dbe_impl - dataset = entry_impl._dataset - factory = entry_impl._ds_factory + with DBEntry(filename, "r") as entry: + entry_impl: NCDBEntryImpl = entry._dbe_impl + dataset = entry_impl._dataset + factory = entry_impl._ds_factory - ids_names = factory.ids_names() + ids_names = factory.ids_names() - # Check that groups in the dataset correspond to an IDS/occurrence and no additional - # variables are smuggled inside: - groups = [dataset] + [dataset[group] for group in dataset.groups] - for group in groups: - if group.variables or group.dimensions: - raise InvalidNetCDFEntry( - "NetCDF file should not have variables or dimensions in the " - f"{group.name} group." - ) - if group is dataset: - continue - if group.name not in ids_names: - raise InvalidNetCDFEntry( - f"Invalid group name {group.name}: there is no IDS with this name." - ) - for subgroup in group.groups: - try: - int(subgroup) - except ValueError: + # Check that groups in the dataset correspond to an IDS/occurrence and no + # additional variables are smuggled inside: + groups = [dataset] + [dataset[group] for group in dataset.groups] + for group in groups: + if group.variables or group.dimensions: raise InvalidNetCDFEntry( - f"Invalid group name {group.name}/{subgroup}: " - f"{subgroup} is not a valid occurrence number." + "NetCDF file should not have variables or dimensions in the " + f"{group.name} group." ) + if group is dataset: + continue + if group.name not in ids_names: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}: there is no IDS with this name." 
+ ) + for subgroup in group.groups: + try: + int(subgroup) + except ValueError: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}/{subgroup}: " + f"{subgroup} is not a valid occurrence number." + ) - for ids_name in ids_names: - for occurrence in entry.list_all_occurrences(ids_name): - group = dataset[f"{ids_name}/{occurrence}"] - try: - NC2IDS(group, factory.new(ids_name)).validate_variables() - except InvalidNetCDFEntry as exc: - occ = f":{occurrence}" if occurrence else "" - raise InvalidNetCDFEntry(f"Invalid IDS {ids_name}{occ}: {exc}") + for ids_name in ids_names: + for occurrence in entry.list_all_occurrences(ids_name): + group = dataset[f"{ids_name}/{occurrence}"] + try: + NC2IDS(group, factory.new(ids_name)).validate_variables() + except InvalidNetCDFEntry as exc: + occ = f":{occurrence}" if occurrence else "" + raise InvalidNetCDFEntry(f"Invalid IDS {ids_name}{occ}: {exc}") From b7d89635a270f623b0af2583c595b37c8a64420a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Nov 2024 15:06:54 +0100 Subject: [PATCH 19/42] Add unit tests for `nc_validate.py` --- imaspy/test/test_nc_validation.py | 53 ++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py index f7cc029f..efd25420 100644 --- a/imaspy/test/test_nc_validation.py +++ b/imaspy/test/test_nc_validation.py @@ -4,7 +4,8 @@ from imaspy.backends.netcdf.ids2nc import IDS2NC from imaspy.backends.netcdf.nc2ids import NC2IDS -from imaspy.exception import InvalidNetCDFEntry +from imaspy.backends.netcdf.nc_validate import validate_netcdf_file +from imaspy.exception import InvalidNetCDFEntry, UnknownDDVersion from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS from imaspy.ids_factory import IDSFactory @@ -144,3 +145,53 @@ def test_shape_array_with_invalid_dtype(memfile_with_ids, factory): ) with pytest.raises(InvalidNetCDFEntry): NC2IDS(memfile_with_ids, cp).run() + + +def test_validate_nc(tmpdir): + fname = str(tmpdir / "test.nc") + + # Wrong extension + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file("test.h5") # invalid extension + + # Empty file + netCDF4.Dataset(fname, "w").close() + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Invalid DD version + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "invalid" + dataset.createGroup("core_profiles") + with pytest.raises(UnknownDDVersion): + validate_netcdf_file(fname) + + # Invalid group + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createGroup("X") + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Invalid occurrence + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createGroup("core_profiles/a") + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Invalid variable in root group + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createVariable("core_profiles", int, ()) + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # Missing ids_properties.homogeneous_time + with netCDF4.Dataset(fname, "w") as dataset: + dataset.data_dictionary_version = "4.0.0" + dataset.createGroup("core_profiles/1") + with pytest.raises(InvalidNetCDFEntry): + validate_netcdf_file(fname) + + # All other validations are handled by NC2IDS and tested above From 
fc2cbf20b3cf1c549761214109d3d89b4ee34091 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 27 Nov 2024 16:31:59 +0100 Subject: [PATCH 20/42] Additional documentation for the `imaspy validate_nc` command line tool --- docs/source/netcdf.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/netcdf.rst b/docs/source/netcdf.rst index dd3bf431..7a7593e6 100644 --- a/docs/source/netcdf.rst +++ b/docs/source/netcdf.rst @@ -102,3 +102,11 @@ your directory. Let's open this file with ``xarray.load_dataset``: Attributes: Conventions: IMAS data_dictionary_version: 3.41.0 + + +Validating an IMAS netCDF file +------------------------------ + +IMAS netCDF files can be validated with IMASPy through the command line ``imaspy +validate_nc ``. See also :ref:`IMASPy Command Line tool` or type +``imaspy validate_nc --help`` in a command line. From 7c56b5f0713e7083fc3887884af3bf5dc2852f78 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 4 Dec 2024 15:35:31 +0100 Subject: [PATCH 21/42] Explicitly set `IDSDEF_PATH` when opening a DBEntry with the UDA backend --- imaspy/backends/imas_core/db_entry_al.py | 19 ++++++++ imaspy/backends/imas_core/uda_support.py | 56 ++++++++++++++++++++++++ imaspy/test/test_uda_support.py | 12 +++++ 3 files changed, 87 insertions(+) create mode 100644 imaspy/backends/imas_core/uda_support.py create mode 100644 imaspy/test/test_uda_support.py diff --git a/imaspy/backends/imas_core/db_entry_al.py b/imaspy/backends/imas_core/db_entry_al.py index a90e4d6a..34a3ab32 100644 --- a/imaspy/backends/imas_core/db_entry_al.py +++ b/imaspy/backends/imas_core/db_entry_al.py @@ -41,6 +41,7 @@ from .db_entry_helpers import delete_children, get_children, put_children from .imas_interface import LLInterfaceError, has_imas, ll_interface from .mdsplus_model import ensure_data_dir, mdsplus_model_dir +from .uda_support import extract_idsdef, get_dd_version_from_idsdef_xml _BACKEND_NAME = { ASCII_BACKEND: "ascii", @@ -186,6 +187,24 @@ def _setup_backend( pass # nothing to set up elif backend == "uda": + # Set IDSDEF_PATH to point the UDA backend to the selected DD version + idsdef_path = None + + if factory._xml_path is not None: + # Factory was constructed with an explicit XML path, point UDA to that: + idsdef_path = factory._xml_path + + elif "IMAS_PREFIX" in os.environ: + # Check if UDA can use the IDSDef.xml stored in $IMAS_PREFIX/include/ + idsdef_path = os.environ["IMAS_PREFIX"] + "/include/IDSDef.xml" + if get_dd_version_from_idsdef_xml(idsdef_path) != factory.version: + idsdef_path = None + + if idsdef_path is None: + # Extract XML from the DD zip and point UDA to it + idsdef_path = extract_idsdef(factory.version) + + os.environ["IDSDEF_PATH"] = idsdef_path logger.warning( "The UDA backend is not tested with IMASPy and may not work properly. " "Please raise any issues you find." diff --git a/imaspy/backends/imas_core/uda_support.py b/imaspy/backends/imas_core/uda_support.py new file mode 100644 index 00000000..8b599faa --- /dev/null +++ b/imaspy/backends/imas_core/uda_support.py @@ -0,0 +1,56 @@ +import logging +from pathlib import Path +from typing import Union +from xml.etree import ElementTree as ET + +from imaspy import dd_zip + +from .mdsplus_model import _get_xdg_cache_dir + +logger = logging.getLogger(__name__) + + +def get_dd_version_from_idsdef_xml(path: Union[str, Path]) -> str: + """Parse the IDSDef.xml up to the point where the Data Dictionary version is set. 
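+
+    The file is parsed incrementally with ``xml.etree.ElementTree.iterparse``
+    so that parsing can stop at the first ``<version>`` element instead of
+    loading the complete XML document.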
+ + Returns: + The Data Dictionary version for the provided file, or None if the file cannot be + parsed / contains no Data Dictionary version. + """ + try: + for _, elem in ET.iterparse(path): + if elem.tag == "version": + return elem.text + except OSError: + pass # File not found, etc. + except Exception: + logger.warning("Could not read DD version from file '%s'.", path, exc_info=True) + return None + + +def extract_idsdef(dd_version: str) -> str: + """Extract the IDSDef.xml for the given version and return its path. + + The IDSDef.xml is extracted to the imaspy cache folder: + + - If the file imaspy/uda/.xml already exists, we assume it is correct + """ + cache_dir_path = Path(_get_xdg_cache_dir()) / "imaspy" / "uda" + cache_dir_path.mkdir(parents=True, exist_ok=True) # ensure cache folder exists + idsdef_path = cache_dir_path / (dd_version + ".xml") + + if idsdef_path.exists(): + extract = False + # Check if the file is fine + if get_dd_version_from_idsdef_xml(idsdef_path) != dd_version: + # File is corrupt, I guess? We'll overwrite: + extract = True + else: + extract = True + + if extract: + # Extract XML from the dd_zip and store + data = dd_zip.get_dd_xml(dd_version) + idsdef_path.write_bytes(data) + + return str(idsdef_path) diff --git a/imaspy/test/test_uda_support.py b/imaspy/test/test_uda_support.py new file mode 100644 index 00000000..f623219a --- /dev/null +++ b/imaspy/test/test_uda_support.py @@ -0,0 +1,12 @@ +from pathlib import Path +from zlib import crc32 + +from imaspy import dd_zip +from imaspy.backends.imas_core.uda_support import extract_idsdef + + +def test_extract_idsdef(): + fname = extract_idsdef("4.0.0") + expected_crc = dd_zip.get_dd_xml_crc("4.0.0") + actual_crc = crc32(Path(fname).read_bytes()) + assert expected_crc == actual_crc From 74d2e3afe41a3fb907bc0a3729fcea29590aa4ca Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Mon, 9 Dec 2024 15:14:44 +0100 Subject: [PATCH 22/42] Make prepare_data_dictionaries compatible with DD>4.0.0 (change in schemas layout) --- imaspy/dd_helpers.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/imaspy/dd_helpers.py b/imaspy/dd_helpers.py index 0506482f..21a7775f 100644 --- a/imaspy/dd_helpers.py +++ b/imaspy/dd_helpers.py @@ -58,9 +58,14 @@ def prepare_data_dictionaries(): dd_zip.write(filename, arcname=arcname) # Include identifiers from latest tag in zip file repo.git.checkout(newest_version_and_tag[1], force=True) + # DD layout <= 4.0.0 for filename in Path("data-dictionary").glob("*/*identifier.xml"): arcname = Path("identifiers").joinpath(*filename.parts[1:]) dd_zip.write(filename, arcname=arcname) + # DD layout > 4.0.0 + for filename in Path("data-dictionary").glob("schemas/*/*identifier.xml"): + arcname = Path("identifiers").joinpath(*filename.parts[2:]) + dd_zip.write(filename, arcname=arcname) # pre 3.30.0 versions of the DD have the `saxon9he.jar` file path hardcoded From 77fb044a48b709ddaaef9091b1101484526338cd Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Wed, 11 Dec 2024 17:03:11 +0100 Subject: [PATCH 23/42] Updating the license and readme --- LICENSE.md | 46 --------------- LICENSE.txt | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 121 ++++++-------------------------------- 3 files changed, 182 insertions(+), 150 deletions(-) delete mode 100644 LICENSE.md create mode 100644 LICENSE.txt diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index ea4a5d46..00000000 --- a/LICENSE.md +++ /dev/null @@ -1,46 +0,0 @@ -Copyright (c) 2020-2023 ITER Organization, Route de 
Vinon-sur-Verdon, CS 90 046, - 13067 St-Paul-lez-Durance Cedex, France - -Copyright (c) 2020-2023 Karel Lucas van de Plassche - -Copyright (c) 2020 Dutch Institute for Fundamental Energy Research - -Copyright (c) 2020-2022 Daan van Vugt - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Use and redistribution, for peaceful purposes only, are granted solely to the - ITER Members (the People's Republic of China, the European Atomic Energy - Community, the Republic of India, Japan, the Republic of Korea, the Russian - Federation, and the United States of America), with the right to sub-license - within their territory for the purpose of fusion research and development. - Organizations, bodies or individuals of non-ITER Members shall seek specific - written permission from the ITER Organization before use or redistribution of - this software. - -* All modifications/derivatives shall be made available to the ITER Organization. - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -* Neither the name of the ITER Organization nor the names of its contributors - may be used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE ITER ORGANIZATION OR ITS CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..33bb3680 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. 
+ + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. 
+ + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/README.md b/README.md index ce753f5a..23e83fde 100644 --- a/README.md +++ b/README.md @@ -1,99 +1,24 @@ # IMASPy IMASPy is a pure-python library to handle arbitrarily nested data structures. -IMASPy is designed for, but not necessarily bound to, interacting with -Interface Data Structures (IDSs) as defined by the -Integrated Modelling & Analysis Suite (IMAS) Data Model. +IMASPy is designed for, but not necessarily bound to, interacting with Interface +Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite (IMAS) +Data Model. -It provides: -* An easy-to-install and easy-to-get started package by - * Not requiring an IMAS installation - * Not strictly requiring matching a Data Dictionary (DD) version -* An pythonic alternative to the IMAS Python High Level Interface (HLI) -* Checking of correctness on assign time, instead of database write time -* Dynamically created in-memory pre-filled data trees from DD XML specifications +## Install -This package is developed on [ITER bitbucket](https://git.iter.org/projects/IMAS/repos/imaspy). -For user support, contact the IMAS team on the [IMAS user slack](https://imasusers.slack.com), -open a [JIRA issue](https://jira.iter.org/projects/IMAS), or email the -support team on . +Install steps are described in the documentation generated from `/docs/source/installing.rst`. -## Installation - -### On ITER system, EuroFusion gateway - -There is a `module` available on ITER and the Gateway, so you can run - -```bash -module load IMASPy -``` - -IMASPy can work with either Access Layer versions 4 or 5 (the used version is -automatically detected when importing the `imaspy` module). IMASPy still works (with -limited functionality) when no IMAS module is loaded. - -### Local - -We recommend using a `venv`: - -```bash -python3 -m venv ./venv -. venv/bin/activate -``` - -Then clone this repository, and run `pip install`: - -```bash -git clone ssh://git@git.iter.org/imas/imaspy.git -cd imaspy -pip install . -# Optional: also install `imas-core` with the HDF5 backend in the venv: -pip install .[imas-core] -``` - -If you get strange errors you might want to upgrade your `setuptools` and `pip`. -(you might want to add the `--user` flag to your pip installs when not in a `venv`) - -### Development installation - -For development an installation in editable mode may be more convenient, and -you will need some extra dependencies to run the test suite and build -documentation. - -```bash -pip install -e .[test,docs] -``` +Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) +and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMASPy-doc/index.html) -Test your installation by trying +The documentation can be manually generated by installing sphinx and running: ```bash -cd ~ -python -c "import imaspy; print(imaspy.__version__)" +make -C docs html ``` -which should return your just installed version number. - -### Installation without ITER access - -The installation script tries to access the [ITER IMAS Core Data Dictionary repository](https://git.iter.org/projects/IMAS/repos/data-dictionary/browse) -to fetch the latest versions. If you do not have git+ssh access there, you can -try to find this repository elsewhere, and do a `git fetch --tags`. - -Alternatively you could try to obtain an `IDSDef.zip` and place it in `~/.config/imaspy/`. 
-
-Test your installation by trying
-
-```bash
-python -c "import imaspy; factory = imaspy.IDSFactory()"
-```
-If the following error is raised:
-```bash
-RuntimeError: Could not find any data dictionary definitions.
-```
-it means that the Data Dictionary definitions weren't created during the install.
-You can generate these definitions by executing `build_DD` in the command line.
-Missing packages can include among others: [GitPython](https://github.com/gitpython-developers/GitPython), and Java.
 
 ## How to use
 
@@ -106,32 +31,20 @@ print(equilibrium)
 equilibrium.ids_properties.homogeneous_time = imaspy.ids_defs.IDS_TIME_MODE_HETEROGENEOUS
 equilibrium.ids_properties.comment = "testing"
 
-dbentry = imaspy.DBEntry(imaspy.ids_defs.HDF5_BACKEND, "ITER", 1, 1)
-dbentry.create()
-dbentry.put(equilibrium)
-
-# TODO: find an example with a significant change between versions (rename?)
-older_dbentry = imaspy.DBEntry(imaspy.ids_defs.HDF5_BACKEND, "ITER", 1, 1, version="3.35.0")
-equilibrium2 = older_root.get("equilibrium")
-print(equilibrium2.ids_properties.comment)
+with imaspy.DBEntry("imas:hdf5?path=./testdb", "w") as dbentry:
+    dbentry.put(equilibrium)
 ```
 
-## Documentation
-
-Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/)
-and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMASPy-doc/index.html)
+A quick 5-minute introduction is available in the documentation generated from `/docs/source/intro.rst`.
 
-The documentation can be manually generated by installing sphinx and running:
-```bash
-make -C docs html
-```
+## Legal
 
-## Interacting with IMAS AL
+IMASPy is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de
+Plassche , Copyright 2020-2022 Daan van Vugt ,
+and Copyright 2020 Dutch Institute for Fundamental Energy Research .
+It is licensed under [LGPL 3.0](LICENSE.txt).
 
-Interaction with the IMAS AL is provided by a Cython interface to the Access Layer.
-As Cython code, it needs to be compiled on your local system.
-To find the headers, the Access Layer `include` folder needs to be in your `INCLUDE_PATH`.
-On most HPC systems, a `module load IMAS` is enough.
 
 ## Acknowledgments
 

From d80778fe6cdadf50255a50905a00ed9e2b458c8c Mon Sep 17 00:00:00 2001
From: Maarten Sebregts 
Date: Thu, 12 Dec 2024 10:23:31 +0100
Subject: [PATCH 24/42] Replace references to LICENSE.md with LICENSE.txt

---
 docs/source/index.rst | 2 +-
 pyproject.toml        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 19e3985b..c5a3f24c 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -77,5 +77,5 @@ Manual
 LICENSE
 -------
 
-.. literalinclude:: ../../LICENSE.md
+.. literalinclude:: ../../LICENSE.txt
    :language: text
diff --git a/pyproject.toml b/pyproject.toml
index 1c1ce2cc..dccd6912 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ authors = [
 description = "Pythonic wrappers for the IMAS Access Layer"
 readme = {file = "README.md", content-type = "text/markdown"}
 requires-python = ">=3.7"
-license = {file = "LICENSE.md"}
+license = {file = "LICENSE.txt"}
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Environment :: Console",

From cef46674cc1f032d9ae65dfe4507060493a43ddf Mon Sep 17 00:00:00 2001
From: Maarten Sebregts 
Date: Tue, 17 Dec 2024 13:57:11 +0100
Subject: [PATCH 25/42] Fix a bug with lazy loading

Bug: IMASPy runs into an AttributeError when lazy loading a child quantity
that was added in a newer DD version than the one stored on disk.

Example:
1. Equilibrium IDS stored in DD 3.33.0
2. Lazy loading the IDS with DD 4.0.0
3. Trying to access `eq.time_slice[0].boundary.dr_dz_zero_point.r` resulted in
   an AttributeError

Root cause: IMASPy did not correctly handle that `dr_dz_zero_point` was added
between 3.33.0 and 4.0.0.

This commit fixes the bug.
---
 imaspy/backends/imas_core/db_entry_helpers.py |  6 +++++-
 imaspy/test/test_lazy_loading.py              | 16 ++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/imaspy/backends/imas_core/db_entry_helpers.py b/imaspy/backends/imas_core/db_entry_helpers.py
index de1d9323..f69eafd3 100644
--- a/imaspy/backends/imas_core/db_entry_helpers.py
+++ b/imaspy/backends/imas_core/db_entry_helpers.py
@@ -77,11 +77,15 @@ def get_children(
         getattr(structure, name)._IDSPrimitive__value = data
 
 
-def _get_child(child: IDSBase, ctx: LazyALContext):
+def _get_child(child: IDSBase, ctx: Optional[LazyALContext]):
     """Get a single child when required (lazy loading)."""
     # NOTE: changes in this method must be propagated to _get_children and vice versa
     # Performance: this method is specialized for the lazy get
+    # ctx can be None when the parent structure does not exist in the on-disk DD version
+    if ctx is None:
+        return  # There is no data to be loaded
+
     time_mode = ctx.time_mode
     if time_mode == IDS_TIME_MODE_INDEPENDENT and child.metadata.type.is_dynamic:
         return  # skip dynamic (time-dependent) nodes
diff --git a/imaspy/test/test_lazy_loading.py b/imaspy/test/test_lazy_loading.py
index c0e54aad..1d34e2a1 100644
--- a/imaspy/test/test_lazy_loading.py
+++ b/imaspy/test/test_lazy_loading.py
@@ -165,6 +165,22 @@ def test_lazy_load_with_new_aos(requires_imas):
     dbentry.close()
 
 
+def test_lazy_load_with_new_structure(requires_imas):
+    dbentry = DBEntry(MEMORY_BACKEND, "ITER", 1, 1, dd_version="3.30.0")
+    dbentry.create()
+
+    eq = dbentry.factory.equilibrium()
+    eq.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    eq.time = [0.0]
+    eq.time_slice.resize(1)
+    dbentry.put(eq)
+
+    entry2 = DBEntry(MEMORY_BACKEND, "ITER", 1, 1, data_version="3", dd_version="4.0.0")
+    entry2.open()
+    lazy_eq = entry2.get("equilibrium", lazy=True)
+    assert not lazy_eq.time_slice[0].boundary.dr_dz_zero_point.r.has_value
+
+
 def test_lazy_load_multiple_ids(backend, worker_id, tmp_path):
     if backend == ASCII_BACKEND:
         pytest.skip("Lazy loading is not supported by the ASCII backend.")

From 4beab9fcbcf590356b0d92b7b65894f907157962 Mon Sep 17 00:00:00 2001
From: Olivier Hoenen 
Date: Fri, 10 Jan 2025 18:03:27 +0100
Subject: [PATCH 26/42] Add contributing guidelines

---
 CODE_OF_CONDUCT.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 CONTRIBUTING.md    | 45 +++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create
mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..df8ba3bd --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,72 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at . All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..ac28e400
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,45 @@
+# Contributing guidelines
+
+We welcome any kind of contribution to `imas-python`,
+from a simple comment, a question or even a full-fledged pull
+request.
+Please first make sure you read and follow the
+[Code of Conduct](CODE_OF_CONDUCT.md).
+
+## You think you found a bug in the code, or have a question in its use
+1. use the [issue search](https://github.com/iterorganization/
+imas-python/issues) to check if someone already created
+a similar issue;
+2. if not, make a **new issue** to describe your problem or question.
+In the case of a suspected bug, please try to give all the relevant
+information to allow reproducing the error or identifying
+its root cause (version of imas-python, OS and relevant
+dependencies, snippet of code);
+3. apply relevant labels to the issue.
+
+## You want to make or propose a change to the code
+1. use the [issue search](https://github.com/iterorganization/
+imas-python/issues) to check if someone already proposed
+a similar idea/change;
+2. if not, create a **new issue** to describe what change you would like to see
+implemented and specify whether you intend to work on it yourself or if some help
+will be needed;
+3. wait until some kind of consensus is reached about your idea being relevant,
+at which time the issue will be assigned (to you or someone else who can work on
+this topic);
+4. if you do the development yourself, fork the repository to your own GitHub
+profile and create your own feature branch off of the latest develop commit.
+Make sure to regularly sync your branch with the latest commits from `develop`
+(find instructions
+[here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/
+working-with-forks/syncing-a-fork);
+5. when your development is ready, create a pull request (find instructions
+[here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/
+proposing-changes-to-your-work-with-pull-requests/
+creating-a-pull-request-from-a-fork)).
+
+
+While we will try to answer questions quickly and to address issues in a timely
+manner, it may sometimes take longer than expected. A friendly ping in the
+discussion or the issue thread can help draw attention if you find that it has
+stalled.
From 2eb385e77e953ffef5a46a274897da6f4fb52d87 Mon Sep 17 00:00:00 2001
From: gautambaabu
Date: Fri, 13 Dec 2024 23:10:14 +0530
Subject: [PATCH 27/42] fixed readme.md for imas

---
 README.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 23e83fde..03f00ce5 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-# IMASPy
+# IMAS
 
-IMASPy is a pure-python library to handle arbitrarily nested data structures.
-IMASPy is designed for, but not necessarily bound to, interacting with Interface
+IMAS is a pure-python library to handle arbitrarily nested data structures.
+IMAS is designed for, but not necessarily bound to, interacting with Interface
 Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite
 (IMAS) Data Model.
 
@@ -11,7 +11,7 @@ Data Model.
 Install steps are described in the documentation generated from `/docs/source/installing.rst`.
Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) -and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMASPy-doc/index.html) +and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMAS-doc/index.html) The documentation can be manually generated by installing sphinx and running: @@ -23,15 +23,15 @@ make -C docs html ## How to use ```python -import imaspy -factory = imaspy.IDSFactory() +import imas +factory = imas.IDSFactory() equilibrium = factory.equilibrium() print(equilibrium) -equilibrium.ids_properties.homogeneous_time = imaspy.ids_defs.IDS_TIME_MODE_HETEROGENEOUS +equilibrium.ids_properties.homogeneous_time = imas.ids_defs.IDS_TIME_MODE_HETEROGENEOUS equilibrium.ids_properties.comment = "testing" -with imaspy.DBEntry("imas:hdf5?path=./testdb","w") as dbentry: +with imas.DBEntry("imas:hdf5?path=./testdb","w") as dbentry: dbentry.put(equilibrium) ``` @@ -40,7 +40,7 @@ A quick 5 minutes introduction is available in the documentation generated from ## Legal -IMASPy is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de +IMAS is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de Plassche , Copyright 2020-2022 Daan van Vugt , and Copyright 2020 Dutch Institute for Fundamental Energy Research . It is licensed under [LGPL 3.0](LICENSE.txt). From 133f78c30803cdb4ad8e3afab9f10e8b652c0d58 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Tue, 17 Dec 2024 14:56:13 +0100 Subject: [PATCH 28/42] Apply suggestion on naming --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 03f00ce5..9fc27d68 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# IMAS +# IMAS-Python -IMAS is a pure-python library to handle arbitrarily nested data structures. -IMAS is designed for, but not necessarily bound to, interacting with Interface +IMAS-Python is a pure-python library to handle arbitrarily nested data structures. +IMAS-Python is designed for, but not necessarily bound to, interacting with Interface Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite (IMAS) Data Model. @@ -40,7 +40,7 @@ A quick 5 minutes introduction is available in the documentation generated from ## Legal -IMAS is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de +IMAS-Python is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de Plassche , Copyright 2020-2022 Daan van Vugt , and Copyright 2020 Dutch Institute for Fundamental Energy Research . It is licensed under [LGPL 3.0](LICENSE.txt). From 6f871f5b98f268b5329310fcd0e572c109cb6539 Mon Sep 17 00:00:00 2001 From: Gautam raj Date: Tue, 17 Dec 2024 19:32:31 +0530 Subject: [PATCH 29/42] Update README.md Co-authored-by: Simon Pinches --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9fc27d68..14d4b81e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # IMAS-Python IMAS-Python is a pure-python library to handle arbitrarily nested data structures. -IMAS-Python is designed for, but not necessarily bound to, interacting with Interface +It is designed for, but not necessarily bound to, interacting with Interface Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite (IMAS) Data Model. 
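The README's "How to use" example above only shows writing an equilibrium IDS; reading it back goes through the same `imas.DBEntry` API. A minimal sketch, assuming the `./testdb` entry created by that example exists on disk:

```python
import imas

# Re-open the data entry written by the README example, this time in read mode.
with imas.DBEntry("imas:hdf5?path=./testdb", "r") as dbentry:
    # Load the equilibrium IDS that was stored with put().
    equilibrium = dbentry.get("equilibrium")
    print(equilibrium.ids_properties.comment)  # expected: "testing"
```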
From 693f035cdfa1e71e3c9e3062703997211f2adb5f Mon Sep 17 00:00:00 2001
From: Maarten Sebregts <110895564+maarten-ic@users.noreply.github.com>
Date: Mon, 20 Jan 2025 10:08:22 +0100
Subject: [PATCH 30/42] Update CONTRIBUTING.md

Fix Markdown links: newlines inside a URL are not rendered properly

---
 CONTRIBUTING.md | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ac28e400..661eedb0 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -7,36 +7,31 @@ Please first make sure you read and follow the
 [Code of Conduct](CODE_OF_CONDUCT.md).
 
 ## You think you found a bug in the code, or have a question in its use
-1. use the [issue search](https://github.com/iterorganization/
-imas-python/issues) to check if someone already created
-a similar issue;
+1. use the [issue search](https://github.com/iterorganization/imas-python/issues)
+to check if someone already created a similar issue;
 2. if not, make a **new issue** to describe your problem or question.
 In the case of a suspected bug, please try to give all the relevant
 information to allow reproducing the error or identifying
 its root cause (version of imas-python, OS and relevant
 dependencies, snippet of code);
 3. apply relevant labels to the issue.
 
 ## You want to make or propose a change to the code
-1. use the [issue search](https://github.com/iterorganization/
-imas-python/issues) to check if someone already proposed
-a similar idea/change;
+1. use the [issue search](https://github.com/iterorganization/imas-python/issues)
+to check if someone already proposed a similar idea/change;
 2. if not, create a **new issue** to describe what change you would like to see
 implemented and specify whether you intend to work on it yourself or if some help
 will be needed;
 3. wait until some kind of consensus is reached about your idea being relevant,
 at which time the issue will be assigned (to you or someone else who can work on
 this topic);
 4. if you do the development yourself, fork the repository to your own GitHub
 profile and create your own feature branch off of the latest develop commit.
 Make sure to regularly sync your branch with the latest commits from `develop`
 (find instructions
-[here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/
-working-with-forks/syncing-a-fork);
-5. when your development is ready, create a pull request (find instructions
-[here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/
-proposing-changes-to-your-work-with-pull-requests/
-creating-a-pull-request-from-a-fork)).
+[here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork));
+5. when your development is ready, create a pull request (find instructions
+[here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork)).
 
 
 While we will try to answer questions quickly and to address issues in a timely
From 5b3d0fe61e368e8beb8c1bb1bff996e028f4fc0b Mon Sep 17 00:00:00 2001
From: Anushan Fernando
Date: Mon, 20 Jan 2025 23:34:52 +0000
Subject: [PATCH 31/42] Modifications for compatibility with TORAX.

---
 imaspy/backends/netcdf/db_entry_nc.py |  7 +++++++
 imaspy/backends/netcdf/ids2nc.py      |  6 +++++-
 imaspy/backends/netcdf/nc_validate.py |  9 +++++----
 imaspy/ids_primitive.py               |  2 +-
 imaspy/test/test_cli.py               |  6 ++++--
 imaspy/test/test_dbentry.py           |  1 +
 imaspy/test/test_ids_mixin.py         |  3 +++
 imaspy/test/test_ids_toplevel.py      |  2 +-
 imaspy/test/test_minimal_types.py     |  5 +++--
 imaspy/test/test_nbc_change.py        |  2 +-
 imaspy/test/test_static_ids.py        |  2 +-
 imaspy/test/test_util.py              |  6 +++---
 12 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py
index 732eb97d..c008262c 100644
--- a/imaspy/backends/netcdf/db_entry_nc.py
+++ b/imaspy/backends/netcdf/db_entry_nc.py
@@ -33,12 +33,19 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None:
                 "The `netCDF4` python module is not available. Please install this "
                 "module to read/write IMAS netCDF files with IMASPy."
             )
+        # To support netcdf v1.4 (which has no mode "x") we map it to "w" with `clobber=False`.
+        if mode == "x":
+            mode = "w"
+            clobber = False
+        else:
+            clobber = True
 
         self._dataset = netCDF4.Dataset(
             fname,
             mode,
             format="NETCDF4",
             auto_complex=True,
+            clobber=clobber,
         )
         """NetCDF4 dataset."""
         self._factory = factory
diff --git a/imaspy/backends/netcdf/ids2nc.py b/imaspy/backends/netcdf/ids2nc.py
index 34e63101..61e42cf2 100644
--- a/imaspy/backends/netcdf/ids2nc.py
+++ b/imaspy/backends/netcdf/ids2nc.py
@@ -7,6 +7,7 @@
 
 import netCDF4
 import numpy
+from packaging import version
 
 from imaspy.backends.netcdf.nc_metadata import NCMetadata
 from imaspy.ids_base import IDSBase
@@ -187,7 +188,10 @@ def create_variables(self) -> None:
             dtype = dtypes[metadata.data_type]
             kwargs = {}
             if dtype is not str:  # Enable compression:
-                kwargs.update(compression="zlib", complevel=1)
+                if version.parse(netCDF4.__version__) > version.parse("1.4.1"):
+                    kwargs.update(compression="zlib", complevel=1)
+                else:
+                    kwargs.update(zlib=True, complevel=1)
             if dtype is not dtypes[IDSDataType.CPX]:  # Set fillvalue
                 kwargs.update(fill_value=default_fillvals[metadata.data_type])
             # Create variable
diff --git a/imaspy/backends/netcdf/nc_validate.py b/imaspy/backends/netcdf/nc_validate.py
index 49a14283..07a7ad78 100644
--- a/imaspy/backends/netcdf/nc_validate.py
+++ b/imaspy/backends/netcdf/nc_validate.py
@@ -23,23 +23,24 @@ def validate_netcdf_file(filename: str) -> None:
     # additional variables are smuggled inside:
     groups = [dataset] + [dataset[group] for group in dataset.groups]
     for group in groups:
+        group_name = group.path.split('/')[-1]
         if group.variables or group.dimensions:
             raise InvalidNetCDFEntry(
                 "NetCDF file should not have variables or dimensions in the "
-                f"{group.name} group."
+                f"{group_name} group."
             )
 
         if group is dataset:
             continue
-        if group.name not in ids_names:
+        if group_name not in ids_names:
             raise InvalidNetCDFEntry(
-                f"Invalid group name {group.name}: there is no IDS with this name."
+                f"Invalid group name {group_name}: there is no IDS with this name."
             )
         for subgroup in group.groups:
             try:
                 int(subgroup)
             except ValueError:
                 raise InvalidNetCDFEntry(
-                    f"Invalid group name {group.name}/{subgroup}: "
+                    f"Invalid group name {group_name}/{subgroup}: "
                     f"{subgroup} is not a valid occurrence number."
) diff --git a/imaspy/ids_primitive.py b/imaspy/ids_primitive.py index 94f865b6..e27eb93f 100644 --- a/imaspy/ids_primitive.py +++ b/imaspy/ids_primitive.py @@ -481,7 +481,7 @@ def _cast_value(self, value): value = np.asanyarray(value) if value.dtype != dtype: logger.info(_CONVERT_MSG, value.dtype, self) - value = np.array(value, dtype=dtype, copy=False) + value = np.asarray(value, dtype=dtype,) if value.ndim != self.metadata.ndim: raise ValueError(f"Trying to assign a {value.ndim}D value to {self!r}.") return value diff --git a/imaspy/test/test_cli.py b/imaspy/test/test_cli.py index 604a7f7e..f9ee5383 100644 --- a/imaspy/test/test_cli.py +++ b/imaspy/test/test_cli.py @@ -4,6 +4,7 @@ from click.testing import CliRunner from packaging.version import Version +from imaspy.backends.imas_core.imas_interface import has_imas from imaspy.backends.imas_core.imas_interface import ll_interface from imaspy.command.cli import print_version from imaspy.command.db_analysis import analyze_db, process_db_analysis @@ -12,6 +13,7 @@ @pytest.mark.cli +@pytest.mark.skipif(not has_imas, reason="Requires IMAS Core.") def test_imaspy_version(): runner = CliRunner() result = runner.invoke(print_version) @@ -19,8 +21,8 @@ def test_imaspy_version(): @pytest.mark.cli -@pytest.mark.skipif(ll_interface._al_version < Version("5.0"), reason="Needs AL >= 5") -def test_db_analysis(tmp_path): +@pytest.mark.skipif(not has_imas or ll_interface._al_version < Version("5.0"), reason="Needs AL >= 5 AND Requires IMAS Core.") +def test_db_analysis(tmp_path,): # This only tests the happy flow, error handling is not tested db_path = tmp_path / "test_db_analysis" with DBEntry(f"imas:hdf5?path={db_path}", "w") as entry: diff --git a/imaspy/test/test_dbentry.py b/imaspy/test/test_dbentry.py index 2d82af36..d67fae0d 100644 --- a/imaspy/test/test_dbentry.py +++ b/imaspy/test/test_dbentry.py @@ -82,6 +82,7 @@ def test_dbentry_constructor(): assert get_entry_attrs(entry) == (1, 2, 3, 4, None, 6) +@pytest.mark.skipif(not has_imas, reason="Requires IMAS Core.") def test_ignore_unknown_dd_version(monkeypatch, worker_id, tmp_path): entry = open_dbentry(imaspy.ids_defs.MEMORY_BACKEND, "w", worker_id, tmp_path) ids = entry.factory.core_profiles() diff --git a/imaspy/test/test_ids_mixin.py b/imaspy/test/test_ids_mixin.py index 164adcdd..2b3f7b03 100644 --- a/imaspy/test/test_ids_mixin.py +++ b/imaspy/test/test_ids_mixin.py @@ -1,7 +1,10 @@ # This file is part of IMASPy. # You should have received the IMASPy LICENSE file with this project. 
+import pytest +from imaspy.backends.imas_core.imas_interface import has_imas +@pytest.mark.skipif(has_imas, reason="Requires IMAS Core.") def test_toplevel(fake_filled_toplevel): top = fake_filled_toplevel assert top.wavevector._toplevel == top diff --git a/imaspy/test/test_ids_toplevel.py b/imaspy/test/test_ids_toplevel.py index 4721f3c3..0e8d8c32 100644 --- a/imaspy/test/test_ids_toplevel.py +++ b/imaspy/test/test_ids_toplevel.py @@ -46,7 +46,7 @@ def test_pretty_print(ids): assert pprint.pformat(ids) == "" -def test_serialize_nondefault_dd_version(): +def test_serialize_nondefault_dd_version(requires_imas): ids = IDSFactory("3.31.0").core_profiles() fill_with_random_data(ids) data = ids.serialize() diff --git a/imaspy/test/test_minimal_types.py b/imaspy/test/test_minimal_types.py index ee38761c..0bb9ac30 100644 --- a/imaspy/test/test_minimal_types.py +++ b/imaspy/test/test_minimal_types.py @@ -1,5 +1,6 @@ # A minimal testcase loading an IDS file and checking that the structure built is ok from numbers import Complex, Integral, Number, Real +from packaging import version import numpy as np import pytest @@ -61,7 +62,7 @@ def test_assign_str_1d(minimal, caplog): # Prevent the expected numpy ComplexWarnings from cluttering pytest output -@pytest.mark.filterwarnings("ignore::numpy.ComplexWarning") +@pytest.mark.filterwarnings("ignore::numpy.ComplexWarning" if version.parse(np.__version__) < version.parse("2.0.0") else "ignore::numpy.exceptions.ComplexWarning") @pytest.mark.parametrize("typ, max_dim", [("flt", 6), ("cpx", 6), ("int", 3)]) def test_assign_numeric_types(minimal, caplog, typ, max_dim): caplog.set_level("INFO", "imaspy") @@ -87,7 +88,7 @@ def test_assign_numeric_types(minimal, caplog, typ, max_dim): len(caplog.records) == 1 elif dim == other_ndim >= 1 and other_typ == "cpx": # np allows casting of complex to float or int, but warns: - with pytest.warns(np.ComplexWarning): + with pytest.warns(np.ComplexWarning if version.parse(np.__version__) < version.parse("2.0.0") else np.exceptions.ComplexWarning): caplog.clear() minimal[name].value = value assert len(caplog.records) == 1 diff --git a/imaspy/test/test_nbc_change.py b/imaspy/test/test_nbc_change.py index cbcf3f58..2e328982 100644 --- a/imaspy/test/test_nbc_change.py +++ b/imaspy/test/test_nbc_change.py @@ -54,7 +54,7 @@ def test_nbc_structure_to_aos(caplog): assert caplog.record_tuples[0][:2] == ("imaspy.ids_convert", logging.WARNING) -def test_nbc_0d_to_1d(caplog): +def test_nbc_0d_to_1d(caplog, requires_imas): # channel/filter_spectrometer/radiance_calibration in spectrometer visible changed # from FLT_0D to FLT_1D in DD 3.39.0 ids = IDSFactory("3.32.0").spectrometer_visible() diff --git a/imaspy/test/test_static_ids.py b/imaspy/test/test_static_ids.py index 1f430c10..680ecd2b 100644 --- a/imaspy/test/test_static_ids.py +++ b/imaspy/test/test_static_ids.py @@ -21,7 +21,7 @@ def test_ids_valid_type(): assert ids_types in ({IDSType.NONE}, {IDSType.CONSTANT, IDSType.DYNAMIC}) -def test_constant_ids(caplog): +def test_constant_ids(caplog, requires_imas): ids = imaspy.IDSFactory().new("amns_data") if ids.metadata.type is IDSType.NONE: pytest.skip("IDS definition has no constant IDSs") diff --git a/imaspy/test/test_util.py b/imaspy/test/test_util.py index 37c419a0..2c4dad97 100644 --- a/imaspy/test/test_util.py +++ b/imaspy/test/test_util.py @@ -54,7 +54,7 @@ def test_inspect(): inspect(cp.profiles_1d[1].grid.rho_tor_norm) # IDSPrimitive -def test_inspect_lazy(): +def test_inspect_lazy(requires_imas): with get_training_db_entry() 
as entry: cp = entry.get("core_profiles", lazy=True) inspect(cp) @@ -141,7 +141,7 @@ def test_idsdiffgen(): assert diff[0] == ("profiles_1d/time", -1, 0) -def test_idsdiff(): +def test_idsdiff(requires_imas): # Test the diff rendering for two sample IDSs with get_training_db_entry() as entry: imaspy.util.idsdiff(entry.get("core_profiles"), entry.get("equilibrium")) @@ -179,7 +179,7 @@ def test_get_toplevel(): assert get_toplevel(cp) is cp -def test_is_lazy_loaded(): +def test_is_lazy_loaded(requires_imas): with get_training_db_entry() as entry: assert is_lazy_loaded(entry.get("core_profiles")) is False assert is_lazy_loaded(entry.get("core_profiles", lazy=True)) is True From c1f7a968f1729c9aee5c7318062084d516b0dfec Mon Sep 17 00:00:00 2001 From: Anushan Fernando Date: Mon, 20 Jan 2025 23:54:44 +0000 Subject: [PATCH 32/42] Modify tests to use fixture. --- imaspy/test/test_cli.py | 3 +-- imaspy/test/test_dbentry.py | 3 +-- imaspy/test/test_ids_mixin.py | 5 +---- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/imaspy/test/test_cli.py b/imaspy/test/test_cli.py index f9ee5383..db7c462f 100644 --- a/imaspy/test/test_cli.py +++ b/imaspy/test/test_cli.py @@ -13,8 +13,7 @@ @pytest.mark.cli -@pytest.mark.skipif(not has_imas, reason="Requires IMAS Core.") -def test_imaspy_version(): +def test_imaspy_version(requires_imas): runner = CliRunner() result = runner.invoke(print_version) assert result.exit_code == 0 diff --git a/imaspy/test/test_dbentry.py b/imaspy/test/test_dbentry.py index d67fae0d..cb7ebe12 100644 --- a/imaspy/test/test_dbentry.py +++ b/imaspy/test/test_dbentry.py @@ -82,8 +82,7 @@ def test_dbentry_constructor(): assert get_entry_attrs(entry) == (1, 2, 3, 4, None, 6) -@pytest.mark.skipif(not has_imas, reason="Requires IMAS Core.") -def test_ignore_unknown_dd_version(monkeypatch, worker_id, tmp_path): +def test_ignore_unknown_dd_version(monkeypatch, worker_id, tmp_path, requires_imas): entry = open_dbentry(imaspy.ids_defs.MEMORY_BACKEND, "w", worker_id, tmp_path) ids = entry.factory.core_profiles() ids.ids_properties.homogeneous_time = 0 diff --git a/imaspy/test/test_ids_mixin.py b/imaspy/test/test_ids_mixin.py index 2b3f7b03..675e2575 100644 --- a/imaspy/test/test_ids_mixin.py +++ b/imaspy/test/test_ids_mixin.py @@ -1,11 +1,8 @@ # This file is part of IMASPy. # You should have received the IMASPy LICENSE file with this project. -import pytest -from imaspy.backends.imas_core.imas_interface import has_imas -@pytest.mark.skipif(has_imas, reason="Requires IMAS Core.") -def test_toplevel(fake_filled_toplevel): +def test_toplevel(fake_filled_toplevel, requires_imas): top = fake_filled_toplevel assert top.wavevector._toplevel == top assert top.wavevector[0].radial_component_norm._toplevel == top From abcaf3f8df9c999e175f0125fcd88c2f8e94da47 Mon Sep 17 00:00:00 2001 From: Anushan Fernando <35841118+Nush395@users.noreply.github.com> Date: Tue, 21 Jan 2025 10:27:25 +0000 Subject: [PATCH 33/42] Update imaspy/test/test_ids_mixin.py Co-authored-by: Maarten Sebregts <110895564+maarten-ic@users.noreply.github.com> --- imaspy/test/test_ids_mixin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imaspy/test/test_ids_mixin.py b/imaspy/test/test_ids_mixin.py index 675e2575..164adcdd 100644 --- a/imaspy/test/test_ids_mixin.py +++ b/imaspy/test/test_ids_mixin.py @@ -2,7 +2,7 @@ # You should have received the IMASPy LICENSE file with this project. 
-def test_toplevel(fake_filled_toplevel, requires_imas):
+def test_toplevel(fake_filled_toplevel):
     top = fake_filled_toplevel
     assert top.wavevector._toplevel == top
     assert top.wavevector[0].radial_component_norm._toplevel == top
From aab7f664d3754e48192c73bb41cbd7e44b59ba62 Mon Sep 17 00:00:00 2001
From: Anushan Fernando <35841118+Nush395@users.noreply.github.com>
Date: Tue, 21 Jan 2025 10:27:45 +0000
Subject: [PATCH 34/42] Update imaspy/test/test_cli.py

Co-authored-by: Maarten Sebregts <110895564+maarten-ic@users.noreply.github.com>
---
 imaspy/test/test_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/imaspy/test/test_cli.py b/imaspy/test/test_cli.py
index db7c462f..e6a420c7 100644
--- a/imaspy/test/test_cli.py
+++ b/imaspy/test/test_cli.py
@@ -21,7 +21,7 @@ def test_imaspy_version(requires_imas):
 
 @pytest.mark.cli
 @pytest.mark.skipif(not has_imas or ll_interface._al_version < Version("5.0"), reason="Needs AL >= 5 AND Requires IMAS Core.")
-def test_db_analysis(tmp_path,):
+def test_db_analysis(tmp_path):
     # This only tests the happy flow, error handling is not tested
     db_path = tmp_path / "test_db_analysis"
     with DBEntry(f"imas:hdf5?path={db_path}", "w") as entry:
From 006580a4cca1a8ffdb601c8ed62dfccf011dc92a Mon Sep 17 00:00:00 2001
From: Anushan Fernando
Date: Tue, 21 Jan 2025 10:52:21 +0000
Subject: [PATCH 35/42] Add error message when attempting to store complex
 number with netcdf<1.7.0.

---
 imaspy/backends/netcdf/ids2nc.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/imaspy/backends/netcdf/ids2nc.py b/imaspy/backends/netcdf/ids2nc.py
index 61e42cf2..45a04d6b 100644
--- a/imaspy/backends/netcdf/ids2nc.py
+++ b/imaspy/backends/netcdf/ids2nc.py
@@ -10,6 +10,7 @@
 from packaging import version
 
 from imaspy.backends.netcdf.nc_metadata import NCMetadata
+from imaspy.exception import InvalidNetCDFEntry
 from imaspy.ids_base import IDSBase
 from imaspy.ids_data_type import IDSDataType
 from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
@@ -186,6 +187,8 @@ def create_variables(self) -> None:
             else:
                 dtype = dtypes[metadata.data_type]
+            if version.parse(netCDF4.__version__) < version.parse("1.7.0") and dtype is dtypes[IDSDataType.CPX]:
+                raise InvalidNetCDFEntry(f"Found complex data in {var_name}, NetCDF 1.7.0 or later is required for complex data types")
             kwargs = {}
             if dtype is not str:  # Enable compression:
From b2afe0770030d6a6d2d1084288fabf24bf68260c Mon Sep 17 00:00:00 2001
From: Anushan Fernando
Date: Tue, 21 Jan 2025 10:57:10 +0000
Subject: [PATCH 36/42] Formatting.

---
 imaspy/backends/netcdf/db_entry_nc.py |  3 ++-
 imaspy/backends/netcdf/ids2nc.py      | 10 ++++++++--
 imaspy/backends/netcdf/nc_validate.py |  2 +-
 imaspy/ids_primitive.py               |  5 ++++-
 imaspy/test/test_cli.py               |  5 ++++-
 imaspy/test/test_minimal_types.py     | 12 ++++++++++--
 6 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py
index c008262c..97d5dffe 100644
--- a/imaspy/backends/netcdf/db_entry_nc.py
+++ b/imaspy/backends/netcdf/db_entry_nc.py
@@ -33,7 +33,8 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None:
                 "The `netCDF4` python module is not available. Please install this "
                 "module to read/write IMAS netCDF files with IMASPy."
             )
-        # To support netcdf v1.4 (which has no mode "x") we map it to "w" with `clobber=False`.
+        # To support netcdf v1.4 (which has no mode "x") we map it to "w" with
+        # `clobber=False`.
if mode == "x": mode = "w" clobber = False diff --git a/imaspy/backends/netcdf/ids2nc.py b/imaspy/backends/netcdf/ids2nc.py index 45a04d6b..0328b635 100644 --- a/imaspy/backends/netcdf/ids2nc.py +++ b/imaspy/backends/netcdf/ids2nc.py @@ -187,8 +187,14 @@ def create_variables(self) -> None: else: dtype = dtypes[metadata.data_type] - if version.parse(netCDF4.__version__) < version.parse("1.7.0") and dtype is dtypes[IDSDataType.CPX]: - raise InvalidNetCDFEntry(f"Found complex data in {var_name}, NetCDF 1.7.0 or later is required for complex data types") + if ( + version.parse(netCDF4.__version__) < version.parse("1.7.0") + and dtype is dtypes[IDSDataType.CPX] + ): + raise InvalidNetCDFEntry( + f"Found complex data in {var_name}, NetCDF 1.7.0 or" + f" later is required for complex data types" + ) kwargs = {} if dtype is not str: # Enable compression: if version.parse(netCDF4.__version__) > version.parse("1.4.1"): diff --git a/imaspy/backends/netcdf/nc_validate.py b/imaspy/backends/netcdf/nc_validate.py index 07a7ad78..f7528a8a 100644 --- a/imaspy/backends/netcdf/nc_validate.py +++ b/imaspy/backends/netcdf/nc_validate.py @@ -23,7 +23,7 @@ def validate_netcdf_file(filename: str) -> None: # additional variables are smuggled inside: groups = [dataset] + [dataset[group] for group in dataset.groups] for group in groups: - group_name = group.path.split('/')[-1] + group_name = group.path.split("/")[-1] if group.variables or group.dimensions: raise InvalidNetCDFEntry( "NetCDF file should not have variables or dimensions in the " diff --git a/imaspy/ids_primitive.py b/imaspy/ids_primitive.py index e27eb93f..71b1744a 100644 --- a/imaspy/ids_primitive.py +++ b/imaspy/ids_primitive.py @@ -481,7 +481,10 @@ def _cast_value(self, value): value = np.asanyarray(value) if value.dtype != dtype: logger.info(_CONVERT_MSG, value.dtype, self) - value = np.asarray(value, dtype=dtype,) + value = np.asarray( + value, + dtype=dtype, + ) if value.ndim != self.metadata.ndim: raise ValueError(f"Trying to assign a {value.ndim}D value to {self!r}.") return value diff --git a/imaspy/test/test_cli.py b/imaspy/test/test_cli.py index e6a420c7..fdea00f4 100644 --- a/imaspy/test/test_cli.py +++ b/imaspy/test/test_cli.py @@ -20,7 +20,10 @@ def test_imaspy_version(requires_imas): @pytest.mark.cli -@pytest.mark.skipif(not has_imas or ll_interface._al_version < Version("5.0"), reason="Needs AL >= 5 AND Requires IMAS Core.") +@pytest.mark.skipif( + not has_imas or ll_interface._al_version < Version("5.0"), + reason="Needs AL >= 5 AND Requires IMAS Core.", +) def test_db_analysis(tmp_path): # This only tests the happy flow, error handling is not tested db_path = tmp_path / "test_db_analysis" diff --git a/imaspy/test/test_minimal_types.py b/imaspy/test/test_minimal_types.py index 0bb9ac30..d4614de5 100644 --- a/imaspy/test/test_minimal_types.py +++ b/imaspy/test/test_minimal_types.py @@ -62,7 +62,11 @@ def test_assign_str_1d(minimal, caplog): # Prevent the expected numpy ComplexWarnings from cluttering pytest output -@pytest.mark.filterwarnings("ignore::numpy.ComplexWarning" if version.parse(np.__version__) < version.parse("2.0.0") else "ignore::numpy.exceptions.ComplexWarning") +@pytest.mark.filterwarnings( + "ignore::numpy.ComplexWarning" + if version.parse(np.__version__) < version.parse("2.0.0") + else "ignore::numpy.exceptions.ComplexWarning" +) @pytest.mark.parametrize("typ, max_dim", [("flt", 6), ("cpx", 6), ("int", 3)]) def test_assign_numeric_types(minimal, caplog, typ, max_dim): caplog.set_level("INFO", "imaspy") @@ -88,7 +92,11 
@@ def test_assign_numeric_types(minimal, caplog, typ, max_dim): len(caplog.records) == 1 elif dim == other_ndim >= 1 and other_typ == "cpx": # np allows casting of complex to float or int, but warns: - with pytest.warns(np.ComplexWarning if version.parse(np.__version__) < version.parse("2.0.0") else np.exceptions.ComplexWarning): + with pytest.warns( + np.ComplexWarning + if version.parse(np.__version__) < version.parse("2.0.0") + else np.exceptions.ComplexWarning + ): caplog.clear() minimal[name].value = value assert len(caplog.records) == 1 From 816bbd43784c3cb13bd8481326bb7eab6693342b Mon Sep 17 00:00:00 2001 From: Anushan Fernando Date: Tue, 21 Jan 2025 13:56:46 +0000 Subject: [PATCH 37/42] Add tests for different versions of netcdf. --- conftest.py | 14 ++++++++++ imaspy/test/test_cli.py | 7 +++-- imaspy/test/test_dbentry.py | 3 ++- imaspy/test/test_helpers.py | 34 ++++++++++++++++++------- imaspy/test/test_nc_autofill.py | 45 +++++++++++++++++++++++++++++++-- pyproject.toml | 2 +- 6 files changed, 90 insertions(+), 15 deletions(-) diff --git a/conftest.py b/conftest.py index 20b26679..d1893f76 100644 --- a/conftest.py +++ b/conftest.py @@ -7,6 +7,7 @@ # - Fixtures that are useful across test modules import functools +import importlib import logging import os import sys @@ -72,6 +73,19 @@ def pytest_addoption(parser): } +# This is a dummy fixture, usually provided by pytest-xdist that isn't available +# in google3. +# The `worker_id` is only used by tests that require IMAS Core which we never +# run +try: + import pytest_xdist +except ImportError: + # If pytest-xdist is not available we provide a dummy worker_id fixture. + @pytest.fixture() + def worker_id(): + return "master" + + @pytest.fixture(params=_BACKENDS) def backend(pytestconfig: pytest.Config, request: pytest.FixtureRequest): backends_provided = any(map(pytestconfig.getoption, _BACKENDS)) diff --git a/imaspy/test/test_cli.py b/imaspy/test/test_cli.py index fdea00f4..810acda6 100644 --- a/imaspy/test/test_cli.py +++ b/imaspy/test/test_cli.py @@ -13,7 +13,8 @@ @pytest.mark.cli -def test_imaspy_version(requires_imas): +@pytest.mark.skipif(not has_imas, reason="Requires IMAS Core.") +def test_imaspy_version(): runner = CliRunner() result = runner.invoke(print_version) assert result.exit_code == 0 @@ -24,7 +25,9 @@ def test_imaspy_version(requires_imas): not has_imas or ll_interface._al_version < Version("5.0"), reason="Needs AL >= 5 AND Requires IMAS Core.", ) -def test_db_analysis(tmp_path): +def test_db_analysis( + tmp_path, +): # This only tests the happy flow, error handling is not tested db_path = tmp_path / "test_db_analysis" with DBEntry(f"imas:hdf5?path={db_path}", "w") as entry: diff --git a/imaspy/test/test_dbentry.py b/imaspy/test/test_dbentry.py index cb7ebe12..d67fae0d 100644 --- a/imaspy/test/test_dbentry.py +++ b/imaspy/test/test_dbentry.py @@ -82,7 +82,8 @@ def test_dbentry_constructor(): assert get_entry_attrs(entry) == (1, 2, 3, 4, None, 6) -def test_ignore_unknown_dd_version(monkeypatch, worker_id, tmp_path, requires_imas): +@pytest.mark.skipif(not has_imas, reason="Requires IMAS Core.") +def test_ignore_unknown_dd_version(monkeypatch, worker_id, tmp_path): entry = open_dbentry(imaspy.ids_defs.MEMORY_BACKEND, "w", worker_id, tmp_path) ids = entry.factory.core_profiles() ids.ids_properties.homogeneous_time = 0 diff --git a/imaspy/test/test_helpers.py b/imaspy/test/test_helpers.py index 63a1cf79..8a651d93 100644 --- a/imaspy/test/test_helpers.py +++ b/imaspy/test/test_helpers.py @@ -93,7 +93,9 @@ def 
fill_with_random_data(structure, max_children=3): child.value = random_data(child.metadata.data_type, child.metadata.ndim) -def maybe_set_random_value(primitive: IDSPrimitive, leave_empty: float) -> None: +def maybe_set_random_value( + primitive: IDSPrimitive, leave_empty: float, skip_complex: bool +) -> None: """Set the value of an IDS primitive with a certain chance. If the IDSPrimitive has coordinates, then the size of the coordinates is taken into @@ -153,7 +155,7 @@ def maybe_set_random_value(primitive: IDSPrimitive, leave_empty: float) -> None: # Scale chance of not setting a coordinate by our number of dimensions, # such that overall there is roughly a 50% chance that any coordinate # remains empty - maybe_set_random_value(coordinate_element, 0.5**ndim) + maybe_set_random_value(coordinate_element, 0.5**ndim, skip_complex) size = coordinate_element.shape[0 if coordinate.references else dim] if coordinate.size: # coordinateX = OR 1...1 @@ -176,13 +178,18 @@ def maybe_set_random_value(primitive: IDSPrimitive, leave_empty: float) -> None: elif primitive.metadata.data_type is IDSDataType.FLT: primitive.value = np.random.random_sample(size=shape) elif primitive.metadata.data_type is IDSDataType.CPX: + if skip_complex: + # If we are skipping complex numbers then leave the value empty. + return val = np.random.random_sample(shape) + 1j * np.random.random_sample(shape) primitive.value = val else: raise ValueError(f"Invalid IDS data type: {primitive.metadata.data_type}") -def fill_consistent(structure: IDSStructure, leave_empty: float = 0.2): +def fill_consistent( + structure: IDSStructure, leave_empty: float = 0.2, skip_complex: bool = False +): """Fill a structure with random data, such that coordinate sizes are consistent. Sets homogeneous_time to heterogeneous (always). @@ -196,6 +203,9 @@ def fill_consistent(structure: IDSStructure, leave_empty: float = 0.2): exclusive_coordinates: list of IDSPrimitives that have exclusive alternative coordinates. These are initially not filled, and only at the very end of filling an IDSToplevel, a choice is made between the exclusive coordinates. + skip_complex: Whether to skip over populating complex numbers. This is + useful for maintaining compatibility with older versions of netCDF4 + (<1.7.0) where complex numbers are not supported. 
""" if isinstance(structure, IDSToplevel): unsupported_ids_name = ( @@ -218,7 +228,9 @@ def fill_consistent(structure: IDSStructure, leave_empty: float = 0.2): for child in structure: if isinstance(child, IDSStructure): - exclusive_coordinates.extend(fill_consistent(child, leave_empty)) + exclusive_coordinates.extend( + fill_consistent(child, leave_empty, skip_complex) + ) elif isinstance(child, IDSStructArray): if child.metadata.coordinates[0].references: @@ -230,7 +242,7 @@ def fill_consistent(structure: IDSStructure, leave_empty: float = 0.2): if isinstance(coor, IDSPrimitive): # maybe fill with random data: try: - maybe_set_random_value(coor, leave_empty) + maybe_set_random_value(coor, leave_empty, skip_complex) except (RuntimeError, ValueError): pass child.resize(len(coor)) @@ -244,7 +256,9 @@ def fill_consistent(structure: IDSStructure, leave_empty: float = 0.2): else: child.resize(child.metadata.coordinates[0].size or 1) for ele in child: - exclusive_coordinates.extend(fill_consistent(ele, leave_empty)) + exclusive_coordinates.extend( + fill_consistent(ele, leave_empty, skip_complex) + ) else: # IDSPrimitive coordinates = child.metadata.coordinates @@ -256,7 +270,7 @@ def fill_consistent(structure: IDSStructure, leave_empty: float = 0.2): exclusive_coordinates.append(child) else: try: - maybe_set_random_value(child, leave_empty) + maybe_set_random_value(child, leave_empty, skip_complex) except (RuntimeError, ValueError): pass @@ -278,7 +292,7 @@ def fill_consistent(structure: IDSStructure, leave_empty: float = 0.2): coor = filled_refs.pop() unset_coordinate(coor) - maybe_set_random_value(element, leave_empty) + maybe_set_random_value(element, leave_empty, skip_complex) else: return exclusive_coordinates @@ -301,7 +315,9 @@ def callback(element): visit_children(callback, parent) -def compare_children(st1, st2, deleted_paths=set(), accept_lazy=False): +def compare_children( + st1, st2, deleted_paths=set(), accept_lazy=False, skip_complex=False +): """Perform a deep compare of two structures using asserts. All paths in ``deleted_paths`` are asserted that they are deleted in st2. diff --git a/imaspy/test/test_nc_autofill.py b/imaspy/test/test_nc_autofill.py index e0d3fe91..01280672 100644 --- a/imaspy/test/test_nc_autofill.py +++ b/imaspy/test/test_nc_autofill.py @@ -1,11 +1,52 @@ from imaspy.db_entry import DBEntry +from imaspy.exception import InvalidNetCDFEntry from imaspy.test.test_helpers import compare_children, fill_consistent +import re +import pytest +import netCDF4 +from packaging import version -def test_nc_latest_dd_autofill_put_get(ids_name, tmp_path): +def test_nc_latest_dd_autofill_put_get_skip_complex(ids_name, tmp_path): with DBEntry(f"{tmp_path}/test-{ids_name}.nc", "x") as entry: ids = entry.factory.new(ids_name) - fill_consistent(ids, 0.5) + fill_consistent(ids, leave_empty=0.5, skip_complex=True) + + entry.put(ids) + ids2 = entry.get(ids_name) + + compare_children(ids, ids2) + + +@pytest.mark.skipif( + version.parse(netCDF4.__version__) < version.parse("1.7.0"), + reason="NetCDF4 versions < 1.7.0 do not support complex numbers", +) +def test_nc_latest_dd_autofill_put_get_with_complex(ids_name, tmp_path): + with DBEntry(f"{tmp_path}/test-{ids_name}.nc", "x") as entry: + ids = entry.factory.new(ids_name) + fill_consistent(ids, leave_empty=0.5, skip_complex=False) + try: + entry.put(ids) + ids2 = entry.get(ids_name) + compare_children(ids, ids2) + except InvalidNetCDFEntry as e: + # This is expected, as these versions of NetCDF4 do not support + # complex numbers. 
+ if not re.search( + r".*NetCDF 1.7.0 or later is required for complex data types", str(e) + ): + raise InvalidNetCDFEntry(e) from e + + +@pytest.mark.skipif( + version.parse(netCDF4.__version__) >= version.parse("1.7.0"), + reason="NetCDF4 versions >= 1.7.0 support complex numbers", +) +def test_nc_latest_dd_autofill_put_get_with_complex(ids_name, tmp_path): + with DBEntry(f"{tmp_path}/test-{ids_name}.nc", "x") as entry: + ids = entry.factory.new(ids_name) + fill_consistent(ids, leave_empty=0.5, skip_complex=False) entry.put(ids) ids2 = entry.get(ids_name) diff --git a/pyproject.toml b/pyproject.toml index dccd6912..36e5fffb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ docs = [ ] imas-core = [ "imas-core@git+ssh://git@git.iter.org/imas/al-core.git@main" ] netcdf = [ - "netCDF4>=1.7.0", + "netCDF4>=1.4.1", ] h5py = [ "h5py", From 2f7b591cd9a4b825d92d3a47677171c4fc5e8ff9 Mon Sep 17 00:00:00 2001 From: Anushan Fernando Date: Tue, 21 Jan 2025 14:00:44 +0000 Subject: [PATCH 38/42] Minor changes to tests. --- conftest.py | 5 ----- imaspy/test/test_cli.py | 3 +-- imaspy/test/test_dbentry.py | 3 +-- imaspy/test/test_helpers.py | 4 +--- 4 files changed, 3 insertions(+), 12 deletions(-) diff --git a/conftest.py b/conftest.py index d1893f76..91a9a046 100644 --- a/conftest.py +++ b/conftest.py @@ -7,7 +7,6 @@ # - Fixtures that are useful across test modules import functools -import importlib import logging import os import sys @@ -73,10 +72,6 @@ def pytest_addoption(parser): } -# This is a dummy fixture, usually provided by pytest-xdist that isn't available -# in google3. -# The `worker_id` is only used by tests that require IMAS Core which we never -# run try: import pytest_xdist except ImportError: diff --git a/imaspy/test/test_cli.py b/imaspy/test/test_cli.py index 810acda6..d3642410 100644 --- a/imaspy/test/test_cli.py +++ b/imaspy/test/test_cli.py @@ -13,8 +13,7 @@ @pytest.mark.cli -@pytest.mark.skipif(not has_imas, reason="Requires IMAS Core.") -def test_imaspy_version(): +def test_imaspy_version(requires_imas): runner = CliRunner() result = runner.invoke(print_version) assert result.exit_code == 0 diff --git a/imaspy/test/test_dbentry.py b/imaspy/test/test_dbentry.py index d67fae0d..cb7ebe12 100644 --- a/imaspy/test/test_dbentry.py +++ b/imaspy/test/test_dbentry.py @@ -82,8 +82,7 @@ def test_dbentry_constructor(): assert get_entry_attrs(entry) == (1, 2, 3, 4, None, 6) -@pytest.mark.skipif(not has_imas, reason="Requires IMAS Core.") -def test_ignore_unknown_dd_version(monkeypatch, worker_id, tmp_path): +def test_ignore_unknown_dd_version(monkeypatch, worker_id, tmp_path, requires_imas): entry = open_dbentry(imaspy.ids_defs.MEMORY_BACKEND, "w", worker_id, tmp_path) ids = entry.factory.core_profiles() ids.ids_properties.homogeneous_time = 0 diff --git a/imaspy/test/test_helpers.py b/imaspy/test/test_helpers.py index 8a651d93..0b7e2b43 100644 --- a/imaspy/test/test_helpers.py +++ b/imaspy/test/test_helpers.py @@ -315,9 +315,7 @@ def callback(element): visit_children(callback, parent) -def compare_children( - st1, st2, deleted_paths=set(), accept_lazy=False, skip_complex=False -): +def compare_children(st1, st2, deleted_paths=set(), accept_lazy=False): """Perform a deep compare of two structures using asserts. All paths in ``deleted_paths`` are asserted that they are deleted in st2. From 3591f176cc5a9642f0c5be9c85bf54e725c29823 Mon Sep 17 00:00:00 2001 From: Anushan Fernando Date: Tue, 21 Jan 2025 14:03:50 +0000 Subject: [PATCH 39/42] Rename test. 
--- imaspy/test/test_nc_autofill.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/imaspy/test/test_nc_autofill.py b/imaspy/test/test_nc_autofill.py index 01280672..806caa7a 100644 --- a/imaspy/test/test_nc_autofill.py +++ b/imaspy/test/test_nc_autofill.py @@ -22,7 +22,9 @@ def test_nc_latest_dd_autofill_put_get_skip_complex(ids_name, tmp_path): version.parse(netCDF4.__version__) < version.parse("1.7.0"), reason="NetCDF4 versions < 1.7.0 do not support complex numbers", ) -def test_nc_latest_dd_autofill_put_get_with_complex(ids_name, tmp_path): +def test_nc_latest_dd_autofill_put_get_with_complex_older_netCDF4( + ids_name, tmp_path +): with DBEntry(f"{tmp_path}/test-{ids_name}.nc", "x") as entry: ids = entry.factory.new(ids_name) fill_consistent(ids, leave_empty=0.5, skip_complex=False) @@ -43,7 +45,9 @@ def test_nc_latest_dd_autofill_put_get_with_complex(ids_name, tmp_path): version.parse(netCDF4.__version__) >= version.parse("1.7.0"), reason="NetCDF4 versions >= 1.7.0 support complex numbers", ) -def test_nc_latest_dd_autofill_put_get_with_complex(ids_name, tmp_path): +def test_nc_latest_dd_autofill_put_get_with_complex_newer_netCDF4( + ids_name, tmp_path +): with DBEntry(f"{tmp_path}/test-{ids_name}.nc", "x") as entry: ids = entry.factory.new(ids_name) fill_consistent(ids, leave_empty=0.5, skip_complex=False) From 0d48b0b5bf4bcdadb0580219685522f50b57c2cd Mon Sep 17 00:00:00 2001 From: Anushan Fernando Date: Tue, 21 Jan 2025 14:28:48 +0000 Subject: [PATCH 40/42] Update numpy exception version change to 1.25. --- imaspy/test/test_minimal_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imaspy/test/test_minimal_types.py b/imaspy/test/test_minimal_types.py index d4614de5..07a51346 100644 --- a/imaspy/test/test_minimal_types.py +++ b/imaspy/test/test_minimal_types.py @@ -64,7 +64,7 @@ def test_assign_str_1d(minimal, caplog): # Prevent the expected numpy ComplexWarnings from cluttering pytest output @pytest.mark.filterwarnings( "ignore::numpy.ComplexWarning" - if version.parse(np.__version__) < version.parse("2.0.0") + if version.parse(np.__version__) < version.parse("1.25") else "ignore::numpy.exceptions.ComplexWarning" ) @pytest.mark.parametrize("typ, max_dim", [("flt", 6), ("cpx", 6), ("int", 3)]) @@ -94,7 +94,7 @@ def test_assign_numeric_types(minimal, caplog, typ, max_dim): # np allows casting of complex to float or int, but warns: with pytest.warns( np.ComplexWarning - if version.parse(np.__version__) < version.parse("2.0.0") + if version.parse(np.__version__) < version.parse("1.25") else np.exceptions.ComplexWarning ): caplog.clear() From 68c225dde4768c12a99492ab14de191125869944 Mon Sep 17 00:00:00 2001 From: Anushan Fernando Date: Tue, 21 Jan 2025 15:53:59 +0000 Subject: [PATCH 41/42] Fix bug in skip logic of tests. 
---
 imaspy/test/test_nc_autofill.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/imaspy/test/test_nc_autofill.py b/imaspy/test/test_nc_autofill.py
index 806caa7a..9bbc0f1e 100644
--- a/imaspy/test/test_nc_autofill.py
+++ b/imaspy/test/test_nc_autofill.py
@@ -19,7 +19,7 @@ def test_nc_latest_dd_autofill_put_get_skip_complex(ids_name, tmp_path):
 
 
 @pytest.mark.skipif(
-    version.parse(netCDF4.__version__) < version.parse("1.7.0"),
+    version.parse(netCDF4.__version__) >= version.parse("1.7.0"),
     reason="NetCDF4 versions < 1.7.0 do not support complex numbers",
 )
 def test_nc_latest_dd_autofill_put_get_with_complex_older_netCDF4(
@@ -42,7 +42,7 @@ def test_nc_latest_dd_autofill_put_get_with_complex_older_netCDF4(
 
 
 @pytest.mark.skipif(
-    version.parse(netCDF4.__version__) >= version.parse("1.7.0"),
+    version.parse(netCDF4.__version__) < version.parse("1.7.0"),
     reason="NetCDF4 versions >= 1.7.0 support complex numbers",
 )
 def test_nc_latest_dd_autofill_put_get_with_complex_newer_netCDF4(
From 95d587b0d10ae4554a941c9ead7e193df1755627 Mon Sep 17 00:00:00 2001
From: Olivier Hoenen
Date: Wed, 22 Jan 2025 16:57:21 +0100
Subject: [PATCH 42/42] Adding release notes for tag 1.2.0

---
 docs/source/changelog.rst | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 2601639a..ac0a1571 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -3,6 +3,26 @@
 Changelog
 =========
 
+What's new in IMASPy 1.2.0
+--------------------------
+
+New features and improvements
+'''''''''''''''''''''''''''''
+
+- Add :py:func:`imaspy.DBEntry.get_sample` (requires imas_core >= 5.4.0)
+- Improve validation of netCDF files
+- Improve compatibility with the UDA backend in imas_core
+- Extend netCDF4 support to versions >= 1.4.1 (complex data requires >= 1.7.0)
+- Allow running tests without imas_core
+
+Bug fixes
+'''''''''
+
+- Fix a bug when lazy loading multiple IDSs from the same HDF5 DBEntry
+- Fix a bug when lazy loading a child quantity that was added in a newer DD
+  version than stored on disk
+
+
 What's new in IMASPy 1.1.1
 --------------------------