diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..df8ba3bd --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,72 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at . All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..ac28e400 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,45 @@ +# Contributing guidelines + +We welcome any kind of contribution to `imas-python`, +from a simple comment, a question or even a full-fledged pull +request. +Please first make sure you read and follow the +[Code of Conduct](CODE_OF_CONDUCT.md). + +## You think you found a bug in the code, or have a question about its use +1. use the [issue search](https://github.com/iterorganization/ +imas-python/issues) to check if someone already created +a similar issue; +2. if not, make a **new issue** to describe your problem or question. +In the case of a suspected bug, please try to give all the relevant +information to allow reproducing the error or identifying +its root cause (version of imas-python, OS and relevant +dependencies, snippet of code); +3. apply relevant labels to the issue. + +## You want to make or ask some change to the code +1. use the [issue search](https://github.com/iterorganization/ +imas-python/issues) to check if someone already proposed +a similar idea/change; +2. if not, create a **new issue** to describe what change you would like to see +implemented and specify whether you intend to work on it yourself or whether some help +will be needed; +3. wait until some kind of consensus is reached about your idea being relevant, +at which time the issue will be assigned (to you or someone else who can work on +this topic); +4. if you do the development yourself, fork the repository to your own GitHub +profile and create your own feature branch off of the latest develop commit. +Make sure to regularly sync your branch with the latest commits from `develop` +(find instructions +[here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/ +working-with-forks/syncing-a-fork)); +5. when your development is ready, create a pull request (find instructions +[here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/ +proposing-changes-to-your-work-with-pull-requests/ +creating-a-pull-request-from-a-fork)). + + +While we will try to answer questions quickly and to address issues in a timely +manner, it may sometimes take longer than expected. A friendly ping in the +discussion or the issue thread can help draw attention if you find that it has +stalled. diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index ea4a5d46..00000000 --- a/LICENSE.md +++ /dev/null @@ -1,46 +0,0 @@ -Copyright (c) 2020-2023 ITER Organization, Route de Vinon-sur-Verdon, CS 90 046, - 13067 St-Paul-lez-Durance Cedex, France - -Copyright (c) 2020-2023 Karel Lucas van de Plassche - -Copyright (c) 2020 Dutch Institute for Fundamental Energy Research - -Copyright (c) 2020-2022 Daan van Vugt - -All rights reserved.
- -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Use and redistribution, for peaceful purposes only, are granted solely to the - ITER Members (the People's Republic of China, the European Atomic Energy - Community, the Republic of India, Japan, the Republic of Korea, the Russian - Federation, and the United States of America), with the right to sub-license - within their territory for the purpose of fusion research and development. - Organizations, bodies or individuals of non-ITER Members shall seek specific - written permission from the ITER Organization before use or redistribution of - this software. - -* All modifications/derivatives shall be made available to the ITER Organization. - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -* Neither the name of the ITER Organization nor the names of its contributors - may be used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE ITER ORGANIZATION OR ITS CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..33bb3680 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". 
+ + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/README.md b/README.md index ce753f5a..14d4b81e 100644 --- a/README.md +++ b/README.md @@ -1,137 +1,50 @@ -# IMASPy +# IMAS-Python -IMASPy is a pure-python library to handle arbitrarily nested data structures. -IMASPy is designed for, but not necessarily bound to, interacting with -Interface Data Structures (IDSs) as defined by the -Integrated Modelling & Analysis Suite (IMAS) Data Model. +IMAS-Python is a pure-python library to handle arbitrarily nested data structures. +It is designed for, but not necessarily bound to, interacting with Interface +Data Structures (IDSs) as defined by the Integrated Modelling & Analysis Suite (IMAS) +Data Model. 
-It provides: -* An easy-to-install and easy-to-get started package by - * Not requiring an IMAS installation - * Not strictly requiring matching a Data Dictionary (DD) version -* An pythonic alternative to the IMAS Python High Level Interface (HLI) -* Checking of correctness on assign time, instead of database write time -* Dynamically created in-memory pre-filled data trees from DD XML specifications +## Install -This package is developed on [ITER bitbucket](https://git.iter.org/projects/IMAS/repos/imaspy). -For user support, contact the IMAS team on the [IMAS user slack](https://imasusers.slack.com), -open a [JIRA issue](https://jira.iter.org/projects/IMAS), or email the -support team on . +Install steps are described in the documentation generated from `/docs/source/installing.rst`. -## Installation - -### On ITER system, EuroFusion gateway - -There is a `module` available on ITER and the Gateway, so you can run - -```bash -module load IMASPy -``` - -IMASPy can work with either Access Layer versions 4 or 5 (the used version is -automatically detected when importing the `imaspy` module). IMASPy still works (with -limited functionality) when no IMAS module is loaded. - -### Local - -We recommend using a `venv`: - -```bash -python3 -m venv ./venv -. venv/bin/activate -``` - -Then clone this repository, and run `pip install`: - -```bash -git clone ssh://git@git.iter.org/imas/imaspy.git -cd imaspy -pip install . -# Optional: also install `imas-core` with the HDF5 backend in the venv: -pip install .[imas-core] -``` - -If you get strange errors you might want to upgrade your `setuptools` and `pip`. -(you might want to add the `--user` flag to your pip installs when not in a `venv`) - -### Development installation - -For development an installation in editable mode may be more convenient, and -you will need some extra dependencies to run the test suite and build -documentation. - -```bash -pip install -e .[test,docs] -``` +Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) +and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMAS-doc/index.html) -Test your installation by trying +The documentation can be manually generated by installing sphinx and running: ```bash -cd ~ -python -c "import imaspy; print(imaspy.__version__)" +make -C docs html ``` -which should return your just installed version number. - -### Installation without ITER access - -The installation script tries to access the [ITER IMAS Core Data Dictionary repository](https://git.iter.org/projects/IMAS/repos/data-dictionary/browse) -to fetch the latest versions. If you do not have git+ssh access there, you can -try to find this repository elsewhere, and do a `git fetch --tags`. - -Alternatively you could try to obtain an `IDSDef.zip` and place it in `~/.config/imaspy/`. - -Test your installation by trying - -```bash -python -c "import imaspy; factory = imaspy.IDSFactory()" -``` -If the following error is raised: -```bash -RuntimeError: Could not find any data dictionary definitions. -``` -it means that the Data Dictionary definitions weren't created during the install. -You can generate these definitions by executing `build_DD` in the command line. -Missing packages can include among others: [GitPython](https://github.com/gitpython-developers/GitPython), and Java. 
## How to use ```python -import imaspy -factory = imaspy.IDSFactory() +import imas +factory = imas.IDSFactory() equilibrium = factory.equilibrium() print(equilibrium) -equilibrium.ids_properties.homogeneous_time = imaspy.ids_defs.IDS_TIME_MODE_HETEROGENEOUS +equilibrium.ids_properties.homogeneous_time = imas.ids_defs.IDS_TIME_MODE_HETEROGENEOUS equilibrium.ids_properties.comment = "testing" -dbentry = imaspy.DBEntry(imaspy.ids_defs.HDF5_BACKEND, "ITER", 1, 1) -dbentry.create() -dbentry.put(equilibrium) - -# TODO: find an example with a significant change between versions (rename?) -older_dbentry = imaspy.DBEntry(imaspy.ids_defs.HDF5_BACKEND, "ITER", 1, 1, version="3.35.0") -equilibrium2 = older_root.get("equilibrium") -print(equilibrium2.ids_properties.comment) +with imas.DBEntry("imas:hdf5?path=./testdb", "w") as dbentry: + dbentry.put(equilibrium) ``` -## Documentation - -Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) -and can be found at the [ITER sharepoint](https://sharepoint.iter.org/departments/POP/CM/IMDesign/Code%20Documentation/IMASPy-doc/index.html) +A quick 5-minute introduction is available in the documentation generated from `/docs/source/intro.rst`. -The documentation can be manually generated by installing sphinx and running: -```bash -make -C docs html -``` +## Legal -## Interacting with IMAS AL +IMAS-Python is Copyright 2020-2024 ITER Organization, Copyright 2020-2023 Karel Lucas van de +Plassche, Copyright 2020-2022 Daan van Vugt, +and Copyright 2020 Dutch Institute for Fundamental Energy Research. +It is licensed under [LGPL 3.0](LICENSE.txt). -Interaction with the IMAS AL is provided by a Cython interface to the Access Layer. -As Cython code, it needs to be compiled on your local system. -To find the headers, the Access Layer `include` folder needs to be in your `INCLUDE_PATH`. On most HPC systems, a `module load IMAS` is enough. ## Acknowledgments diff --git a/docs/source/configuring.rst b/docs/source/configuring.rst index 07073faf..dae11b6f 100644 --- a/docs/source/configuring.rst +++ b/docs/source/configuring.rst @@ -29,6 +29,13 @@ This page provides an overview of available variables. you can use :external:py:meth:`logging.getLogger("imaspy").setLevel(...) ` to change the log level programmatically. + +``IMASPY_DISABLE_NC_VALIDATE`` + Disables validation of netCDF files when loading an IDS from an IMAS netCDF file. + + .. caution:: + Disabling the validation may lead to errors when reading data from an IMAS netCDF file. + ``IMAS_VERSION`` Sets :ref:`The default Data Dictionary version` to use. diff --git a/docs/source/index.rst b/docs/source/index.rst index 19e3985b..c5a3f24c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -77,5 +77,5 @@ Manual LICENSE ------- -.. literalinclude:: ../../LICENSE.md +.. literalinclude:: ../../LICENSE.txt :language: text diff --git a/docs/source/netcdf.rst b/docs/source/netcdf.rst index dd3bf431..7a7593e6 100644 --- a/docs/source/netcdf.rst +++ b/docs/source/netcdf.rst @@ -102,3 +102,11 @@ your directory. Let's open this file with ``xarray.load_dataset``: Attributes: Conventions: IMAS data_dictionary_version: 3.41.0 + + +Validating an IMAS netCDF file +------------------------------ + +IMAS netCDF files can be validated with IMASPy through the command line ``imaspy +validate_nc <filename>``. See also :ref:`IMASPy Command Line tool` or type +``imaspy validate_nc --help`` in a command line.
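As a usage sketch of the validation command documented in the hunk above (the file name `example.nc` is a hypothetical placeholder, not taken from this changeset):

```bash
# Validate an IMAS netCDF file against the IMAS conventions.
# The CLI command added in imaspy/command/cli.py prints a diagnostic
# and exits with status 1 when the file does not adhere to them.
imaspy validate_nc example.nc

# List the options of the validator
imaspy validate_nc --help
```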
diff --git a/imaspy/backends/db_entry_impl.py b/imaspy/backends/db_entry_impl.py index 8263c3b6..bc8ca10d 100644 --- a/imaspy/backends/db_entry_impl.py +++ b/imaspy/backends/db_entry_impl.py @@ -2,13 +2,40 @@ # You should have received the IMASPy LICENSE file with this project. from abc import ABC, abstractmethod -from typing import Any, List, Optional +from dataclasses import dataclass +from typing import Any, List, Optional, Union + +import numpy from imaspy.ids_convert import NBCPathMap from imaspy.ids_factory import IDSFactory from imaspy.ids_toplevel import IDSToplevel +@dataclass +class GetSliceParameters: + """Helper class to store parameters to get_slice.""" + + time_requested: float + """See :param:`imaspy.db_entry.DBEntry.get_slice.time_requested`.""" + interpolation_method: int + """See :param:`imaspy.db_entry.DBEntry.get_slice.interpolation_method`.""" + + +@dataclass +class GetSampleParameters: + """Helper class to store parameters to get_sample.""" + + tmin: float + """See :param:`imaspy.db_entry.DBEntry.get_sample.tmin`.""" + tmax: float + """See :param:`imaspy.db_entry.DBEntry.get_sample.tmax`.""" + dtime: Optional[numpy.ndarray] + """See :param:`imaspy.db_entry.DBEntry.get_sample.dtime`.""" + interpolation_method: Optional[int] + """See :param:`imaspy.db_entry.DBEntry.get_sample.interpolation_method`.""" + + class DBEntryImpl(ABC): """Interface for DBEntry implementations.""" @@ -47,20 +74,17 @@ def get( self, ids_name: str, occurrence: int, - time_requested: Optional[float], - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: IDSToplevel, lazy: bool, nbc_map: Optional[NBCPathMap], ) -> None: - """Implement DBEntry.get()/get_slice(). Load data from the data source. + """Implement DBEntry.get/get_slice/get_sample. Load data from the data source. Args: ids_name: Name of the IDS to load. occurrence: Which occurrence of the IDS to load. - time_requested: None for get(), requested time slice for get_slice(). - interpolation_method: Requested interpolation method (ignore when - time_requested is None). + parameters: Additional parameters for a get_slice/get_sample call. destination: IDS object to store data in. lazy: Use lazy loading. nbc_map: NBCPathMap to use for implicit conversion.
When None, no implicit diff --git a/imaspy/backends/imas_core/al_context.py b/imaspy/backends/imas_core/al_context.py index 07f37dec..5d782fda 100644 --- a/imaspy/backends/imas_core/al_context.py +++ b/imaspy/backends/imas_core/al_context.py @@ -8,6 +8,8 @@ from contextlib import contextmanager from typing import TYPE_CHECKING, Any, Callable, Iterator, List, Optional, Tuple +import numpy + from imaspy.backends.imas_core.imas_interface import ll_interface from imaspy.exception import LowlevelError from imaspy.ids_defs import ( @@ -105,6 +107,21 @@ def slice_action( raise LowlevelError("slice_action", status) return ALContext(ctx) + def timerange_action( + self, + path: str, + rwmode: int, + tmin: float, + tmax: float, + dtime: Optional[numpy.ndarray], + interpolation_method: int, + ) -> "ALContext": + """Begin a new timerange action for use in a ``with`` context.""" + ctx = ll_interface.begin_timerange_action( + self.ctx, path, rwmode, tmin, tmax, dtime, interpolation_method + ) + return ALContext(ctx) + def arraystruct_action( self, path: str, timebase: str, size: int ) -> "ALArrayStructContext": @@ -299,6 +316,10 @@ def get_context(self) -> ALContext: # from the cache else: + # Purge the cache to close open contexts from other IDSs (IMAS-5603) + cache = self.dbentry._lazy_ctx_cache + while cache: + cache.pop().close() return self.dbentry_ctx @contextmanager @@ -317,6 +338,25 @@ def slice_action( (path, rwmode, time_requested, interpolation_method), ) + @contextmanager + def timerange_action( + self, + path: str, + rwmode: int, + tmin: float, + tmax: float, + dtime: Optional[numpy.ndarray], + interpolation_method: int, + ) -> Iterator["LazyALContext"]: + """Lazily start a lowlevel timerange action, see + :meth:`ALContext.timerange_action`. + """ + yield LazyALContext( + self, + ALContext.timerange_action, + (path, rwmode, tmin, tmax, dtime, interpolation_method), + ) + def arraystruct_action( self, path: str, timebase: str, size: int ) -> "LazyALArrayStructContext": diff --git a/imaspy/backends/imas_core/db_entry_al.py b/imaspy/backends/imas_core/db_entry_al.py index 8bead9ae..34a3ab32 100644 --- a/imaspy/backends/imas_core/db_entry_al.py +++ b/imaspy/backends/imas_core/db_entry_al.py @@ -5,9 +5,10 @@ import logging import os from collections import deque -from typing import Any, Deque, List, Optional +from typing import Any, Deque, List, Optional, Union from urllib.parse import urlparse +from imaspy.backends.db_entry_impl import GetSampleParameters, GetSliceParameters from imaspy.db_entry import DBEntryImpl from imaspy.exception import DataEntryException, LowlevelError from imaspy.ids_convert import NBCPathMap, dd_version_map_from_factories @@ -40,6 +41,7 @@ from .db_entry_helpers import delete_children, get_children, put_children from .imas_interface import LLInterfaceError, has_imas, ll_interface from .mdsplus_model import ensure_data_dir, mdsplus_model_dir +from .uda_support import extract_idsdef, get_dd_version_from_idsdef_xml _BACKEND_NAME = { ASCII_BACKEND: "ascii", @@ -185,6 +187,24 @@ def _setup_backend( pass # nothing to set up elif backend == "uda": + # Set IDSDEF_PATH to point the UDA backend to the selected DD version + idsdef_path = None + + if factory._xml_path is not None: + # Factory was constructed with an explicit XML path, point UDA to that: + idsdef_path = factory._xml_path + + elif "IMAS_PREFIX" in os.environ: + # Check if UDA can use the IDSDef.xml stored in $IMAS_PREFIX/include/ + idsdef_path = os.environ["IMAS_PREFIX"] + "/include/IDSDef.xml" + if 
get_dd_version_from_idsdef_xml(idsdef_path) != factory.version: + idsdef_path = None + + if idsdef_path is None: + # Extract XML from the DD zip and point UDA to it + idsdef_path = extract_idsdef(factory.version) + + os.environ["IDSDEF_PATH"] = idsdef_path logger.warning( "The UDA backend is not tested with IMASPy and may not work properly. " "Please raise any issues you find." @@ -219,8 +239,7 @@ def get( self, ids_name: str, occurrence: int, - time_requested: Optional[float], - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: IDSToplevel, lazy: bool, nbc_map: Optional[NBCPathMap], @@ -248,13 +267,28 @@ def get( else: context = self._db_ctx # Now fill the IDSToplevel - if time_requested is None or destination.metadata.type is IDSType.CONSTANT: + if parameters is None or destination.metadata.type is IDSType.CONSTANT: # called from get(), or when the IDS is constant (see IMAS-3330) manager = context.global_action(ll_path, READ_OP) - else: # get_slice + elif isinstance(parameters, GetSliceParameters): manager = context.slice_action( - ll_path, READ_OP, time_requested, interpolation_method + ll_path, + READ_OP, + parameters.time_requested, + parameters.interpolation_method, ) + elif isinstance(parameters, GetSampleParameters): + manager = context.timerange_action( + ll_path, + READ_OP, + parameters.tmin, + parameters.tmax, + parameters.dtime, + parameters.interpolation_method, + ) + else: + raise TypeError(f"Incorrect type for parameters: {type(parameters)}.") + with manager as read_ctx: if lazy: destination._set_lazy_context(read_ctx) diff --git a/imaspy/backends/imas_core/db_entry_helpers.py b/imaspy/backends/imas_core/db_entry_helpers.py index de1d9323..f69eafd3 100644 --- a/imaspy/backends/imas_core/db_entry_helpers.py +++ b/imaspy/backends/imas_core/db_entry_helpers.py @@ -77,11 +77,15 @@ def get_children( getattr(structure, name)._IDSPrimitive__value = data -def _get_child(child: IDSBase, ctx: LazyALContext): +def _get_child(child: IDSBase, ctx: Optional[LazyALContext]): """Get a single child when required (lazy loading).""" # NOTE: changes in this method must be propagated to _get_children and vice versa # Performance: this method is specialized for the lazy get + # ctx can be None when the parent structure does not exist in the on-disk DD version + if ctx is None: + return # There is no data to be loaded + time_mode = ctx.time_mode if time_mode == IDS_TIME_MODE_INDEPENDENT and child.metadata.type.is_dynamic: return # skip dynamic (time-dependent) nodes diff --git a/imaspy/backends/imas_core/imas_interface.py b/imaspy/backends/imas_core/imas_interface.py index 07f4783e..b92438b1 100644 --- a/imaspy/backends/imas_core/imas_interface.py +++ b/imaspy/backends/imas_core/imas_interface.py @@ -215,6 +215,13 @@ def get_occurrences(self, ctx, ids_name): def get_al_version(self): return self._al_version_str + # New methods added in AL 5.4 + + def begin_timerange_action( + self, ctx, path, rwmode, tmin, tmax, dtime, interpolation_method + ): + raise self._minimal_version("5.4") + # Dummy documentation for interface: for funcname in dir(LowlevelInterface): diff --git a/imaspy/backends/imas_core/uda_support.py b/imaspy/backends/imas_core/uda_support.py new file mode 100644 index 00000000..8b599faa --- /dev/null +++ b/imaspy/backends/imas_core/uda_support.py @@ -0,0 +1,56 @@ +import logging +from pathlib import Path +from typing import Optional, Union +from xml.etree import ElementTree as ET + +from imaspy import dd_zip + +from .mdsplus_model import
_get_xdg_cache_dir + +logger = logging.getLogger(__name__) + + +def get_dd_version_from_idsdef_xml(path: Union[str, Path]) -> Optional[str]: + """Parse the IDSDef.xml up to the point where the Data Dictionary version is set. + + Returns: + The Data Dictionary version for the provided file, or None if the file cannot be + parsed / contains no Data Dictionary version. + """ + try: + for _, elem in ET.iterparse(path): + if elem.tag == "version": + return elem.text + except OSError: + pass # File not found, etc. + except Exception: + logger.warning("Could not read DD version from file '%s'.", path, exc_info=True) + return None + + +def extract_idsdef(dd_version: str) -> str: + """Extract the IDSDef.xml for the given version and return its path. + + The IDSDef.xml is extracted to the imaspy cache folder: + + - If the file imaspy/uda/<dd_version>.xml already exists, we assume it is correct + """ + cache_dir_path = Path(_get_xdg_cache_dir()) / "imaspy" / "uda" + cache_dir_path.mkdir(parents=True, exist_ok=True) # ensure cache folder exists + idsdef_path = cache_dir_path / (dd_version + ".xml") + + if idsdef_path.exists(): + extract = False + # Check if the file is fine + if get_dd_version_from_idsdef_xml(idsdef_path) != dd_version: + # File is corrupt, I guess? We'll overwrite: + extract = True + else: + extract = True + + if extract: + # Extract XML from the dd_zip and store + data = dd_zip.get_dd_xml(dd_version) + idsdef_path.write_bytes(data) + + return str(idsdef_path) diff --git a/imaspy/backends/netcdf/db_entry_nc.py b/imaspy/backends/netcdf/db_entry_nc.py index ba7334fc..732eb97d 100644 --- a/imaspy/backends/netcdf/db_entry_nc.py +++ b/imaspy/backends/netcdf/db_entry_nc.py @@ -1,12 +1,16 @@ """DBEntry implementation using NetCDF as a backend.""" import logging -from typing import List +from typing import List, Optional, Union -from imaspy.backends.db_entry_impl import DBEntryImpl +from imaspy.backends.db_entry_impl import ( + DBEntryImpl, + GetSampleParameters, + GetSliceParameters, +) from imaspy.backends.netcdf.ids2nc import IDS2NC -from imaspy.backends.netcdf.nc2ids import nc2ids -from imaspy.exception import DataEntryException +from imaspy.backends.netcdf.nc2ids import NC2IDS +from imaspy.exception import DataEntryException, InvalidNetCDFEntry from imaspy.ids_convert import NBCPathMap, convert_ids from imaspy.ids_factory import IDSFactory from imaspy.ids_toplevel import IDSToplevel @@ -39,20 +43,30 @@ def __init__(self, fname: str, mode: str, factory: IDSFactory) -> None: """NetCDF4 dataset.""" self._factory = factory """Factory (DD version) that the user wishes to use.""" - self._ds_factory = factory # Overwritten if data exists, see below + self._ds_factory = factory # Overwritten if data exists, see _init_dd_version """Factory (DD version) that the data is stored in.""" + try: + self._init_dd_version(fname, mode, factory) + except Exception: + self._dataset.close() + raise + + def _init_dd_version(self, fname: str, mode: str, factory: IDSFactory) -> None: + """Check or set up the data dictionary version.""" # Check if there is already data in this dataset: if self._dataset.dimensions or self._dataset.variables or self._dataset.groups: if "data_dictionary_version" not in self._dataset.ncattrs(): - raise RuntimeError( + raise InvalidNetCDFEntry( "Invalid netCDF file: `data_dictionary_version` missing" ) dataset_dd_version = self._dataset.data_dictionary_version if dataset_dd_version != factory.dd_version: self._ds_factory = IDSFactory(dataset_dd_version) - # TODO: [validate] that the data contained in this file
adheres to the DD + elif mode not in ["w", "x", "r+", "a"]: + # Reading an empty file... + raise InvalidNetCDFEntry(f"Invalid netCDF file: `{fname}` is empty.") else: # This is an empty netCDF dataset: set global attributes self._dataset.Conventions = "IMAS" @@ -74,15 +88,18 @@ def get( self, ids_name: str, occurrence: int, - time_requested: float | None, - interpolation_method: int, + parameters: Union[None, GetSliceParameters, GetSampleParameters], destination: IDSToplevel, lazy: bool, - nbc_map: NBCPathMap | None, + nbc_map: Optional[NBCPathMap], ) -> None: # Feature compatibility checks - if time_requested is not None: - raise NotImplementedError("`get_slice` is not available for netCDF files.") + if parameters is not None: + if isinstance(parameters, GetSliceParameters): + func = "get_slice" + else: + func = "get_sample" + raise NotImplementedError(f"`{func}` is not available for netCDF files.") if lazy: raise NotImplementedError( "Lazy loading is not implemented for netCDF files." @@ -98,13 +115,13 @@ def get( # Load data into the destination IDS if self._ds_factory.dd_version == destination._dd_version: - nc2ids(group, destination) + NC2IDS(group, destination).run() else: # FIXME: implement automatic conversion using nbc_map # As a work-around: do an explicit conversion, but automatic conversion # will also be needed to implement lazy loading. ids = self._ds_factory.new(ids_name) - nc2ids(group, ids) + NC2IDS(group, ids).run() convert_ids(ids, None, target=destination) return destination diff --git a/imaspy/backends/netcdf/ids2nc.py b/imaspy/backends/netcdf/ids2nc.py index 9fad4044..34e63101 100644 --- a/imaspy/backends/netcdf/ids2nc.py +++ b/imaspy/backends/netcdf/ids2nc.py @@ -23,10 +23,10 @@ IDSDataType.CPX: netCDF4.default_fillvals["f8"] * (1 + 1j), } dtypes = { - IDSDataType.INT: numpy.int32, + IDSDataType.INT: numpy.dtype(numpy.int32), IDSDataType.STR: str, - IDSDataType.FLT: numpy.float64, - IDSDataType.CPX: numpy.complex128, + IDSDataType.FLT: numpy.dtype(numpy.float64), + IDSDataType.CPX: numpy.dtype(numpy.complex128), } SHAPE_DTYPE = numpy.int32 @@ -188,7 +188,7 @@ def create_variables(self) -> None: kwargs = {} if dtype is not str: # Enable compression: kwargs.update(compression="zlib", complevel=1) - if dtype is not numpy.complex128: # Set fillvalue + if dtype is not dtypes[IDSDataType.CPX]: # Set fillvalue kwargs.update(fill_value=default_fillvals[metadata.data_type]) # Create variable dimensions = get_dimensions(path, self.homogeneous_time) diff --git a/imaspy/backends/netcdf/nc2ids.py b/imaspy/backends/netcdf/nc2ids.py index d071a3ba..50905ba8 100644 --- a/imaspy/backends/netcdf/nc2ids.py +++ b/imaspy/backends/netcdf/nc2ids.py @@ -1,8 +1,12 @@ -from typing import Iterator, List, Tuple +import logging +import os +from typing import Iterator, List, Optional, Tuple import netCDF4 +from imaspy.backends.netcdf import ids2nc from imaspy.backends.netcdf.nc_metadata import NCMetadata +from imaspy.exception import InvalidNetCDFEntry from imaspy.ids_base import IDSBase from imaspy.ids_data_type import IDSDataType from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS @@ -10,6 +14,15 @@ from imaspy.ids_structure import IDSStructure from imaspy.ids_toplevel import IDSToplevel +logger = logging.getLogger(__name__) + + +def variable_error(var, issue, value, expected=None) -> InvalidNetCDFEntry: + return InvalidNetCDFEntry( + f"Variable `{var.name}` has incorrect {issue}: `{value}`." + + (f" Was expecting `{expected}`." 
if expected is not None else "") + ) + def split_on_aos(metadata: IDSMetadata): paths = [] @@ -54,82 +67,236 @@ def _tree_iter( yield from _tree_iter(node, paths, curindex + (i,)) -def nc2ids(group: netCDF4.Group, ids: IDSToplevel): - """Get data from the netCDF group and store it in the provided IDS.""" - try: - _nc2ids(group, ids) - except Exception as exc: - raise RuntimeError( - "An error occurred while reading data from the netCDF file " - f"'{group.filepath()}'. The netCDF functionality is currently in " - "preview status. Unexpected data in an otherwise valid netCDF file " - "may cause errors in IMASPy. A more robust mechanism to load IDS data from " - "netCDF files will be included in the next release of IMASPy." - ) from exc - - -def _nc2ids(group: netCDF4.Group, ids: IDSToplevel): - var_names = list(group.variables) - # FIXME: ensure that var_names are sorted properly - # Current assumption is that creation-order is fine - homogeneous_time = ( - group["ids_properties.homogeneous_time"][()] == IDS_TIME_MODE_HOMOGENEOUS - ) - ncmeta = NCMetadata(ids.metadata) +class NC2IDS: + """Class responsible for reading an IDS from a NetCDF group.""" + + def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None: + """Initialize NC2IDS converter. + + Args: + group: NetCDF group that stores the IDS data. + ids: Corresponding IDS toplevel to store the data in. + """ + self.group = group + """NetCDF Group that the IDS is stored in.""" + self.ids = ids + """IDS to store the data in.""" - # Never return masked arrays, they're slow and we'll handle most of the unset values - # through the `:shape` arrays - group.set_auto_mask(False) + self.ncmeta = NCMetadata(ids.metadata) + """NetCDF related metadata.""" + self.variables = list(group.variables) + """List of variable names stored in the netCDF group.""" + # Don't use masked arrays: they're slow and we'll handle most of the unset + # values through the `:shape` arrays + self.group.set_auto_mask(False) - for var_name in var_names: - if var_name.endswith(":shape"): - continue # TODO: validate that this is used + # Validate and get value of ids_properties.homogeneous_time + self.homogeneous_time = True # Must be initialized for self._validate_variable + """True iff the IDS time mode is homogeneous.""" - # FIXME: error handling: - metadata = ids.metadata[var_name] + if "ids_properties.homogeneous_time" not in self.variables: + raise InvalidNetCDFEntry( + "Mandatory variable `ids_properties.homogeneous_time` does not exist." + ) + var = group["ids_properties.homogeneous_time"] + self._validate_variable(var, ids.ids_properties.homogeneous_time.metadata) + if var[()] not in [0, 1, 2]: + raise InvalidNetCDFEntry( + f"Invalid value for ids_properties.homogeneous_time: {var[()]}. " + "Was expecting: 0, 1 or 2." + ) + self.homogeneous_time = var[()] == IDS_TIME_MODE_HOMOGENEOUS - # TODO: validate metadata (data type, units, etc.) 
conforms to DD + def run(self) -> None: + """Load the data from the netCDF group into the IDS.""" + self.variables.sort() + self.validate_variables() + for var_name in self.variables: + if var_name.endswith(":shape"): + continue + metadata = self.ids.metadata[var_name] - if metadata.data_type is IDSDataType.STRUCTURE: - continue # This only contains DD metadata we already know + if metadata.data_type is IDSDataType.STRUCTURE: + continue # This only contains DD metadata we already know + + var = self.group[var_name] + if metadata.data_type is IDSDataType.STRUCT_ARRAY: + if "sparse" in var.ncattrs(): + shapes = self.group[var_name + ":shape"][()] + for index, node in tree_iter(self.ids, metadata): + node.resize(shapes[index][0]) + + else: + # FIXME: extract dimension name from nc file? + dim = self.ncmeta.get_dimensions( + metadata.path_string, self.homogeneous_time + )[-1] + size = self.group.dimensions[dim].size + for _, node in tree_iter(self.ids, metadata): + node.resize(size) + + continue + + # FIXME: this may be a gigantic array, not required for sparse data + var = self.group[var_name] + data = var[()] - var = group[var_name] - if metadata.data_type is IDSDataType.STRUCT_ARRAY: if "sparse" in var.ncattrs(): - shapes = group[var_name + ":shape"][()] - for index, node in tree_iter(ids, metadata): - node.resize(shapes[index][0]) + if metadata.ndim: + shapes = self.group[var_name + ":shape"][()] + for index, node in tree_iter(self.ids, metadata): + shape = shapes[index] + if shape.all(): + node.value = data[index + tuple(map(slice, shapes[index]))] + else: + for index, node in tree_iter(self.ids, metadata): + value = data[index] + if value != getattr(var, "_FillValue", None): + node.value = data[index] + + elif metadata.path_string not in self.ncmeta.aos: + # Shortcut for assigning untensorized data + self.ids[metadata.path] = data else: - # FIXME: extract dimension name from nc file? - dim = ncmeta.get_dimensions(metadata.path_string, homogeneous_time)[-1] - size = group.dimensions[dim].size - for _, node in tree_iter(ids, metadata): - node.resize(size) - - continue - - # FIXME: this may be a gigantic array, not required for sparse data - var = group[var_name] - data = var[()] - - if "sparse" in var.ncattrs(): - if metadata.ndim: - shapes = group[var_name + ":shape"][()] - for index, node in tree_iter(ids, metadata): - shape = shapes[index] - if shape.all(): - node.value = data[index + tuple(map(slice, shapes[index]))] - else: - for index, node in tree_iter(ids, metadata): - value = data[index] - if value != getattr(var, "_FillValue", None): - node.value = data[index] + for index, node in tree_iter(self.ids, metadata): + node.value = data[index] - elif metadata.path_string not in ncmeta.aos: - # Shortcut for assigning untensorized data - ids[metadata.path] = data + def validate_variables(self) -> None: + """Validate that all variables in the netCDF Group exist and match the DD.""" + disable_validate = os.environ.get("IMASPY_DISABLE_NC_VALIDATE") + if disable_validate and disable_validate != "0": + logger.info( + "NetCDF file validation disabled: " + "This may lead to errors when reading data!" + ) + return # validation checks are disabled - else: - for index, node in tree_iter(ids, metadata): - node.value = data[index] + for var_name in self.variables: + if var_name.endswith(":shape"): + # Check that there is a corresponding variable + data_var = var_name.rpartition(":shape")[0] + if data_var not in self.variables: + raise InvalidNetCDFEntry( + f"Invalid netCDF variable: {var_name}. 
" + f"Shape information provided for non-existing {data_var}." + ) + # Corresponding variable must be sparse + if "sparse" not in self.group[data_var].ncattrs(): + raise InvalidNetCDFEntry( + f"Shape information provided for {data_var}, but this variable " + "is not sparse." + ) + # That's all for :shape arrays here, rest is checked in + # _validate_variable (which defers to _validate_sparsity) + continue + + # Check that the DD defines this variable, and validate its metadata + var = self.group[var_name] + try: + metadata = self.ids.metadata[var_name] + except KeyError: + raise InvalidNetCDFEntry( + f"Invalid variable {var_name}: no such variable exists in the " + f"{self.ids.metadata.name} IDS." + ) + self._validate_variable(var, metadata) + + def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> None: + """Validate that the variable has correct metadata, raise an exception if not. + + Args: + var: NetCDF variable + metadata: IDSMetadata of the corresponding IDS object + """ + attrs: dict = vars(var).copy() + attrs.pop("_FillValue", None) + if metadata.data_type not in [IDSDataType.STRUCTURE, IDSDataType.STRUCT_ARRAY]: + # Data type + expected_dtype = ids2nc.dtypes[metadata.data_type] + if var.dtype != expected_dtype: + raise variable_error(var, "data type", var.dtype, expected_dtype) + + # Dimensions + expected_dims = self.ncmeta.get_dimensions( + metadata.path_string, self.homogeneous_time + ) + if var.dimensions != expected_dims: + raise variable_error(var, "dimensions", var.dimensions, expected_dims) + + # Coordinates + coordinates = str(attrs.pop("coordinates", "")) + expected_coordinates = self.ncmeta.get_coordinates( + metadata.path_string, self.homogeneous_time + ) + if any(coord not in expected_coordinates for coord in coordinates.split()): + raise variable_error( + var, "coordinates", coordinates, " ".join(expected_coordinates) + ) + + # Ancillary variables + ancvar = attrs.pop("ancillary_variables", None) + if ancvar: + allowed_ancvar = [f"{var.name}_error_upper", f"{var.name}_error_lower"] + if any(var not in allowed_ancvar for var in ancvar.split()): + raise variable_error( + var, "ancillary_variables", ancvar, " ".join(allowed_ancvar) + ) + + # Units + units = attrs.pop("units", None) + if metadata.units and metadata.units != units: + raise variable_error(var, "units", units, metadata.units) + + # Sparse + sparse = attrs.pop("sparse", None) + if sparse is not None: + shape_name = f"{var.name}:shape" + shape_var = self.group[shape_name] if shape_name in self.variables else None + self._validate_sparsity(var, shape_var, metadata) + + # Documentation + doc = attrs.pop("documentation", None) + if metadata.documentation != doc: + logger.warning("Documentation of variable %s differs from the DD", var.name) + + # Unknown attrs + if attrs: + raise variable_error(var, "attributes", list(attrs.keys())) + + def _validate_sparsity( + self, + var: netCDF4.Variable, + shape_var: Optional[netCDF4.Variable], + metadata: IDSMetadata, + ) -> None: + """Validate that the variable has correct sparsity. 
+ + Args: + var: Variable with a "sparse" attribute + shape_var: Corresponding shape array (if it exists in the NC group) + metadata: IDSMetadata of the corresponding IDS object + """ + if metadata.ndim == 0: + return # Sparsity is stored with _FillValue, nothing to validate + + # Dimensions + aos_dimensions = self.ncmeta.get_dimensions( + self.ncmeta.aos.get(metadata.path_string), self.homogeneous_time + ) + shape_dimensions = shape_var.dimensions + if ( + len(shape_dimensions) != len(aos_dimensions) + 1 + or shape_dimensions[:-1] != aos_dimensions + or self.group.dimensions[shape_dimensions[-1]].size != metadata.ndim + ): + expected_dims = aos_dimensions + (f"{metadata.ndim}D",) + raise variable_error( + shape_var, "dimensions", shape_dimensions, expected_dims + ) + + # Data type + if shape_var.dtype.kind not in "ui": # should be (un)signed integer + raise variable_error( + shape_var, "dtype", shape_var.dtype, "any integer type" + ) diff --git a/imaspy/backends/netcdf/nc_validate.py b/imaspy/backends/netcdf/nc_validate.py new file mode 100644 index 00000000..49a14283 --- /dev/null +++ b/imaspy/backends/netcdf/nc_validate.py @@ -0,0 +1,53 @@ +from imaspy.backends.netcdf.db_entry_nc import NCDBEntryImpl +from imaspy.backends.netcdf.nc2ids import NC2IDS +from imaspy.db_entry import DBEntry +from imaspy.exception import InvalidNetCDFEntry + + +def validate_netcdf_file(filename: str) -> None: + """Validate that the provided netCDF file adheres to the IMAS conventions.""" + if not filename.endswith(".nc"): + raise InvalidNetCDFEntry( + f"Invalid filename `{filename}` provided: " + "an IMAS netCDF file should end with `.nc`" + ) + + with DBEntry(filename, "r") as entry: + entry_impl: NCDBEntryImpl = entry._dbe_impl + dataset = entry_impl._dataset + factory = entry_impl._ds_factory + + ids_names = factory.ids_names() + + # Check that groups in the dataset correspond to an IDS/occurrence and no + # additional variables are smuggled inside: + groups = [dataset] + [dataset[group] for group in dataset.groups] + for group in groups: + if group.variables or group.dimensions: + raise InvalidNetCDFEntry( + "NetCDF file should not have variables or dimensions in the " + f"{group.name} group." + ) + if group is dataset: + continue + if group.name not in ids_names: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}: there is no IDS with this name." + ) + for subgroup in group.groups: + try: + int(subgroup) + except ValueError: + raise InvalidNetCDFEntry( + f"Invalid group name {group.name}/{subgroup}: " + f"{subgroup} is not a valid occurrence number."
+ ) + + for ids_name in ids_names: + for occurrence in entry.list_all_occurrences(ids_name): + group = dataset[f"{ids_name}/{occurrence}"] + try: + NC2IDS(group, factory.new(ids_name)).validate_variables() + except InvalidNetCDFEntry as exc: + occ = f":{occurrence}" if occurrence else "" + raise InvalidNetCDFEntry(f"Invalid IDS {ids_name}{occ}: {exc}") diff --git a/imaspy/command/cli.py b/imaspy/command/cli.py index 246922ce..f894f02d 100644 --- a/imaspy/command/cli.py +++ b/imaspy/command/cli.py @@ -218,5 +218,20 @@ def convert_ids( console.Console().print(timer.get_table("Time required per IDS")) +@cli.command("validate_nc", no_args_is_help=True) +@click.argument("filename", type=click.Path(exists=True, dir_okay=False)) +def validate_nc(filename): + """Validate that the provided netCDF file adheres to the IMAS conventions.""" + from imaspy.backends.netcdf.nc_validate import validate_netcdf_file + + try: + validate_netcdf_file(filename) + except Exception as exc: + click.echo(f"File `{filename}` does not adhere to the IMAS conventions:") + click.echo(exc) + sys.exit(1) + click.echo(f"File `{filename}` is a valid IMAS netCDF file.") + + if __name__ == "__main__": cli() diff --git a/imaspy/db_entry.py b/imaspy/db_entry.py index 69956e95..3834655d 100644 --- a/imaspy/db_entry.py +++ b/imaspy/db_entry.py @@ -5,10 +5,16 @@ import logging import os -from typing import Any, List, Optional, Tuple, Type, overload +from typing import Any, List, Optional, Tuple, Type, Union, overload + +import numpy import imaspy -from imaspy.backends.db_entry_impl import DBEntryImpl +from imaspy.backends.db_entry_impl import ( + DBEntryImpl, + GetSampleParameters, + GetSliceParameters, +) from imaspy.dd_zip import dd_xml_versions from imaspy.exception import IDSNameError, UnknownDDVersion, ValidationError from imaspy.ids_base import IDSBase @@ -349,7 +355,6 @@ def get( ids_name, occurrence, None, - 0, destination, lazy, autoconvert, @@ -418,8 +423,121 @@ def get_slice( return self._get( ids_name, occurrence, - time_requested, - interpolation_method, + GetSliceParameters(time_requested, interpolation_method), destination, lazy, autoconvert, ignore_unknown_dd_version, ) + + def get_sample( + self, + ids_name: str, + tmin: float, + tmax: float, + dtime: Optional[Union[float, numpy.ndarray]] = None, + interpolation_method: Optional[int] = None, + occurrence: int = 0, + *, + lazy: bool = False, + autoconvert: bool = True, + ignore_unknown_dd_version: bool = False, + destination: Optional[IDSToplevel] = None, + ) -> IDSToplevel: + """Read a range of time slices from an IDS in this Database Entry. + + This method has three different modes, depending on the provided arguments: + + 1. No interpolation. This method is selected when :param:`dtime` and + :param:`interpolation_method` are not provided. + + This mode returns an IDS object with all constant/static data filled. The + dynamic data is retrieved for the provided time range [tmin, tmax]. + + 2. Interpolate dynamic data on a uniform time base. This method is selected + when :param:`dtime` and :param:`interpolation_method` are provided. + :param:`dtime` must be a number or a numpy array of size 1. + + This mode will generate an IDS with a homogeneous time vector ``[tmin, tmin + + dtime, tmin + 2*dtime, ...]`` up to ``tmax``. The chosen interpolation + method will have no effect on the time vector, but may have an impact on the + other dynamic values. The returned IDS always has + ``ids_properties.homogeneous_time = 1``. + + 3.
Interpolate dynamic data on an explicit time base. This method is selected + when :param:`dtime` and :param:`interpolation_method` are provided. + :param:`dtime` must be a numpy array of size larger than 1. + + This mode will generate an IDS with a homogeneous time vector equal to + :param:`dtime`. :param:`tmin` and :param:`tmax` are ignored in this mode. + The chosen interpolation method will have no effect on the time vector, but + may have an impact on the other dynamic values. The returned IDS always has + ``ids_properties.homogeneous_time = 1``. + + Args: + ids_name: Name of the IDS to read from the backend + tmin: Lower bound of the requested time range + tmax: Upper bound of the requested time range, must be larger than or + equal to :param:`tmin` + dtime: Interval to use when interpolating, must be positive, or numpy array + containing an explicit time base to interpolate. + interpolation_method: Interpolation method to use. Available options: + + - :const:`~imaspy.ids_defs.CLOSEST_INTERP` + - :const:`~imaspy.ids_defs.PREVIOUS_INTERP` + - :const:`~imaspy.ids_defs.LINEAR_INTERP` + + occurrence: Which occurrence of the IDS to read. + + Keyword Args: + lazy: When set to ``True``, values in this IDS will be retrieved only when + needed (instead of getting the full IDS immediately). See :ref:`Lazy + loading` for more details. + autoconvert: Automatically convert IDSs. + + If enabled (default), a call to ``get_sample()`` will return + an IDS from the Data Dictionary version attached to this Data Entry. + Data is automatically converted between the on-disk version and the + in-memory version. + + When set to ``False``, the IDS will be returned in the DD version it was + stored in. + ignore_unknown_dd_version: When an IDS is stored with an unknown DD version, + do not attempt automatic conversion and fetch the data in the Data + Dictionary version attached to this Data Entry. + destination: Populate this IDSToplevel instead of creating an empty one. + + Returns: + The loaded IDS. + + Example: + .. 
+            .. code-block:: python
+
+                import imaspy
+                import numpy
+                from imaspy import ids_defs
+
+                imas_entry = imaspy.DBEntry(
+                    "imas:mdsplus?user=public;pulse=131024;run=41;database=ITER", "r")
+
+                # All time slices between t=200 and t=370
+                core_profiles = imas_entry.get_sample("core_profiles", 200, 370)
+
+                # Closest points to [0, 100, 200, ..., 1000]
+                core_profiles_interp = imas_entry.get_sample(
+                    "core_profiles", 0, 1000, 100, ids_defs.CLOSEST_INTERP)
+
+                # Linear interpolation for [10, 11, 12, 14, 16, 20, 30, 40, 50]
+                times = numpy.array([10, 11, 12, 14, 16, 20, 30, 40, 50])
+                core_profiles_interp = imas_entry.get_sample(
+                    "core_profiles", 0, 0, times, ids_defs.LINEAR_INTERP)
+        """
+        if dtime is not None:
+            dtime = numpy.atleast_1d(dtime)  # Convert floats and 0D arrays to 1D array
+        return self._get(
+            ids_name,
+            occurrence,
+            GetSampleParameters(tmin, tmax, dtime, interpolation_method),
             destination,
             lazy,
             autoconvert,
@@ -430,8 +548,7 @@ def _get(
         self,
         ids_name: str,
         occurrence: int,
-        time_requested: Optional[float],
-        interpolation_method: int,
+        parameters: Union[None, GetSliceParameters, GetSampleParameters],
         destination: Optional[IDSToplevel],
         lazy: bool,
         autoconvert: bool,
@@ -504,8 +621,7 @@ def _get(
         return self._dbe_impl.get(
             ids_name,
             occurrence,
-            time_requested,
-            interpolation_method,
+            parameters,
             destination,
             lazy,
             nbc_map,
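The `GetSliceParameters` and `GetSampleParameters` containers used above are defined in `imaspy/backends/db_entry_impl.py`, which is not part of this diff. Judging only from the call sites, they are plain record types along these lines (a hypothetical sketch; field names are inferred and may differ from the real definitions):

```python
# Hypothetical sketch, inferred from the call sites in db_entry.py above;
# the actual definitions in imaspy/backends/db_entry_impl.py may differ.
from dataclasses import dataclass
from typing import Optional

import numpy


@dataclass
class GetSliceParameters:
    time_requested: float
    interpolation_method: int


@dataclass
class GetSampleParameters:
    tmin: float
    tmax: float
    dtime: Optional[numpy.ndarray]  # always 1D after numpy.atleast_1d
    interpolation_method: Optional[int]
```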
diff --git a/imaspy/dd_helpers.py b/imaspy/dd_helpers.py
index 0506482f..21a7775f 100644
--- a/imaspy/dd_helpers.py
+++ b/imaspy/dd_helpers.py
@@ -58,9 +58,14 @@ def prepare_data_dictionaries():
             dd_zip.write(filename, arcname=arcname)
         # Include identifiers from latest tag in zip file
         repo.git.checkout(newest_version_and_tag[1], force=True)
+        # DD layout <= 4.0.0
         for filename in Path("data-dictionary").glob("*/*identifier.xml"):
             arcname = Path("identifiers").joinpath(*filename.parts[1:])
             dd_zip.write(filename, arcname=arcname)
+        # DD layout > 4.0.0
+        for filename in Path("data-dictionary").glob("schemas/*/*identifier.xml"):
+            arcname = Path("identifiers").joinpath(*filename.parts[2:])
+            dd_zip.write(filename, arcname=arcname)
 
 
 # pre 3.30.0 versions of the DD have the `saxon9he.jar` file path hardcoded
diff --git a/imaspy/exception.py b/imaspy/exception.py
index 8377d13b..550ce2ed 100644
--- a/imaspy/exception.py
+++ b/imaspy/exception.py
@@ -101,3 +101,7 @@ def __init__(self, node, dimension, expected_size, coor_path):
         super().__init__(
             f"Element `{node._path}` has incorrect shape {node.shape}: {details}"
         )
+
+
+class InvalidNetCDFEntry(Exception):
+    """Error raised when loading an IDS from a NetCDF file that fails validation."""
diff --git a/imaspy/test/test_get_sample.py b/imaspy/test/test_get_sample.py
new file mode 100644
index 00000000..0f5fed3e
--- /dev/null
+++ b/imaspy/test/test_get_sample.py
@@ -0,0 +1,388 @@
+import numpy as np
+import pytest
+
+import imaspy
+from imaspy.backends.imas_core.imas_interface import lowlevel
+from imaspy.exception import DataEntryException
+from imaspy.ids_defs import (
+    CLOSEST_INTERP,
+    HDF5_BACKEND,
+    IDS_TIME_MODE_HETEROGENEOUS,
+    IDS_TIME_MODE_HOMOGENEOUS,
+    LINEAR_INTERP,
+    MDSPLUS_BACKEND,
+    PREVIOUS_INTERP,
+)
+
+
+@pytest.fixture()
+def test_db_uri(backend, worker_id, tmp_path_factory):
+    # Check if begin_timerange_action is available in imas_core
+    if not hasattr(lowlevel, "al_begin_timerange_action"):
+        pytest.skip("imas_core version doesn't support begin_timerange_action.")
+
+    # TODO: add MDSPLUS_BACKEND once implemented, see IMAS-5593
+    if backend not in [HDF5_BACKEND]:
+        pytest.skip("Backend doesn't support time range operations.")
+
+    tmp_path = tmp_path_factory.mktemp(f"testdb.{worker_id}")
+    backend_str = {HDF5_BACKEND: "hdf5", MDSPLUS_BACKEND: "mdsplus"}[backend]
+    uri = f"imas:{backend_str}?path={tmp_path}"
+    entry = imaspy.DBEntry(uri, "x", dd_version="4.0.0")
+
+    # Homogeneous core profiles:
+    cp = entry.factory.core_profiles()
+    cp.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    N_time = 32
+    cp.time = np.linspace(0, 1, N_time)
+    cp.profiles_1d.resize(N_time)
+    for i in range(N_time):
+        # FLT_1D:
+        cp.profiles_1d[i].grid.rho_tor_norm = np.array([0.0, 1.0])
+        cp.profiles_1d[i].t_i_average = np.array([2.0, 1.0]) * (i + 1)
+        cp.profiles_1d[i].ion.resize(1)
+        # STR_0D:
+        cp.profiles_1d[i].ion[0].name = "D"
+        # FLT_0D
+        cp.profiles_1d[i].ion[0].z_ion = 1.0
+        cp.profiles_1d[i].ion[0].temperature = cp.profiles_1d[i].t_i_average
+        # INT_0D
+        cp.profiles_1d[i].ion[0].temperature_validity = 0
+    cp.global_quantities.ip = (2 - cp.time) ** 0.5
+    entry.put(cp)
+
+    # Inhomogeneous equilibrium
+    eq = entry.factory.equilibrium()
+    eq.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS
+    eq.time = np.linspace(0, 2, 512)
+    # GGD Grid with 1 time slice
+    eq.grids_ggd.resize(1)
+    eq.grids_ggd[0].time = 0.0
+    eq.grids_ggd[0].grid.resize(1)
+    eq.grids_ggd[0].grid[0].path = "wall:0/description_ggd(1)/grid_ggd"
+    # multiple time slices with data
+    N_time = 6
+    eq.time_slice.resize(N_time)
+    for i in range(N_time):
+        # FLT_0D
+        eq.time_slice[i].time = i / 5.0
+        eq.time_slice[i].profiles_2d.resize(1)
+        # FLT_1D
+        eq.time_slice[i].profiles_2d[0].grid.dim1 = np.array([0.0, 1.0])
+        eq.time_slice[i].profiles_2d[0].grid.dim2 = np.array([3.0, 4.0])
+        # STR_0D
+        eq.time_slice[i].profiles_2d[0].grid_type.name = f"test {i}"
+        eq.time_slice[i].profiles_2d[0].grid_type.description = "test description"
+        # INT_0D
+        eq.time_slice[i].profiles_2d[0].grid_type.index = -1
+        # FLT_2D
+        eq.time_slice[i].profiles_2d[0].r = np.array([[0.0, 0.0], [1.0, 1.0]])
+        eq.time_slice[i].profiles_2d[0].z = np.array([[3.0, 4.0], [3.0, 4.0]])
+        eq.time_slice[i].profiles_2d[0].psi = (
+            eq.time_slice[i].profiles_2d[0].r - eq.time_slice[i].profiles_2d[0].z
+        ) * (1 + eq.time_slice[i].time) ** 2
+    entry.put(eq)
+
+    # Equilibrium only has dynamic AOS and no other non-homogeneous time nodes
+    # Use magnetics to test that case:
+    mag = entry.factory.magnetics()
+    mag.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS
+    mag.time = np.array([0.0])
+    mag.flux_loop.resize(3)
+    for i in range(3):
+        mag.flux_loop[i].flux.time = np.linspace(0.0123, 1, 5 + i)
+        mag.flux_loop[i].flux.data = 2 + 2 * mag.flux_loop[i].flux.time
+        mag.flux_loop[i].voltage.time = np.linspace(0.0123, 1, 8 + i)
+        mag.flux_loop[i].voltage.data = 2 - 5 * mag.flux_loop[i].voltage.time
+    entry.put(mag)
+
+    entry.close()
+    return uri
+
+
+@pytest.fixture()
+def entry(test_db_uri):
+    return imaspy.DBEntry(test_db_uri, "r", dd_version="4.0.0")
+
+
+def test_invalid_arguments(entry):
+    with pytest.raises(ValueError):
+        entry.get_sample("core_profiles", 0.3, 0.2)  # tmin > tmax
+    with pytest.raises(DataEntryException):
+        entry.get_sample("core_profiles", 0.1, 0.2, occurrence="invalid")
+    with pytest.raises(ValueError):
+        entry.get_sample("core_profiles", 0.1, 0.2, 0.05)  # no interpolation method
+
+
+def test_get_sample_homogeneous(entry):
+    cp = entry.get_sample("core_profiles", 0.3, 14 / 31)
+    assert np.array_equal(cp.time, np.linspace(0, 1, 32)[10:15])
+
+    for i, p1d in enumerate(cp.profiles_1d):
+        assert np.array_equal(p1d.grid.rho_tor_norm, [0.0, 1.0])
+        assert np.array_equal(p1d.t_i_average, np.array([2.0, 1.0]) * (i + 11))
+        assert len(p1d.ion) == 1
+        assert p1d.ion[0].name == "D"
+        assert p1d.ion[0].z_ion == 1
+        assert np.array_equal(p1d.ion[0].temperature, p1d.t_i_average)
+        assert p1d.ion[0].temperature_validity == 0
+
+    assert np.array_equal(cp.global_quantities.ip, (2 - cp.time) ** 0.5)
+
+
+def test_get_sample_heterogeneous(entry):
+    eq = entry.get_sample("equilibrium", -1.0, 0.2)
+    # Main time array
+    assert np.array_equal(eq.time, np.linspace(0, 2, 512)[:52])
+    # grids_ggd AoS
+    assert len(eq.grids_ggd) == 1
+    assert eq.grids_ggd[0].time == 0.0
+    assert eq.grids_ggd[0].grid[0].path == "wall:0/description_ggd(1)/grid_ggd"
+    # time_slice AoS
+    assert len(eq.time_slice) == 2
+    assert eq.time_slice[0].time == 0.0
+    assert eq.time_slice[1].time == 0.2
+
+    for i in range(2):
+        p2d = eq.time_slice[i].profiles_2d[0]
+        assert np.array_equal(p2d.grid.dim1, [0.0, 1.0])
+        assert np.array_equal(p2d.grid.dim2, [3.0, 4.0])
+        assert p2d.grid_type.name == f"test {i}"
+        assert p2d.grid_type.index == -1
+        assert np.array_equal(p2d.r, [[0.0, 0.0], [1.0, 1.0]])
+        assert np.array_equal(p2d.z, [[3.0, 4.0], [3.0, 4.0]])
+        expected_psi = (p2d.r - p2d.z) * (1 + eq.time_slice[i].time) ** 2
+        assert np.array_equal(p2d.psi, expected_psi)
+
+    mag = entry.get_sample("magnetics", 0.25, 0.75)
+    assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS
+    assert len(mag.time) == 0
+    assert len(mag.flux_loop) == 3
+    for i in range(3):
+        fl = mag.flux_loop[i]
+
+        flux_time = np.linspace(0.0123, 1, 5 + i)
+        flux_time = flux_time[0.25 <= flux_time]
+        flux_time = flux_time[flux_time <= 0.75]
+        assert np.array_equal(fl.flux.time, flux_time)
+        assert np.array_equal(fl.flux.data, 2 + 2 * flux_time)
+
+        voltage_time = np.linspace(0.0123, 1, 8 + i)
+        voltage_time = voltage_time[0.25 <= voltage_time]
+        voltage_time = voltage_time[voltage_time <= 0.75]
+        assert np.array_equal(fl.voltage.time, voltage_time)
+        assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time)
+
+
+def test_get_sample_homogeneous_linear_interp(entry):
+    # Note requesting 0.401 and not 0.4, since
+    # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17
+    cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, LINEAR_INTERP)
+    assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0)
+
+    assert len(cp.profiles_1d) == 6
+    # Check some interpolated values
+    for i in range(6):
+        # Check rho_tor_norm
+        rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm
+        assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0]))
+        # Check t_i_average
+        expected = np.array([2.0, 1.0]) * (1 + 31 * cp.time[i])
+        t_i_average = cp.profiles_1d[i].t_i_average
+        assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
+
+
+def test_get_sample_homogeneous_explicit_timebase(entry):
+    times = [0.1, 0.2345, 0.5, np.sqrt(2) / 2]
+    cp = entry.get_sample("core_profiles", 0, 0, times, LINEAR_INTERP)
+    assert np.allclose(cp.time, times, rtol=1e-14, atol=0)
+
+    assert len(cp.profiles_1d) == 4
+    # Check some interpolated values
+    for i in range(4):
+        # Check rho_tor_norm
+        rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm
+        assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0]))
+        # Check t_i_average
+        expected = np.array([2.0, 1.0]) * (1 + 31 * cp.time[i])
+        t_i_average = cp.profiles_1d[i].t_i_average
+        assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
+
+
+def test_get_sample_homogeneous_previous_interp(entry):
+    # Note requesting 0.401 and not 0.4, since
+    # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17
+    cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, PREVIOUS_INTERP)
+    assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0)
+
+    assert len(cp.profiles_1d) == 6
+    # Check some interpolated values
+    for i in range(6):
+        # Check rho_tor_norm
+        rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm
+        assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0]))
+        # Check t_i_average
+        expected = np.array([2.0, 1.0]) * [10, 10, 11, 12, 12, 13][i]
+        t_i_average = cp.profiles_1d[i].t_i_average
+        assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
+
+
+def test_get_sample_homogeneous_closest_interp(entry):
+    # Note requesting 0.401 and not 0.4, since
+    # (0.3 + 0.02 + 0.02 + 0.02 + 0.02 + 0.02) = 0.4 + 5e-17
+    cp = entry.get_sample("core_profiles", 0.3, 0.401, 0.02, CLOSEST_INTERP)
+    assert np.allclose(cp.time, np.linspace(0.3, 0.4, 6), rtol=1e-14, atol=0)
+
+    assert len(cp.profiles_1d) == 6
+    # Check some interpolated values
+    for i in range(6):
+        # Check rho_tor_norm
+        rho_tor_norm = cp.profiles_1d[i].grid.rho_tor_norm
+        assert np.array_equal(rho_tor_norm, np.array([0.0, 1.0]))
+        # Check t_i_average
+        expected = np.array([2.0, 1.0]) * [10, 11, 12, 12, 13, 13][i]
+        t_i_average = cp.profiles_1d[i].t_i_average
+        assert np.allclose(t_i_average, expected, rtol=1e-14, atol=0)
+
+
+def test_get_sample_heterogeneous_linear_interp(entry):
+    eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, LINEAR_INTERP)
+    N_samples = 7
+    # IDS becomes homogeneous after resampling
+    assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples))
+    assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+
+    # Check interpolated grids_ggd
+    assert len(eq.grids_ggd) == N_samples
+    for i in range(N_samples):
+        # assert eq.grids_ggd[i].time == EMPTY_FLOAT
+        assert len(eq.grids_ggd[i].grid) == 1
+        assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd"
+
+    # Check interpolated time_slice
+    assert len(eq.time_slice) == N_samples
+    for i in range(N_samples):
+        # assert eq.time_slice[i].time == EMPTY_FLOAT
+        assert len(eq.time_slice[i].profiles_2d) == 1
+        p2d = eq.time_slice[i].profiles_2d[0]
+        assert np.array_equal(p2d.grid.dim1, [0.0, 1.0])
+        assert np.array_equal(p2d.grid.dim2, [3.0, 4.0])
+
+        # Determine the data as we have stored it in test_db_uri()
+        time = eq.time[i]
+        original_times = [0, 0.2, 0.4, 0.6, 0.8, 1.0]
+        index = np.searchsorted(original_times, time)
+        prevtime = original_times[index - 1]
+        nexttime = original_times[index]
+        prevpsi = (p2d.r - p2d.z) * (1 + prevtime) ** 2
+        nextpsi = (p2d.r - p2d.z) * (1 + nexttime) ** 2
+        # Linear interpolation
+        expected_psi = (nextpsi * (time - prevtime) + prevpsi * (nexttime - time)) / (
+            nexttime - prevtime
+        )
+        assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0)
+
+    mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, LINEAR_INTERP)
+    assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+    assert np.allclose(mag.time, np.linspace(0.2, 0.5, N_samples))
+
+    assert len(mag.flux_loop) == 3
+    for i in range(3):
+        fl = mag.flux_loop[i]
+        assert np.allclose(fl.flux.data, 2 + 2 * mag.time, rtol=1e-14, atol=0)
+        assert np.allclose(fl.voltage.data, 2 - 5 * mag.time, rtol=1e-14, atol=2e-16)
+
+
+def test_get_sample_heterogeneous_previous_interp(entry):
+    eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, PREVIOUS_INTERP)
+    N_samples = 7
+    # IDS becomes homogeneous after resampling
+    assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples))
+    assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+
+    # Check interpolated grids_ggd
+    assert len(eq.grids_ggd) == N_samples
+    for i in range(N_samples):
+        # assert eq.grids_ggd[i].time == EMPTY_FLOAT
+        assert len(eq.grids_ggd[i].grid) == 1
+        assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd"
+
+    # Check interpolated time_slice
+    assert len(eq.time_slice) == N_samples
+    for i in range(N_samples):
+        # assert eq.time_slice[i].time == EMPTY_FLOAT
+        assert len(eq.time_slice[i].profiles_2d) == 1
+        p2d = eq.time_slice[i].profiles_2d[0]
+        assert np.array_equal(p2d.grid.dim1, [0.0, 1.0])
+        assert np.array_equal(p2d.grid.dim2, [3.0, 4.0])
+
+        origtime = [0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.4][i]
+        expected_psi = (p2d.r - p2d.z) * (1 + origtime) ** 2
+        assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0)
+
+    mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, PREVIOUS_INTERP)
+    assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+    assert np.allclose(mag.time, np.linspace(0.2, 0.5, N_samples))
+
+    assert len(mag.flux_loop) == 3
+    for i in range(3):
+        fl = mag.flux_loop[i]
+
+        flux_time = np.linspace(0.0123, 1, 5 + i)
+        flux_time = flux_time[np.searchsorted(flux_time, mag.time, side="right") - 1]
+        assert np.array_equal(fl.flux.data, 2 + 2 * flux_time)
+
+        voltage_time = np.linspace(0.0123, 1, 8 + i)
+        voltage_time = voltage_time[
+            np.searchsorted(voltage_time, mag.time, side="right") - 1
+        ]
+        assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time)
+
+
+def test_get_sample_heterogeneous_closest_interp(entry):
+    eq = entry.get_sample("equilibrium", 0.2, 0.501, 0.05, CLOSEST_INTERP)
+    N_samples = 7
+    # IDS becomes homogeneous after resampling
+    assert np.allclose(eq.time, np.linspace(0.2, 0.5, N_samples))
+    assert eq.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+
+    # Check interpolated grids_ggd
+    assert len(eq.grids_ggd) == N_samples
+    for i in range(N_samples):
+        # assert eq.grids_ggd[i].time == EMPTY_FLOAT
+        assert len(eq.grids_ggd[i].grid) == 1
+        assert eq.grids_ggd[i].grid[0].path == "wall:0/description_ggd(1)/grid_ggd"
+
+    # Check interpolated time_slice
+    assert len(eq.time_slice) == N_samples
+    for i in range(N_samples):
+        # assert eq.time_slice[i].time == EMPTY_FLOAT
+        assert len(eq.time_slice[i].profiles_2d) == 1
+        p2d = eq.time_slice[i].profiles_2d[0]
+        assert np.array_equal(p2d.grid.dim1, [0.0, 1.0])
+        assert np.array_equal(p2d.grid.dim2, [3.0, 4.0])
+
+        # Note: CLOSEST appears to round up: 0.3 is matched to 0.4 and not 0.2
+        origtime = [0.2, 0.2, 0.4, 0.4, 0.4, 0.4, 0.6][i]
+        expected_psi = (p2d.r - p2d.z) * (1 + origtime) ** 2
+        assert np.allclose(p2d.psi, expected_psi, rtol=1e-14, atol=0)
+
+    mag = entry.get_sample("magnetics", 0.2, 0.501, 0.05, CLOSEST_INTERP)
+    assert mag.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+    assert np.allclose(mag.time, np.linspace(0.2, 0.5, N_samples))
+
+    assert len(mag.flux_loop) == 3
+    for i in range(3):
+        fl = mag.flux_loop[i]
+
+        flux_time = np.linspace(0.0123, 1, 5 + i)
+        flux_time = flux_time[
+            np.argmin(np.abs(flux_time[None, :] - mag.time[:, None]), axis=1)
+        ]
+        assert np.array_equal(fl.flux.data, 2 + 2 * flux_time)
+
+        voltage_time = np.linspace(0.0123, 1, 8 + i)
+        voltage_time = voltage_time[
+            np.argmin(np.abs(voltage_time[None, :] - mag.time[:, None]), axis=1)
+        ]
+        assert np.array_equal(fl.voltage.data, 2 - 5 * voltage_time)
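The expected `origtime` tables in the two tests above can be reproduced outside the test suite; the following standalone sketch (not part of the diff, values copied from the fixture) shows the index arithmetic behind the PREVIOUS and CLOSEST expectations:

```python
import numpy as np

# Stored equilibrium slice times and the requested sample times used above.
stored = np.array([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
samples = np.array([0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5])

# PREVIOUS_INTERP: take the last stored time at or before each sample time.
previous = stored[np.searchsorted(stored, samples, side="right") - 1]
print(previous)  # [0.2 0.2 0.2 0.2 0.4 0.4 0.4], the origtime table above

# CLOSEST_INTERP: take the stored time nearest to each sample time.
# np.argmin resolves exact ties (0.3, 0.5) to the *first* minimum, i.e.
# downwards, whereas the access layer appears to round up (0.3 -> 0.4),
# hence the 0.4s and the trailing 0.6 in the test's origtime table.
closest = stored[np.argmin(np.abs(stored[None, :] - samples[:, None]), axis=1)]
print(closest)
```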
diff --git a/imaspy/test/test_lazy_loading.py b/imaspy/test/test_lazy_loading.py
index 8c3b2fef..1d34e2a1 100644
--- a/imaspy/test/test_lazy_loading.py
+++ b/imaspy/test/test_lazy_loading.py
@@ -163,3 +163,39 @@ def test_lazy_load_with_new_aos(requires_imas):
     assert len(lazy_et.model[0].ggd[0].electrons.particles.d_radial) == 0
 
     dbentry.close()
+
+
+def test_lazy_load_with_new_structure(requires_imas):
+    dbentry = DBEntry(MEMORY_BACKEND, "ITER", 1, 1, dd_version="3.30.0")
+    dbentry.create()
+
+    eq = dbentry.factory.equilibrium()
+    eq.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    eq.time = [0.0]
+    eq.time_slice.resize(1)
+    dbentry.put(eq)
+
+    entry2 = DBEntry(MEMORY_BACKEND, "ITER", 1, 1, data_version="3", dd_version="4.0.0")
+    entry2.open()
+    lazy_eq = entry2.get("equilibrium", lazy=True)
+    assert not lazy_eq.time_slice[0].boundary.dr_dz_zero_point.r.has_value
+
+
+def test_lazy_load_multiple_ids(backend, worker_id, tmp_path):
+    if backend == ASCII_BACKEND:
+        pytest.skip("Lazy loading is not supported by the ASCII backend.")
+
+    with open_dbentry(backend, "w", worker_id, tmp_path) as dbentry:
+        cp = dbentry.factory.core_profiles()
+        cp.ids_properties.homogeneous_time = 1
+        cp.time = [0.0, 1.0]
+        dbentry.put(cp)
+        eq = dbentry.factory.equilibrium()
+        eq.ids_properties.homogeneous_time = 1
+        eq.time = [1.0, 2.0]
+        dbentry.put(eq)
+
+        lazy_cp = dbentry.get("core_profiles", lazy=True)
+        lazy_eq = dbentry.get("equilibrium", lazy=True)
+        assert all(cp.time - eq.time == -1)
+        assert all(lazy_cp.time - lazy_eq.time == -1)
diff --git a/imaspy/test/test_nc_validation.py b/imaspy/test/test_nc_validation.py
new file mode 100644
index 00000000..efd25420
--- /dev/null
+++ b/imaspy/test/test_nc_validation.py
@@ -0,0 +1,197 @@
+import netCDF4
+import numpy as np
+import pytest
+
+from imaspy.backends.netcdf.ids2nc import IDS2NC
+from imaspy.backends.netcdf.nc2ids import NC2IDS
+from imaspy.backends.netcdf.nc_validate import validate_netcdf_file
+from imaspy.exception import InvalidNetCDFEntry, UnknownDDVersion
+from imaspy.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
+from imaspy.ids_factory import IDSFactory
+
+
+@pytest.fixture()
+def memfile():
+    with netCDF4.Dataset("-", "w", diskless=True) as memfile:
+        yield memfile
+
+
+@pytest.fixture()
+def factory():
+    return IDSFactory("4.0.0")
+
+
+@pytest.fixture()
+def memfile_with_ids(memfile, factory):
+    ids = factory.core_profiles()
+    ids.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+    ids.time = [1.0, 2.0, 3.0]
+    ids.profiles_1d.resize(3)
+    for i in range(3):
+        ids.profiles_1d[i].grid.rho_tor_norm = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
+    ids.profiles_1d[0].zeff = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+    IDS2NC(ids, memfile).run()
+    # This one is valid:
+    NC2IDS(memfile, factory.core_profiles()).run()
+    return memfile
+
+
+def test_invalid_homogeneous_time(memfile, factory):
+    empty_group = memfile.createGroup("empty_group")
+    # Invalid dtype
+    invalid_dtype = memfile.createGroup("invalid_dtype")
+    invalid_dtype.createVariable("ids_properties.homogeneous_time", float, ())[()] = 0
+    # Invalid shape: 1D instead of 0D
+    invalid_shape = memfile.createGroup("invalid_shape")
+    invalid_shape.createDimension("dim")
+    invalid_shape.createVariable("ids_properties.homogeneous_time", "i4", ("dim",))
+    # Invalid value: not 0, 1 or 2
+    invalid_value = memfile.createGroup("invalid_value")
+    invalid_value.createVariable("ids_properties.homogeneous_time", "i4", ())
+
+    ids = factory.core_profiles()
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(empty_group, ids)  # ids_properties.homogeneous_time does not exist
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(invalid_dtype, ids)
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(invalid_shape, ids)
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(invalid_value, ids)
+
+
+def test_invalid_units(memfile_with_ids, factory):
+    memfile_with_ids["time"].units = "hours"
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(memfile_with_ids, factory.core_profiles()).run()
+
+
+def test_invalid_documentation(memfile_with_ids, factory, caplog):
+    with caplog.at_level("WARNING"):
+        NC2IDS(memfile_with_ids, factory.core_profiles()).run()
+    assert not caplog.records
+    # An invalid documentation attribute logs a warning
+    memfile_with_ids["time"].documentation = "https://en.wikipedia.org/wiki/Time"
+    with caplog.at_level("WARNING"):
+        NC2IDS(memfile_with_ids, factory.core_profiles()).run()
+    assert len(caplog.records) == 1
+
+
+def test_invalid_dimension_name(memfile_with_ids, factory):
+    memfile_with_ids.renameDimension("time", "T")
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(memfile_with_ids, factory.core_profiles()).run()
+
+
+def test_invalid_coordinates(memfile_with_ids, factory):
+    memfile_with_ids["profiles_1d.grid.rho_tor_norm"].coordinates = "xyz"
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(memfile_with_ids, factory.core_profiles()).run()
+
+
+def test_invalid_ancillary_variables(memfile_with_ids, factory):
+    memfile_with_ids["time"].ancillary_variables = "xyz"
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(memfile_with_ids, factory.core_profiles()).run()
+
+
+def test_extra_attributes(memfile_with_ids, factory):
+    memfile_with_ids["time"].new_attribute = [1, 2, 3]
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(memfile_with_ids, factory.core_profiles()).run()
+
+
+def test_shape_array_without_data(memfile_with_ids, factory):
+    memfile_with_ids.createVariable("profiles_1d.t_i_average:shape", int, ())
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(memfile_with_ids, factory.core_profiles()).run()
+
+
+def test_shape_array_without_sparse_data(memfile_with_ids, factory):
+    memfile_with_ids.createVariable("profiles_1d.grid.rho_tor_norm:shape", int, ())
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(memfile_with_ids, factory.core_profiles()).run()
+
+
+def test_shape_array_with_invalid_dimensions(memfile_with_ids, factory):
+    cp = factory.core_profiles()
+    t_i_average_meta = cp.metadata["profiles_1d.t_i_average"]
+    t_i_average = memfile_with_ids.createVariable(
+        "profiles_1d.t_i_average", float, ("time", "profiles_1d.grid.rho_tor_norm:i")
+    )
+    t_i_average.units = t_i_average_meta.units
+    t_i_average.documentation = t_i_average_meta.documentation
+    t_i_average.sparse = "Contents don't matter"
+    memfile_with_ids.createVariable(
+        "profiles_1d.t_i_average:shape",
+        np.int32,
+        ("time", "profiles_1d.grid.rho_tor_norm:i"),
+    )
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(memfile_with_ids, cp).run()
+
+
+def test_shape_array_with_invalid_dtype(memfile_with_ids, factory):
+    cp = factory.core_profiles()
+    t_i_average_meta = cp.metadata["profiles_1d.t_i_average"]
+    t_i_average = memfile_with_ids.createVariable(
+        "profiles_1d.t_i_average", float, ("time", "profiles_1d.grid.rho_tor_norm:i")
+    )
+    t_i_average.units = t_i_average_meta.units
+    t_i_average.documentation = t_i_average_meta.documentation
+    t_i_average.sparse = "Contents don't matter"
+    memfile_with_ids.createVariable(
+        "profiles_1d.t_i_average:shape", float, ("time", "1D")
+    )
+    with pytest.raises(InvalidNetCDFEntry):
+        NC2IDS(memfile_with_ids, cp).run()
+
+
+def test_validate_nc(tmpdir):
+    fname = str(tmpdir / "test.nc")
+
+    # Wrong extension
+    with pytest.raises(InvalidNetCDFEntry):
+        validate_netcdf_file("test.h5")
+
+    # Empty file
+    netCDF4.Dataset(fname, "w").close()
+    with pytest.raises(InvalidNetCDFEntry):
+        validate_netcdf_file(fname)
+
+    # Invalid DD version
+    with netCDF4.Dataset(fname, "w") as dataset:
+        dataset.data_dictionary_version = "invalid"
+        dataset.createGroup("core_profiles")
+    with pytest.raises(UnknownDDVersion):
+        validate_netcdf_file(fname)
+
+    # Invalid group
+    with netCDF4.Dataset(fname, "w") as dataset:
+        dataset.data_dictionary_version = "4.0.0"
+        dataset.createGroup("X")
+    with pytest.raises(InvalidNetCDFEntry):
+        validate_netcdf_file(fname)
+
+    # Invalid occurrence
+    with netCDF4.Dataset(fname, "w") as dataset:
+        dataset.data_dictionary_version = "4.0.0"
+        dataset.createGroup("core_profiles/a")
+    with pytest.raises(InvalidNetCDFEntry):
+        validate_netcdf_file(fname)
+
+    # Invalid variable in root group
+    with netCDF4.Dataset(fname, "w") as dataset:
+        dataset.data_dictionary_version = "4.0.0"
+        dataset.createVariable("core_profiles", int, ())
+    with pytest.raises(InvalidNetCDFEntry):
+        validate_netcdf_file(fname)
+
+    # Missing ids_properties.homogeneous_time
+    with netCDF4.Dataset(fname, "w") as dataset:
+        dataset.data_dictionary_version = "4.0.0"
+        dataset.createGroup("core_profiles/1")
+    with pytest.raises(InvalidNetCDFEntry):
+        validate_netcdf_file(fname)
+
+    # All other validations are handled by NC2IDS and tested above
diff --git a/imaspy/test/test_uda_support.py b/imaspy/test/test_uda_support.py
new file mode 100644
index 00000000..f623219a
--- /dev/null
+++ b/imaspy/test/test_uda_support.py
@@ -0,0 +1,12 @@
+from pathlib import Path
+from zlib import crc32
+
+from imaspy import dd_zip
+from imaspy.backends.imas_core.uda_support import extract_idsdef
+
+
+def test_extract_idsdef():
+    fname = extract_idsdef("4.0.0")
+    expected_crc = dd_zip.get_dd_xml_crc("4.0.0")
+    actual_crc = crc32(Path(fname).read_bytes())
+    assert expected_crc == actual_crc
diff --git a/pyproject.toml b/pyproject.toml
index 1c1ce2cc..dccd6912 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ authors = [
 description = "Pythonic wrappers for the IMAS Access Layer"
 readme = {file = "README.md", content-type = "text/markdown"}
 requires-python = ">=3.7"
-license = {file = "LICENSE.md"}
+license = {file = "LICENSE.txt"}
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Environment :: Console",
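Taken together, `validate_netcdf_file` and the `validate_nc` CLI command added in this diff give two entry points to the same validation logic. A minimal usage sketch, assuming a placeholder file name `example.nc` and using only names introduced above:

```python
# Programmatic counterpart of the validate_nc CLI command; "example.nc" is a
# placeholder. validate_netcdf_file raises on the first problem it finds.
from imaspy.backends.netcdf.nc_validate import validate_netcdf_file
from imaspy.exception import InvalidNetCDFEntry

try:
    validate_netcdf_file("example.nc")
except InvalidNetCDFEntry as exc:
    print(f"Not a valid IMAS netCDF file: {exc}")
else:
    print("File adheres to the IMAS conventions.")
```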