diff --git a/docs/source/identifiers.rst b/docs/source/identifiers.rst index 312749e..47d5b0d 100644 --- a/docs/source/identifiers.rst +++ b/docs/source/identifiers.rst @@ -11,13 +11,16 @@ enumerated list of options for defining, for example: a neutron, or a photon. - Plasma heating may come from neutral beam injection, electron cyclotron heating, ion cyclotron heating, lower hybrid heating, alpha particles. +- These may have alternative naming conventions supported through aliases + (e.g., "235U" and "U_235" for Uranium 235). -Identifiers are a list of possible valid labels. Each label has three +Identifiers are a list of possible valid labels. Each label has up to four representations: 1. An index (integer) 2. A name (short string) 3. A description (long string) +4. A list of aliases (list of short strings) Identifiers in IMAS-Python @@ -44,6 +47,14 @@ the available identifiers is stored as ``imas.identifiers.identifiers``. print(csid.total.index) print(csid.total.description) + # Access identifiers with aliases (when available) + mid = imas.identifiers.materials_identifier + print(mid["235U"]) # Access by canonical name + print(mid["U_235"]) # Access by alias + + # Both return the same object + assert mid["235U"] is mid["U_235"] + # Item access is also possible print(identifiers["edge_source_identifier"]) @@ -64,8 +75,8 @@ Assigning identifiers in IMAS-Python IMAS-Python implements smart assignment of identifiers. You may assign an identifier enum value (for example ``imas.identifiers.core_source_identifier.total``), a -string (for example ``"total"``) or an integer (for example ``"1"``) to an -identifier structure (for example ``core_profiles.source[0].identifier``) to set +string (for example ``"total"`` or its alias), or an integer (for example ``1``) +to an identifier structure (for example ``core_profiles.source[0].identifier``) to set all three child nodes ``name``, ``index`` and ``description`` in one go. 
See below example: @@ -86,6 +97,15 @@ below example: # 3. Assign an integer. This looks up the index in the identifier enum: core_sources.source[0].identifier = 1 + # Identifiers can still be assigned with the old alias name for backward compatibility: + materials = imas.IDSFactory().materials() + materials.material.resize(1) + mid = imas.identifiers.materials_identifier + # Assign using canonical name + materials.material[0].identifier = "235U" + # Or assign using alias (equivalent to above) + materials.material[0].identifier = "U_235" + # Inspect the contents of the structure imas.util.inspect(core_sources.source[0].identifier) @@ -101,6 +121,59 @@ below example: imas.util.inspect(core_sources.source[1].identifier) +Identifier aliases +------------------ + +Some identifiers may have multiple aliases defined in the Data Dictionary. Aliases are +former names kept as an option to ensure better backward compatibility after a change +and support multiple naming conventions. An identifier can have any number of +comma-separated aliases. + +Aliases can be accessed in the same ways as canonical names, and all aliases for an +identifier point to the same object. + +Names and aliases that begin with a digit (e.g., ``235U``) cannot be accessed using dot +notation (e.g., ``materials_identifier.235U``) due to Python's syntax restrictions. Instead, +they must be accessed using dictionary-style indexing, for example: +``materials_identifier["235U"]``. + +.. 
code-block:: python + :caption: Working with identifier aliases + + import imas + + # Get materials identifier which has some aliases defined + mid = imas.identifiers.materials_identifier + + # Access by canonical name + uranium235_by_name = mid["235U"] + print(f"Name: {uranium235_by_name.name}") + print(f"Aliases: {uranium235_by_name.aliases}") # List of all aliases + print(f"First alias: {uranium235_by_name.alias}") # First alias for compatibility + print(f"Index: {uranium235_by_name.index}") + print(f"Description: {uranium235_by_name.description}") + + # Access by any alias - all return the same object + uranium235_by_alias1 = mid["U_235"] + uranium235_by_alias2 = mid["Uranium_235"] + print(f"Same objects: {uranium235_by_name is uranium235_by_alias1 is uranium235_by_alias2}") + + # You can also use attribute access for aliases (when valid Python identifiers) + uranium235_by_attr = mid.U_235 + print(f"Same object: {uranium235_by_name is uranium235_by_attr}") + + # When assigning to IDS structures, any alias works the same way + materials = imas.IDSFactory().materials() + materials.material.resize(1) + + # These assignments are all equivalent: + materials.material[0].identifier = "235U" # canonical name + materials.material[0].identifier = "U_235" # first alias + materials.material[0].identifier = "Uranium_235" # another alias + materials.material[0].identifier = mid["235U"] # enum value + materials.material[0].identifier = mid.U_235 # enum value via alias + + Compare identifiers ------------------- @@ -108,11 +181,12 @@ Identifier structures can be compared against the identifier enum as well. They compare equal when: 1. ``index`` is an exact match -2. ``name`` is an exact match, or ``name`` is not filled in the IDS node +2. 
``name`` is an exact match, or ``name`` matches an alias, or ``name`` is not filled in the IDS node The ``description`` does not have to match with the Data Dictionary definition, but a warning is logged if the description in the IDS node does not match with -the Data Dictionary description: +the Data Dictionary description. The comparison also takes aliases into account, +so an identifier will match both its canonical name and any defined alias: .. code-block:: python :caption: Comparing identifiers @@ -139,6 +213,15 @@ the Data Dictionary description: >>> core_sources.source[0].identifier.name = "totalX" >>> core_sources.source[0].identifier == csid.total False + >>> # Alias comparison example with materials identifier + >>> mid = imas.identifiers.materials_identifier + >>> materials = imas.IDSFactory().materials() + >>> materials.material.resize(1) + >>> materials.material[0].identifier.index = 20 + >>> materials.material[0].identifier.name = "U_235" # Using alias + >>> # Compares equal to the canonical identifier even though name is alias + >>> materials.material[0].identifier == mid["235U"] + True .. seealso:: diff --git a/imas/ids_identifiers.py b/imas/ids_identifiers.py index a64dd87..1525a07 100644 --- a/imas/ids_identifiers.py +++ b/imas/ids_identifiers.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""IMAS-Python module to support Data Dictionary identifiers. 
class IDSIdentifier(Enum):
    """Base class for all identifier enums.

    Every member is declared with a tuple ``(index, description[, aliases])``.
    The integer index is used as the enum value, so two entries declared with
    the same index resolve to the same member (standard :mod:`enum` aliasing).
    """

    # NOTE(review): ``__eq__`` and ``_from_xml`` also belong to this class. They
    # are only partially visible in the reviewed hunks and are intentionally not
    # reproduced here; keep their existing implementation.

    def __new__(cls, value: int, description: str, aliases: "list | None" = None):
        """Create a member whose enum value is the integer ``value``.

        ``description`` and ``aliases`` are accepted only because :mod:`enum`
        forwards the whole member tuple to both ``__new__`` and ``__init__``;
        they are stored by ``__init__``.
        """
        obj = object.__new__(cls)
        obj._value_ = value
        return obj

    def __init__(
        self, value: int, description: str, aliases: "list | None" = None
    ) -> None:
        """Store the index, description and alias names on the member.

        Args:
            value: Unique integer index of the identifier.
            description: Long description of the identifier.
            aliases: Optional alternative names. ``None`` (the default) means
                the identifier has no aliases.
        """
        self.index = value
        """Unique index for this identifier value."""
        self.description = description
        """Description for this identifier value."""
        # A ``[]`` default would be one list object shared by every member that
        # has no aliases. Build a fresh list per member instead, and copy the
        # caller's list so later mutation on either side cannot leak.
        self.aliases = [] if aliases is None else list(aliases)
        """Alternative names for this identifier value."""
fromstring(xml) enum_values = {} + aliases = {} for int_element in element.iterfind("int"): name = int_element.get("name") value = int_element.text description = int_element.get("description") - enum_values[name] = (int(value), description) + # alias attribute may contain multiple comma-separated aliases + alias_attr = int_element.get("alias", "") + aliases = [a.strip() for a in alias_attr.split(",") if a.strip()] + # Canonical entry: use the canonical 'name' as key + enum_values[name] = (int(value), description, aliases) + # Also add alias names as enum *aliases* (they become enum attributes) + for alias in aliases: + enum_values[alias] = (int(value), description, aliases) # Create the enumeration enum = cls( identifier_name, diff --git a/imas/test/test_identifiers.py b/imas/test/test_identifiers.py index 263a6cc..72036cb 100644 --- a/imas/test/test_identifiers.py +++ b/imas/test/test_identifiers.py @@ -1,9 +1,18 @@ -import pytest +import importlib.metadata +from packaging.version import Version +import pytest from imas.dd_zip import dd_identifiers from imas.ids_factory import IDSFactory from imas.ids_identifiers import IDSIdentifier, identifiers +has_aliases = Version(importlib.metadata.version("imas_data_dictionaries")) >= Version( + "4.1.0" +) +requires_aliases = pytest.mark.skipif( + not has_aliases, reason="Requires DD 4.1.0 for identifier aliases" +) + def test_list_identifiers(): assert identifiers.identifiers == dd_identifiers() @@ -70,6 +79,70 @@ def test_identifier_struct_assignment(caplog): assert source.identifier != csid.total +def test_identifiers_with_aliases(): + # Custom identifier XML, based on materials identifier, with some more features + custom_identifier_xml = """\ + + +
+Materials used in the device mechanical structures +
+20 +21 +22 +23 +
+""" + identifier = IDSIdentifier._from_xml("custom_identifier", custom_identifier_xml) + + assert len(identifier) == 4 + + # no aliases + assert identifier.Diamond.aliases == [] + # 1 alias + assert identifier["235U"] is identifier.U_235 + assert identifier["235U"].aliases == ["U_235"] + # 3 aliases + assert ( + identifier.CxHy + is identifier.alias1 + is identifier.alias2 + is identifier["3alias"] + ) + assert identifier.CxHy.aliases == ["alias1", "alias2", "3alias"] + + +@requires_aliases +def test_identifier_struct_assignment_with_aliases(): + """Test identifier struct assignment with aliases using materials_identifier.""" + mid = identifiers.materials_identifier + + # Create an actual IDS structure + factory = IDSFactory("4.0.0").camera_x_rays() + mat = factory.filter_window.material + mat.name = "235U" + mat.index = 20 + mat.description = "Uranium 235 isotope" + mat.alias = "U_235" + + # Basic attribute checks + assert mat.name == mid["235U"].name + assert mat.alias == mid.U_235.alias + assert mat.index == mid.U_235.index + + # Test various equality scenarios + assert mat == mid.U_235 + assert mat == mid["235U"] + + # Modify material properties and test equality + mat.name = "some_name" + mat.alias = "U_235" + assert mat == mid.U_235 + + mat.alias = "235U" + assert mat == mid.U_235 + + def test_identifier_aos_assignment(): cfid = identifiers.pf_active_coil_function_identifier pfa = IDSFactory("3.39.0").pf_active() @@ -103,3 +176,140 @@ def test_invalid_identifier_assignment(): with pytest.raises(ValueError): # negative identifiers are reserved for user-defined identifiers cs.source[0].identifier = -1 + + +@requires_aliases +def test_identifier_aliases(): + """Test identifier enum aliases functionality.""" + mid = identifiers.materials_identifier + + # Test that alias points to the same object as the canonical name + assert mid.U_235 is mid["235U"] + assert mid.U_238 is mid["238U"] + assert mid.In_115 is mid["115In"] + assert mid.He_4 is mid["4He"] + + # Test 
that both name and alias have the same properties + assert mid.U_235.name == "235U" + assert mid.U_235.index == mid["235U"].index + assert mid.U_235.description == mid["235U"].description + assert mid.U_235.alias == "U_235" + + # Test accessing by alias via bracket notation + assert mid["U_235"] is mid.U_235 + assert mid["U_238"] is mid.U_238 + assert mid["In_115"] is mid.In_115 + assert mid["He_4"] is mid.He_4 + + +@requires_aliases +def test_identifier_alias_equality(): + """Test that identifiers with aliases are equal when comparing names and aliases.""" + mid = identifiers.materials_identifier + target = mid.U_235 + + # Test equality with canonical name + factory1 = IDSFactory("4.0.0").camera_x_rays() + mat1 = factory1.filter_window.material + mat1.name = "235U" + mat1.index = 20 + mat1.description = "Uranium 235 isotope" + assert mat1 == target + + # Test equality with alias name + factory2 = IDSFactory("4.0.0").camera_x_rays() + mat2 = factory2.filter_window.material + mat2.name = "U_235" + mat2.index = 20 + mat2.description = "Uranium 235 isotope" + assert mat2 == target + + # Test equality when material has alias matching canonical name + factory3 = IDSFactory("4.0.0").camera_x_rays() + mat3 = factory3.filter_window.material + mat3.name = "test_name" + mat3.index = 20 + mat3.description = "Uranium 235 isotope" + mat3.alias = "235U" + assert mat3 == target + + # Test inequality when index doesn't match + factory4 = IDSFactory("4.0.0").camera_x_rays() + mat4 = factory4.filter_window.material + mat4.name = "235U" + mat4.index = 999 + mat4.description = "Uranium 235 isotope" + assert mat4 != target + + # Test inequality when neither name nor alias matches + factory5 = IDSFactory("4.0.0").camera_x_rays() + mat5 = factory5.filter_window.material + mat5.name = "wrong_name" + mat5.index = 20 + mat5.description = "Uranium 235 isotope" + mat5.alias = "wrong_alias" + assert mat5 != target + + # Test equality with material having alias matching canonical name + 
factory6 = IDSFactory("4.0.0").camera_x_rays() + mat6 = factory6.filter_window.material + mat6.name = "test_name" + mat6.index = 20 + mat6.description = "Uranium 235 isotope" + mat6.alias = "235U" + assert mat6 == target + + # Test equality when both have matching aliases + factory7 = IDSFactory("4.0.0").camera_x_rays() + mat7 = factory7.filter_window.material + mat7.name = "sample_name" + mat7.index = 20 + mat7.description = "Uranium 235 isotope" + mat7.alias = "U_235" + assert mat7 == target + + # Test inequality when index doesn't match + factory8 = IDSFactory("4.0.0").camera_x_rays() + mat8 = factory8.filter_window.material + mat8.name = "235U" + mat8.index = 999 + mat8.description = "Uranium 235 isotope" + assert mat8 != target + + # Test inequality when neither name nor alias matches + factory9 = IDSFactory("4.0.0").camera_x_rays() + mat9 = factory9.filter_window.material + mat9.name = "wrong_name" + mat9.index = 20 + mat9.description = "Uranium 235 isotope" + mat9.alias = "wrong_alias" + assert mat9 != target + + # Test equality when material has list of multiple aliases + factory10 = IDSFactory("4.0.0").camera_x_rays() + mat10 = factory10.filter_window.material + mat10.name = "test_name" + mat10.index = 20 + mat10.description = "Uranium 235 isotope" + mat10.alias = "235U,U_235,Uranium_235" + assert mat10 == target + assert mat10.alias[0] == target[0] + assert mat10.alias[1] == target[0] + assert mat10.alias[2] == target[0] + assert mat10.alias[1] == target[2] + assert mat10.alias[2] == target[1] + + # Test equality when material has multiple aliases + factory11 = IDSFactory("4.0.0").camera_x_rays() + mat0 = factory11.filter_window.material + mat0.name = "test_name" + mat0.index = 20 + mat0.description = "Uranium 235 isotope" + mat0.alias = "U_235" + + mat1 = factory11.filter_window.material + mat1.name = "test_name" + mat1.index = 20 + mat1.description = "Uranium 235 isotope" + mat1.alias = "Uranium_235" + assert mat0 == mat1 == target