From b750c7ff85838bc2feb50c5d6f6ff7a465be031e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Ingvar=20Dahlgren?= Date: Mon, 22 Apr 2024 08:28:32 +0200 Subject: [PATCH 1/5] Add (currently failing) test for crystal water parsing for 0.8.x --- chempy/tests/test_chemistry.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/chempy/tests/test_chemistry.py b/chempy/tests/test_chemistry.py index 2307be57..752c4eb6 100644 --- a/chempy/tests/test_chemistry.py +++ b/chempy/tests/test_chemistry.py @@ -570,3 +570,16 @@ def test_balance_stoichiometry__duplicates(): underdetermined=None, ) assert bal_Mn_COx == ({"CO": 2}, {"C": 1, "CO2": 1}) + + +def test_composition_dot_as_crystal_water_chempy08x(): + """In Chempy v0.8.x a dot will signify crystal water. But an asterisk '*' + or and interpunct (·) is also accepted (and preferred). + From Chempy v0.9.x on-wards, only interpunct and asterisk will be + interpreted as crystal water delimiters, and a dot will be interpreted + as floating point delimiter in fractional stoichiometric coefficients.""" + assert Substance.from_formula('Zn(NO3)2·6H2O').composition == {30: 1, 7: 2, 8: 12, 1: 12} + assert Substance.from_formula('Zn(NO3)2*6H2O').composition == {30: 1, 7: 2, 8: 12, 1: 12} + # https://docs.pytest.org/en/7.1.x/how-to/capture-warnings.html#ensuring-code-triggers-a-deprecation-warning + with pytest.deprecated_call(): + assert Substance.from_formula('Zn(NO3)2.6H2O').composition == {30: 1, 7: 2, 8: 12, 1: 12} From 8bf81f9c84d62e584bc776c4e06c44cab268f4e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Ingvar=20Dahlgren?= Date: Mon, 22 Apr 2024 09:01:52 +0200 Subject: [PATCH 2/5] latest cython --- .woodpecker.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.woodpecker.yaml b/.woodpecker.yaml index 3aca31ce..803e7910 100644 --- a/.woodpecker.yaml +++ b/.woodpecker.yaml @@ -27,6 +27,7 @@ steps: - export CPATH=$SUNDBASE/include:$CPATH - export LIBRARY_PATH=$SUNDBASE/lib - export 
LD_LIBRARY_PATH=$SUNDBASE/lib + - python3 -m pip install --user --upgrade-strategy=eager --upgrade cython - python3 -m pip install --cache-dir $CACHE_ROOT/pip_cache --user -e .[all] - python3 -c "import pycvodes; import pyodesys; import pygslodeiv2" # debug this CI config - git fetch -tq From 4ed766e58610e1fd50b951aece5e7caeea7e3538 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Ingvar=20Dahlgren?= Date: Mon, 22 Apr 2024 09:34:30 +0200 Subject: [PATCH 3/5] apply old parsing behavior to 0.8.x --- .woodpecker.yaml | 2 +- chempy/util/parsing.py | 30 ++++++++++++++++++++---------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/.woodpecker.yaml b/.woodpecker.yaml index 803e7910..115497fb 100644 --- a/.woodpecker.yaml +++ b/.woodpecker.yaml @@ -27,7 +27,7 @@ steps: - export CPATH=$SUNDBASE/include:$CPATH - export LIBRARY_PATH=$SUNDBASE/lib - export LD_LIBRARY_PATH=$SUNDBASE/lib - - python3 -m pip install --user --upgrade-strategy=eager --upgrade cython + - python3 -m pip install --cache-dir $CACHE_ROOT/pip_cache --user --upgrade-strategy=eager --upgrade cython - python3 -m pip install --cache-dir $CACHE_ROOT/pip_cache --user -e .[all] - python3 -c "import pycvodes; import pyodesys; import pygslodeiv2" # debug this CI config - git fetch -tq diff --git a/chempy/util/parsing.py b/chempy/util/parsing.py index 0313545c..b0b80a10 100644 --- a/chempy/util/parsing.py +++ b/chempy/util/parsing.py @@ -5,8 +5,9 @@ from collections import defaultdict import re +import warnings -from .pyutil import memoize +from .pyutil import memoize, ChemPyDeprecationWarning from .periodic import symbols parsing_library = "pyparsing" # info used for selective testing. @@ -95,7 +96,7 @@ def _get_formula_parser(): | '{' formula '}' | '[' formula ']' ) count prime charge? formula :: term+ - hydrate :: '.' count? formula + hydrate :: ( '.' | '·' | '*' ) count? formula state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')' compound :: count formula hydrate? state? 
@@ -114,7 +115,7 @@ def _get_formula_parser(): | '{' formula '}' | '[' formula ']' ) count prime charge? formula :: term+ - hydrate :: '..' count? formula + hydrate :: ( '..' | '·' | '*' ) count? formula state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')' compound :: count formula hydrate? state? """ @@ -330,16 +331,16 @@ def _parse_stoich(stoich): _latex_mapping["epsilon-"] = "\\varepsilon-" _latex_mapping["omicron-"] = "o-" _latex_mapping["."] = "^\\bullet " -_latex_infix_mapping = {"..": "\\cdot "} +_latex_infix_mapping = {"·": "\\cdot "} _unicode_mapping = {k + "-": v + "-" for k, v in zip(_greek_letters, _greek_u)} _unicode_mapping["."] = "⋅" -_unicode_infix_mapping = {"..": "·"} +_unicode_infix_mapping = {"·": "·"} _html_mapping = {k + "-": "&" + k + ";-" for k in _greek_letters} _html_mapping["."] = "⋅" # _html_infix_mapping = _html_mapping -_html_infix_mapping = {"..": "⋅"} +_html_infix_mapping = {"·": "⋅"} def _get_leading_integer(s): @@ -377,7 +378,7 @@ def formula_to_composition( True >>> formula_to_composition('.NHO-(aq)') == {0: -1, 1: 1, 7: 1, 8: 1} True - >>> formula_to_composition('Na2CO3..7H2O') == {11: 2, 6: 1, 8: 10, 1: 14} + >>> formula_to_composition('Na2CO3·7H2O'.format(s)) == {11: 2, 6: 1, 8: 10, 1: 14} True """ @@ -386,7 +387,7 @@ def formula_to_composition( stoich_tok, chg_tok = _formula_to_parts(formula, prefixes, suffixes)[:2] tot_comp = {} - parts = stoich_tok.split("..") + parts = stoich_tok.split("·") for idx, stoich in enumerate(parts): if idx == 0: @@ -532,14 +533,23 @@ def _formula_to_format( suffixes=("(s)", "(l)", "(g)", "(aq)"), ): parts = _formula_to_parts(formula, prefixes.keys(), suffixes) - stoichs = parts[0].split("..") + parts0 = parts[0].replace("..", '·').replace('*', '·') + if '.' in parts0: + warnings.warn( + ("dot is ambiguous in chempy-0.8.x, prefer '*' or '' for complexes." 
+ " Dot will be interpreted as floating point in chempy-0.9+"), + ChemPyDeprecationWarning + ) + parts0 = parts0.replace('.', '·') + stoichs = parts0.split("·") + string = "" for idx, stoich in enumerate(stoichs): if idx == 0: m = 1 else: m, stoich = _get_leading_integer(stoich) - string += _subs("..", infixes) + string += _subs("·", infixes) if m != 1: string += str(m) string += re.sub(r"([0-9]+\.[0-9]+|[0-9]+)", lambda m: sub(m.group(1)), stoich) From 6ca8737217be55a6e0b8f297b40ae2bd7fcddfc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Ingvar=20Dahlgren?= Date: Mon, 22 Apr 2024 10:53:45 +0200 Subject: [PATCH 4/5] tweak tests in 0.8.x --- chempy/tests/test_chemistry.py | 13 ---- chempy/util/parsing.py | 37 +++++++--- chempy/util/tests/test_parsing.py | 118 +++++++++++++++++------------- 3 files changed, 93 insertions(+), 75 deletions(-) diff --git a/chempy/tests/test_chemistry.py b/chempy/tests/test_chemistry.py index 752c4eb6..2307be57 100644 --- a/chempy/tests/test_chemistry.py +++ b/chempy/tests/test_chemistry.py @@ -570,16 +570,3 @@ def test_balance_stoichiometry__duplicates(): underdetermined=None, ) assert bal_Mn_COx == ({"CO": 2}, {"C": 1, "CO2": 1}) - - -def test_composition_dot_as_crystal_water_chempy08x(): - """In Chempy v0.8.x a dot will signify crystal water. But an asterisk '*' - or and interpunct (·) is also accepted (and preferred). 
- From Chempy v0.9.x on-wards, only interpunct and asterisk will be - interpreted as crystal water delimiters, and a dot will be interpreted - as floating point delimiter in fractional stoichiometric coefficients.""" - assert Substance.from_formula('Zn(NO3)2·6H2O').composition == {30: 1, 7: 2, 8: 12, 1: 12} - assert Substance.from_formula('Zn(NO3)2*6H2O').composition == {30: 1, 7: 2, 8: 12, 1: 12} - # https://docs.pytest.org/en/7.1.x/how-to/capture-warnings.html#ensuring-code-triggers-a-deprecation-warning - with pytest.deprecated_call(): - assert Substance.from_formula('Zn(NO3)2.6H2O').composition == {30: 1, 7: 2, 8: 12, 1: 12} diff --git a/chempy/util/parsing.py b/chempy/util/parsing.py index b0b80a10..66d6bdb3 100644 --- a/chempy/util/parsing.py +++ b/chempy/util/parsing.py @@ -96,7 +96,7 @@ def _get_formula_parser(): | '{' formula '}' | '[' formula ']' ) count prime charge? formula :: term+ - hydrate :: ( '.' | '·' | '*' ) count? formula + hydrate :: ( '.' | '\u00B7' | '*' ) count? formula state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')' compound :: count formula hydrate? state? @@ -115,7 +115,7 @@ def _get_formula_parser(): | '{' formula '}' | '[' formula ']' ) count prime charge? formula :: term+ - hydrate :: ( '..' | '·' | '*' ) count? formula + hydrate :: ( '..' | '\u00B7' | '*' ) count? formula state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')' compound :: count formula hydrate? state? 
""" @@ -331,16 +331,16 @@ def _parse_stoich(stoich): _latex_mapping["epsilon-"] = "\\varepsilon-" _latex_mapping["omicron-"] = "o-" _latex_mapping["."] = "^\\bullet " -_latex_infix_mapping = {"·": "\\cdot "} +_latex_infix_mapping = {"..": "\\cdot "} _unicode_mapping = {k + "-": v + "-" for k, v in zip(_greek_letters, _greek_u)} _unicode_mapping["."] = "⋅" -_unicode_infix_mapping = {"·": "·"} +_unicode_infix_mapping = {"..": "\u00b7"} _html_mapping = {k + "-": "&" + k + ";-" for k in _greek_letters} _html_mapping["."] = "⋅" # _html_infix_mapping = _html_mapping -_html_infix_mapping = {"·": "⋅"} +_html_infix_mapping = {"..": "⋅"} def _get_leading_integer(s): @@ -378,7 +378,7 @@ def formula_to_composition( True >>> formula_to_composition('.NHO-(aq)') == {0: -1, 1: 1, 7: 1, 8: 1} True - >>> formula_to_composition('Na2CO3·7H2O'.format(s)) == {11: 2, 6: 1, 8: 10, 1: 14} + >>> formula_to_composition('Na2CO3*7H2O') == {11: 2, 6: 1, 8: 10, 1: 14} True """ @@ -387,7 +387,19 @@ def formula_to_composition( stoich_tok, chg_tok = _formula_to_parts(formula, prefixes, suffixes)[:2] tot_comp = {} - parts = stoich_tok.split("·") + if ".." in stoich_tok: + parts = stoich_tok.split("..") + elif "\u00b7" in stoich_tok: + parts = stoich_tok.split('\u00b7') + elif '.' in stoich_tok: + warnings.warn( + ("dot is ambiguous in chempy-0.8.x, prefer '*' or '\u00b7' for complexes." 
+ " Dot will be interpreted as floating point in chempy-0.9+"), + ChemPyDeprecationWarning + ) + parts = stoich_tok.split('.') + else: + parts = list(filter(len, internal_asterisk.split(stoich_tok))) for idx, stoich in enumerate(parts): if idx == 0: @@ -524,6 +536,8 @@ def to_reaction(line, substance_keys, token, Cls, globals_=None, **kwargs): ) +internal_asterisk = re.compile(r"([^\s\*]+)\*([a-zA-Z0-9]+)") + def _formula_to_format( sub, sup, @@ -533,15 +547,16 @@ def _formula_to_format( suffixes=("(s)", "(l)", "(g)", "(aq)"), ): parts = _formula_to_parts(formula, prefixes.keys(), suffixes) - parts0 = parts[0].replace("..", '·').replace('*', '·') + parts0 = parts[0].replace("..", "\u00B7") + parts0 = internal_asterisk.sub("\u00B7", parts0) if '.' in parts0: warnings.warn( ("dot is ambiguous in chempy-0.8.x, prefer '*' or '' for complexes." " Dot will be interpreted as floating point in chempy-0.9+"), ChemPyDeprecationWarning ) - parts0 = parts0.replace('.', '·') - stoichs = parts0.split("·") + parts0 = parts0.replace('.', "\u00B7") + stoichs = parts0.split("\u00B7") string = "" for idx, stoich in enumerate(stoichs): @@ -549,7 +564,7 @@ def _formula_to_format( m = 1 else: m, stoich = _get_leading_integer(stoich) - string += _subs("·", infixes) + string += _subs("..", infixes) if m != 1: string += str(m) string += re.sub(r"([0-9]+\.[0-9]+|[0-9]+)", lambda m: sub(m.group(1)), stoich) diff --git a/chempy/util/tests/test_parsing.py b/chempy/util/tests/test_parsing.py index 67386278..d6418e67 100644 --- a/chempy/util/tests/test_parsing.py +++ b/chempy/util/tests/test_parsing.py @@ -303,45 +303,46 @@ def test_formula_to_composition_bad_complexes(species): formula_to_composition(species) -@pytest.mark.parametrize( - "species, composition", - [ - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6", - { - 6: 6, - 8: 18, - 12: 5.395, - 20: 2.832, - 26: 0.6285, - }, - ), - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6(s)", - { - 6: 6, - 8: 18, - 12: 5.395, - 20: 2.832, - 26: 0.6285, - }, - ), - ( - 
"Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)", - { - 1: 16, - 6: 6, - 8: 26, - 12: 5.395, - 20: 2.832, - 26: 0.6285, - }, - ), - ], -) -@requires(parsing_library) -def test_formula_to_composition_fractional_subscripts(species, composition): - assert formula_to_composition(species) == composition +# This test is enabled in chempy-0.9+ +# @pytest.mark.parametrize( +# "species, composition", +# [ +# ( +# "Ca2.832Fe0.6285Mg5.395(CO3)6", +# { +# 6: 6, +# 8: 18, +# 12: 5.395, +# 20: 2.832, +# 26: 0.6285, +# }, +# ), +# ( +# "Ca2.832Fe0.6285Mg5.395(CO3)6(s)", +# { +# 6: 6, +# 8: 18, +# 12: 5.395, +# 20: 2.832, +# 26: 0.6285, +# }, +# ), +# ( +# "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)", +# { +# 1: 16, +# 6: 6, +# 8: 26, +# 12: 5.395, +# 20: 2.832, +# 26: 0.6285, +# }, +# ), +# ], +# ) +# @requires(parsing_library) +# def test_formula_to_composition_fractional_subscripts(species, composition): +# assert formula_to_composition(species) == composition @pytest.mark.parametrize( @@ -535,18 +536,19 @@ def test_to_reaction(): ), ("[Fe(CN)6]-3", r"[Fe(CN)_{6}]^{3-}"), ("[Fe(CN)6]-3(aq)", r"[Fe(CN)_{6}]^{3-}(aq)"), - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6", - r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}", - ), - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6(s)", - r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}(s)", - ), - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)", - r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}\cdot 8H_{2}O(s)", - ), + # This test is enabled in chempy-0.9+: + # ( + # "Ca2.832Fe0.6285Mg5.395(CO3)6", + # r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}", + # ), + # ( + # "Ca2.832Fe0.6285Mg5.395(CO3)6(s)", + # r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}(s)", + # ), + # ( + # "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)", + # r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}\cdot 8H_{2}O(s)", + # ), ], ) @requires(parsing_library) @@ -712,3 +714,17 @@ def test_formula_to_html(species, html): def test_formula_to_html_caged(species, html): """Should produce HTML for cage species.""" assert 
formula_to_html(species) == html + + +def test_composition_dot_as_crystal_water_chempy08x(): + """In Chempy v0.8.x a dot will signify crystal water. But an asterisk '*' + or an interpunct (·) is also accepted (and preferred). + From Chempy v0.9.x onwards, only interpunct and asterisk will be + interpreted as crystal water delimiters, and a dot will be interpreted + as floating point delimiter in fractional stoichiometric coefficients.""" + ref = {30: 1, 7: 2, 8: 12, 1: 12} + assert formula_to_composition('Zn(NO3)2{}6H2O'.format('\u00B7')) == ref + assert formula_to_composition('Zn(NO3)2*6H2O') == ref + # https://docs.pytest.org/en/7.1.x/how-to/capture-warnings.html#ensuring-code-triggers-a-deprecation-warning + with pytest.deprecated_call(): + assert formula_to_composition('Zn(NO3)2.6H2O') == ref From d33100af45b2e64769261f8f1b15bc8541b30bc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Ingvar=20Dahlgren?= Date: Mon, 22 Apr 2024 15:29:14 +0200 Subject: [PATCH 5/5] linting --- README.rst | 2 +- chempy/util/parsing.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 74294e81..a16c85af 100644 --- a/README.rst +++ b/README.rst @@ -385,7 +385,7 @@ If you make use of ChemPy in e.g. academic work you may cite the following peer- Depending on what underlying solver you are using you should also cite the appropriate paper (you can look at the list of references in the JOSS article). If you need to reference, in addition to the paper, a specific point version of ChemPy (for e.g. reproducibility) -you can get per-version DOIs from the zendodo archive: +you can get per-version DOIs from the zenodo archive: .. 
image:: https://zenodo.org/badge/8840/bjodah/chempy.svg :target: https://zenodo.org/badge/latestdoi/8840/bjodah/chempy diff --git a/chempy/util/parsing.py b/chempy/util/parsing.py index 66d6bdb3..aca30723 100644 --- a/chempy/util/parsing.py +++ b/chempy/util/parsing.py @@ -538,6 +538,7 @@ def to_reaction(line, substance_keys, token, Cls, globals_=None, **kwargs): internal_asterisk = re.compile(r"([^\s\*]+)\*([a-zA-Z0-9]+)") + def _formula_to_format( sub, sup,