Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restore old crystal water parsing in v0.8.x #229

Merged
merged 5 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .woodpecker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ steps:
- export CPATH=$SUNDBASE/include:$CPATH
- export LIBRARY_PATH=$SUNDBASE/lib
- export LD_LIBRARY_PATH=$SUNDBASE/lib
- python3 -m pip install --cache-dir $CACHE_ROOT/pip_cache --user --upgrade-strategy=eager --upgrade cython
- python3 -m pip install --cache-dir $CACHE_ROOT/pip_cache --user -e .[all]
- python3 -c "import pycvodes; import pyodesys; import pygslodeiv2" # debug this CI config
- git fetch -tq
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ If you make use of ChemPy in e.g. academic work you may cite the following peer-
Depending on what underlying solver you are using you should also cite the appropriate paper
(you can look at the list of references in the JOSS article). If you need to reference,
in addition to the paper, a specific point version of ChemPy (for e.g. reproducibility)
you can get per-version DOIs from the zendodo archive:
you can get per-version DOIs from the zenodo archive:

.. image:: https://zenodo.org/badge/8840/bjodah/chempy.svg
:target: https://zenodo.org/badge/latestdoi/8840/bjodah/chempy
Expand Down
40 changes: 33 additions & 7 deletions chempy/util/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
from collections import defaultdict

import re
import warnings

from .pyutil import memoize
from .pyutil import memoize, ChemPyDeprecationWarning
from .periodic import symbols

parsing_library = "pyparsing" # info used for selective testing.
Expand Down Expand Up @@ -95,7 +96,7 @@ def _get_formula_parser():
| '{' formula '}'
| '[' formula ']' ) count prime charge?
formula :: term+
hydrate :: '.' count? formula
hydrate :: ( '.' | '\u00B7' | '*' ) count? formula
state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')'
compound :: count formula hydrate? state?

Expand All @@ -114,7 +115,7 @@ def _get_formula_parser():
| '{' formula '}'
| '[' formula ']' ) count prime charge?
formula :: term+
hydrate :: '..' count? formula
hydrate :: ( '..' | '\u00B7' | '*' ) count? formula
state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')'
compound :: count formula hydrate? state?
"""
Expand Down Expand Up @@ -334,7 +335,7 @@ def _parse_stoich(stoich):

_unicode_mapping = {k + "-": v + "-" for k, v in zip(_greek_letters, _greek_u)}
_unicode_mapping["."] = "⋅"
_unicode_infix_mapping = {"..": "·"}
_unicode_infix_mapping = {"..": "\u00b7"}

_html_mapping = {k + "-": "&" + k + ";-" for k in _greek_letters}
_html_mapping["."] = "⋅"
Expand Down Expand Up @@ -377,7 +378,7 @@ def formula_to_composition(
True
>>> formula_to_composition('.NHO-(aq)') == {0: -1, 1: 1, 7: 1, 8: 1}
True
>>> formula_to_composition('Na2CO3..7H2O') == {11: 2, 6: 1, 8: 10, 1: 14}
>>> formula_to_composition('Na2CO3*7H2O') == {11: 2, 6: 1, 8: 10, 1: 14}
True

"""
Expand All @@ -386,7 +387,19 @@ def formula_to_composition(

stoich_tok, chg_tok = _formula_to_parts(formula, prefixes, suffixes)[:2]
tot_comp = {}
parts = stoich_tok.split("..")
if ".." in stoich_tok:
parts = stoich_tok.split("..")
elif "\u00b7" in stoich_tok:
parts = stoich_tok.split('\u00b7')
elif '.' in stoich_tok:
warnings.warn(
("dot is ambiguous in chempy-0.8.x, prefer '*' or '\u00b7' for complexes."
" Dot will be interpreted as floating point in chempy-0.9+"),
ChemPyDeprecationWarning
)
parts = stoich_tok.split('.')
else:
parts = list(filter(len, internal_asterisk.split(stoich_tok)))

for idx, stoich in enumerate(parts):
if idx == 0:
Expand Down Expand Up @@ -523,6 +536,9 @@ def to_reaction(line, substance_keys, token, Cls, globals_=None, **kwargs):
)


# Pattern for an asterisk used as an infix delimiter, "<head>*<tail>":
# group 1 is a run of characters that are neither whitespace nor '*',
# group 2 is a run of ASCII letters/digits directly following the '*'.
# NOTE(review): both groups are capturing, so ``.split()`` returns the
# captured head/tail as list items (plus possibly empty strings), whereas
# ``.sub()`` with a plain replacement string replaces the *entire* match,
# captured text included -- confirm each call site expects that.
internal_asterisk = re.compile(r"([^\s\*]+)\*([a-zA-Z0-9]+)")


def _formula_to_format(
sub,
sup,
Expand All @@ -532,7 +548,17 @@ def _formula_to_format(
suffixes=("(s)", "(l)", "(g)", "(aq)"),
):
parts = _formula_to_parts(formula, prefixes.keys(), suffixes)
stoichs = parts[0].split("..")
parts0 = parts[0].replace("..", "\u00B7")
parts0 = internal_asterisk.sub("\u00B7", parts0)
if '.' in parts0:
warnings.warn(
("dot is ambiguous in chempy-0.8.x, prefer '*' or '' for complexes."
" Dot will be interpreted as floating point in chempy-0.9+"),
ChemPyDeprecationWarning
)
parts0 = parts0.replace('.', "\u00B7")
stoichs = parts0.split("\u00B7")

string = ""
for idx, stoich in enumerate(stoichs):
if idx == 0:
Expand Down
118 changes: 67 additions & 51 deletions chempy/util/tests/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,45 +303,46 @@ def test_formula_to_composition_bad_complexes(species):
formula_to_composition(species)


@pytest.mark.parametrize(
"species, composition",
[
(
"Ca2.832Fe0.6285Mg5.395(CO3)6",
{
6: 6,
8: 18,
12: 5.395,
20: 2.832,
26: 0.6285,
},
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
{
6: 6,
8: 18,
12: 5.395,
20: 2.832,
26: 0.6285,
},
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
{
1: 16,
6: 6,
8: 26,
12: 5.395,
20: 2.832,
26: 0.6285,
},
),
],
)
@requires(parsing_library)
def test_formula_to_composition_fractional_subscripts(species, composition):
assert formula_to_composition(species) == composition
# This test is enabled in chempy-0.9+
# @pytest.mark.parametrize(
# "species, composition",
# [
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6",
# {
# 6: 6,
# 8: 18,
# 12: 5.395,
# 20: 2.832,
# 26: 0.6285,
# },
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
# {
# 6: 6,
# 8: 18,
# 12: 5.395,
# 20: 2.832,
# 26: 0.6285,
# },
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
# {
# 1: 16,
# 6: 6,
# 8: 26,
# 12: 5.395,
# 20: 2.832,
# 26: 0.6285,
# },
# ),
# ],
# )
# @requires(parsing_library)
# def test_formula_to_composition_fractional_subscripts(species, composition):
# assert formula_to_composition(species) == composition


@pytest.mark.parametrize(
Expand Down Expand Up @@ -535,18 +536,19 @@ def test_to_reaction():
),
("[Fe(CN)6]-3", r"[Fe(CN)_{6}]^{3-}"),
("[Fe(CN)6]-3(aq)", r"[Fe(CN)_{6}]^{3-}(aq)"),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6",
r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}",
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}(s)",
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}\cdot 8H_{2}O(s)",
),
# This test is enabled in chempy-0.9+:
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6",
# r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}",
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
# r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}(s)",
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
# r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}\cdot 8H_{2}O(s)",
# ),
],
)
@requires(parsing_library)
Expand Down Expand Up @@ -712,3 +714,17 @@ def test_formula_to_html(species, html):
def test_formula_to_html_caged(species, html):
"""Should produce HTML for cage species."""
assert formula_to_html(species) == html


def test_composition_dot_as_crystal_water_chempy08x():
    """Check the crystal-water delimiters accepted by ChemPy v0.8.x.

    In ChemPy v0.8.x a dot signifies crystal water, but an asterisk '*'
    or an interpunct (·) is also accepted (and preferred).
    From ChemPy v0.9.x onwards, only the interpunct and the asterisk will
    be interpreted as crystal water delimiters, and a dot will be
    interpreted as the floating point delimiter in fractional
    stoichiometric coefficients.
    """
    expected = {30: 1, 7: 2, 8: 12, 1: 12}

    # Preferred spellings: interpunct and asterisk.
    preferred_spellings = (
        'Zn(NO3)2{}6H2O'.format('\u00B7'),
        'Zn(NO3)2*6H2O',
    )
    for formula in preferred_spellings:
        assert formula_to_composition(formula) == expected

    # The dotted spelling must still parse, but inside a context that
    # requires a deprecation warning to be raised, see:
    # https://docs.pytest.org/en/7.1.x/how-to/capture-warnings.html#ensuring-code-triggers-a-deprecation-warning
    with pytest.deprecated_call():
        dotted_composition = formula_to_composition('Zn(NO3)2.6H2O')
        assert dotted_composition == expected