From 0230cd960a3e69f057aeaf840d49f337eeb0f32a Mon Sep 17 00:00:00 2001 From: Giovanni Pizzi Date: Wed, 21 Nov 2018 15:58:31 +0100 Subject: [PATCH 1/2] Properly checking comments in namelists now Fixes #18 --- qe_tools/parsers/qeinputparser.py | 74 ++++++++++++- tests/data/example_comment_in_namelist.in | 29 +++++ .../data/example_ibrav0_error_multiplekeys.in | 38 +++++++ .../data/ref/example_comment_in_namelist.json | 104 ++++++++++++++++++ tests/test_parsers.py | 12 ++ 5 files changed, 253 insertions(+), 4 deletions(-) create mode 100644 tests/data/example_comment_in_namelist.in create mode 100644 tests/data/example_ibrav0_error_multiplekeys.in create mode 100644 tests/data/ref/example_comment_in_namelist.json diff --git a/qe_tools/parsers/qeinputparser.py b/qe_tools/parsers/qeinputparser.py index fb15c87..8e6b5dd 100644 --- a/qe_tools/parsers/qeinputparser.py +++ b/qe_tools/parsers/qeinputparser.py @@ -339,8 +339,8 @@ def str2val(valstr): try: val = conversion_fn(valstr) except ValueError as error: - print('Error converting {} to a value'.format(repr(valstr))) - raise error + raise ValueError('Error converting {} to a value'.format( + repr(valstr))) if val is None: raise ValueError('Unable to convert {} to a python variable.\n' 'NOTE: Support for algebraic expressions is not yet ' @@ -403,8 +403,22 @@ def parse_namelists(txt): for nmlst, blockstr in namelist_re.findall(txt): # ...extract the key value pairs, storing them each in nmlst_dict,... nmlst_dict = {} - for key, valstr in key_value_re.findall(blockstr): - nmlst_dict[key.lower()] = str2val(valstr) + # I split the lines, putting back a \n at the end (I want + # to have it otherwise lines not ending with a comma are ignored) + blocklines = blockstr.splitlines() + # Remove comments on each line, and then put back the \n + # Note that strip_comment does not want \n in the string! + blocklines = [ + "{}\n".format(strip_comment(line)) for line in blocklines + ] + + for blockline in blocklines: + for key, valstr in key_value_re.findall(blockline): + if key.lower() in nmlst_dict: + raise ValueError( + "Key {} found more than once in namelist {}".format( + key.lower(), nmlst)) + nmlst_dict[key.lower()] = str2val(valstr) # ...and, store nmlst_dict as a value in params_dict with the namelist # as the key. if len(nmlst_dict.keys()) > 0: @@ -1185,3 +1199,55 @@ def get_structure_from_qeinput(filepath=None, species=atomic_species, cell=cell.tolist(), atom_names=atomic_positions['names']) + + +def strip_comment(string, + comment_characters=('!', ), + quote_characters=('"', "'")): + """ + Return the string only until the first comment (if any), but makes + sure to ignore comments if inside a string + + Note: it expects to get a SINGLE line, without newline characters + + Note: it does not cope with escaping of quotes + + :param string: the string to check + :param comment_characters: a list or tuple of accepted comment characters + :param quote_characters: a list or tuple of accepted valid string characters + """ + new_string = [] # Will contain individual charachters of the new string + + in_string = False + string_quote = None # Will contain the specific character used to enter + # this string: this avoids that 'aaa" is considered as + # the string containing the three characters aaa + + for char in string: + if char == '\n': + raise ValueError( + "I still cannot cope with newlines in the string...") + # Check if we are entering/existing a string + if in_string and char == string_quote: + #print("EXIT") + in_string = False + string_quote = None + elif not in_string and char in quote_characters: + #print("ENTER") + in_string = True + string_quote = char + #print(char, in_string, string_quote) + + # We found a comment, return until here (without the comment) + if not in_string and char in comment_characters: + return "".join(new_string) + + new_string.append(char) + + # If we are here, no comments where found + if in_string: + raise ValueError( + "String >>{}<< is not closed, it was open with the {} char".format( + string, string_quote)) + # I just return the same string, even if this would be equivalent to "".join(new_string) + return string diff --git a/tests/data/example_comment_in_namelist.in b/tests/data/example_comment_in_namelist.in new file mode 100644 index 0000000..821fd9d --- /dev/null +++ b/tests/data/example_comment_in_namelist.in @@ -0,0 +1,29 @@ +&control + pseudo_dir = 'pseudo/' + calculation = 'scf', + prefix = 'Si_exc1', + / + &system + ibrav = 0 + ! ibrav = -3, + celldm(1) = 20.385647759, + nat = 1, + ntyp = 1, + ecutwfc = 30 + / + &electrons + / + +ATOMIC_SPECIES + Si 28.086 Si.pbe-n-rrkjus_psl.1.0.0.UPF + +ATOMIC_POSITIONS {bohr} +Si 0. 0. 0. + +CELL_PARAMETERS {alat} +-1.0 1.0 1.0 + 1.0 -1.0 1.0 + 1.0 1.0 -1.0 + +K_POINTS {automatic} + 6 6 6 1 1 1 diff --git a/tests/data/example_ibrav0_error_multiplekeys.in b/tests/data/example_ibrav0_error_multiplekeys.in new file mode 100644 index 0000000..894766c --- /dev/null +++ b/tests/data/example_ibrav0_error_multiplekeys.in @@ -0,0 +1,38 @@ +&CONTROL + calculation = 'scf' + outdir = './out/' + prefix = 'aiida' + pseudo_dir = './pseudo/' + restart_mode = 'from_scratch' + tprnfor = .true. + tstress = .true. + tstress = .true. + verbosity = 'high' + wf_collect = .true. +/ +&SYSTEM + ecutrho = 3.2000000000d+02 + ecutwfc = 4.0000000000d+01 + ibrav = 0 + nat = 5 + ntyp = 3 +/ +&ELECTRONS + conv_thr = 1.0000000000d-10 +/ +ATOMIC_SPECIES +Ba 137.327 Ba.pbesol-spn-rrkjus_psl.0.2.3-tot-pslib030.UPF +O 15.9994 O.pbesol-n-rrkjus_psl.0.1-tested-pslib030.UPF +Ti 47.867 Ti.pbesol-spn-rrkjus_psl.0.2.3-tot-pslib030.UPF +ATOMIC_POSITIONS angstrom +Ba 0.0000000000 0.0000000000 0.0000000000 +Ti 2.0000000000 2.0000000000 2.0000000000 +O 2.0000000000 2.0000000000 0.0000000000 +O 2.0000000000 0.0000000000 2.0000000000 +O 0.0000000000 2.0000000000 2.0000000000 +K_POINTS automatic +2 2 2 0 0 0 +CELL_PARAMETERS angstrom + 4.0000000000 0.0000000000 0.0000000000 + 0.0000000000 4.0000000000 0.0000000000 + 0.0000000000 0.0000000000 4.0000000000 diff --git a/tests/data/ref/example_comment_in_namelist.json b/tests/data/ref/example_comment_in_namelist.json new file mode 100644 index 0000000..8cf1000 --- /dev/null +++ b/tests/data/ref/example_comment_in_namelist.json @@ -0,0 +1,104 @@ +{ + "atomic_positions": { + "fixed_coords": [ + [ + false, + false, + false + ] + ], + "names": [ + "Si" + ], + "positions": [ + [ + 0.0, + 0.0, + 0.0 + ] + ], + "units": "bohr" + }, + "atomic_species": { + "masses": [ + 28.086 + ], + "names": [ + "Si" + ], + "pseudo_file_names": [ + "Si.pbe-n-rrkjus_psl.1.0.0.UPF" + ] + }, + "cell": [ + [ + -10.787620176406609, + 10.787620176406609, + 10.787620176406609 + ], + [ + 10.787620176406609, + -10.787620176406609, + 10.787620176406609 + ], + [ + 10.787620176406609, + 10.787620176406609, + -10.787620176406609 + ] + ], + "cell_parameters": { + "cell": [ + [ + -1.0, + 1.0, + 1.0 + ], + [ + 1.0, + -1.0, + 1.0 + ], + [ + 1.0, + 1.0, + -1.0 + ] + ], + "units": "alat" + }, + "k_points": { + "offset": [ + 0.5, + 0.5, + 0.5 + ], + "points": [ + 6, + 6, + 6 + ], + "type": "automatic" + }, + "namelists": { + "CONTROL": { + "calculation": "scf", + "prefix": "Si_exc1", + "pseudo_dir": "pseudo/" + }, + "SYSTEM": { + "celldm(1)": 20.385647759, + "ecutwfc": 30, + "ibrav": 0, + "nat": 1, + "ntyp": 1 + } + }, + "positions_angstrom": [ + [ + 0.0, + 0.0, + 0.0 + ] + ] +} \ No newline at end of file diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 1f15095..b1a7917 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -197,9 +197,21 @@ def singletest(self, label, parser='pw'): ############################################################################ ## Here start the tests ############################################################################ + def test_example_comment_in_namelist(self): + self.singletest(label='example_comment_in_namelist') + def test_example_ibrav0(self): self.singletest(label='example_ibrav0') + def test_example_ibrav0_error_multiplekeys(self): + # It should raise because there is twice the same key in a namelist + with self.assertRaises(ValueError) as exception_obj: + self.singletest(label='example_ibrav0_error_multiplekeys') + + # It should complain about 'tstress' being found multiple times + the_exception = exception_obj.exception + self.assertIn("tstress", str(the_exception)) + def test_example_ibrav0_uppercaseunits(self): self.singletest(label='example_ibrav0_uppercaseunits') From c053cb5191b64885dc67395827c2cfb35dc350fe Mon Sep 17 00:00:00 2001 From: Giovanni Pizzi Date: Wed, 21 Nov 2018 16:00:36 +0100 Subject: [PATCH 2/2] Upping to v.1.1.2 --- qe_tools/__init__.py | 2 +- setup.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qe_tools/__init__.py b/qe_tools/__init__.py index edbae03..ffe33eb 100644 --- a/qe_tools/__init__.py +++ b/qe_tools/__init__.py @@ -1,3 +1,3 @@ from .parsers import PwInputFile, CpInputFile -__version__ = "1.1.1" +__version__ = "1.1.2" diff --git a/setup.json b/setup.json index af03d9e..e244619 100644 --- a/setup.json +++ b/setup.json @@ -32,5 +32,5 @@ "license": "MIT License", "name": "qe-tools", "url": "https://github.com/aiidateam/qe-tools", - "version": "1.1.1" + "version": "1.1.2" } \ No newline at end of file