From 0230cd960a3e69f057aeaf840d49f337eeb0f32a Mon Sep 17 00:00:00 2001
From: Giovanni Pizzi <giovanni.pizzi@epfl.ch>
Date: Wed, 21 Nov 2018 15:58:31 +0100
Subject: [PATCH 1/2] Properly checking comments in namelists now

Fixes #18
---
 qe_tools/parsers/qeinputparser.py             |  74 ++++++++++++-
 tests/data/example_comment_in_namelist.in     |  29 +++++
 .../data/example_ibrav0_error_multiplekeys.in |  38 +++++++
 .../data/ref/example_comment_in_namelist.json | 104 ++++++++++++++++++
 tests/test_parsers.py                         |  12 ++
 5 files changed, 253 insertions(+), 4 deletions(-)
 create mode 100644 tests/data/example_comment_in_namelist.in
 create mode 100644 tests/data/example_ibrav0_error_multiplekeys.in
 create mode 100644 tests/data/ref/example_comment_in_namelist.json

diff --git a/qe_tools/parsers/qeinputparser.py b/qe_tools/parsers/qeinputparser.py
index fb15c87..8e6b5dd 100644
--- a/qe_tools/parsers/qeinputparser.py
+++ b/qe_tools/parsers/qeinputparser.py
@@ -339,8 +339,8 @@ def str2val(valstr):
             try:
                 val = conversion_fn(valstr)
             except ValueError as error:
-                print('Error converting {} to a value'.format(repr(valstr)))
-                raise error
+                raise ValueError('Error converting {} to a value'.format(
+                    repr(valstr)))
     if val is None:
         raise ValueError('Unable to convert {} to a python variable.\n'
                          'NOTE: Support for algebraic expressions is not yet '
@@ -403,8 +403,22 @@ def parse_namelists(txt):
     for nmlst, blockstr in namelist_re.findall(txt):
         # ...extract the key value pairs, storing them each in nmlst_dict,...
         nmlst_dict = {}
-        for key, valstr in key_value_re.findall(blockstr):
-            nmlst_dict[key.lower()] = str2val(valstr)
+        # I split the lines, putting back a \n at the end (I want
+        # to have it otherwise lines not ending with a comma are ignored)
+        blocklines = blockstr.splitlines()
+        # Remove comments on each line, and then put back the \n
+        # Note that strip_comment does not want \n in the string!
+        blocklines = [
+            "{}\n".format(strip_comment(line)) for line in blocklines
+        ]
+
+        for blockline in blocklines:
+            for key, valstr in key_value_re.findall(blockline):
+                if key.lower() in nmlst_dict:
+                    raise ValueError(
+                        "Key {} found more than once in namelist {}".format(
+                            key.lower(), nmlst))
+                nmlst_dict[key.lower()] = str2val(valstr)
         # ...and, store nmlst_dict as a value in params_dict with the namelist
         # as the key.
         if len(nmlst_dict.keys()) > 0:
@@ -1185,3 +1199,55 @@ def get_structure_from_qeinput(filepath=None,
         species=atomic_species,
         cell=cell.tolist(),
         atom_names=atomic_positions['names'])
+
+
+def strip_comment(string,
+                  comment_characters=('!', ),
+                  quote_characters=('"', "'")):
+    """
+    Return the string only until the first comment (if any), but makes
+    sure to ignore comments if inside a string
+
+    Note: it expects to get a SINGLE line, without newline characters
+
+    Note: it does not cope with escaping of quotes
+
+    :param string: the string to check
+    :param comment_characters: a list or tuple of accepted comment characters
+    :param quote_characters: a list or tuple of accepted valid string characters
+    """
+    new_string = []  # Will contain individual charachters of the new string
+
+    in_string = False
+    string_quote = None  # Will contain the specific character used to enter
+    # this string: this avoids that 'aaa" is considered as
+    # the string containing the three characters aaa
+
+    for char in string:
+        if char == '\n':
+            raise ValueError(
+                "I still cannot cope with newlines in the string...")
+        # Check if we are entering/existing a string
+        if in_string and char == string_quote:
+            #print("EXIT")
+            in_string = False
+            string_quote = None
+        elif not in_string and char in quote_characters:
+            #print("ENTER")
+            in_string = True
+            string_quote = char
+        #print(char, in_string, string_quote)
+
+        # We found a comment, return until here (without the comment)
+        if not in_string and char in comment_characters:
+            return "".join(new_string)
+
+        new_string.append(char)
+
+    # If we are here, no comments where found
+    if in_string:
+        raise ValueError(
+            "String >>{}<< is not closed, it was open with the {} char".format(
+                string, string_quote))
+    # I just return the same string, even if this would be equivalent to "".join(new_string)
+    return string
diff --git a/tests/data/example_comment_in_namelist.in b/tests/data/example_comment_in_namelist.in
new file mode 100644
index 0000000..821fd9d
--- /dev/null
+++ b/tests/data/example_comment_in_namelist.in
@@ -0,0 +1,29 @@
+&control
+    pseudo_dir  = 'pseudo/'
+    calculation = 'scf',
+    prefix = 'Si_exc1',
+ /
+ &system
+    ibrav = 0
+    ! ibrav =  -3, 
+    celldm(1) = 20.385647759,	
+    nat =  1,
+    ntyp = 1,
+    ecutwfc = 30
+ /
+ &electrons
+ /
+
+ATOMIC_SPECIES
+ Si 28.086  Si.pbe-n-rrkjus_psl.1.0.0.UPF
+
+ATOMIC_POSITIONS {bohr}
+Si    0. 0. 0.
+
+CELL_PARAMETERS {alat}
+-1.0 1.0 1.0
+ 1.0 -1.0 1.0
+ 1.0 1.0 -1.0
+
+K_POINTS {automatic}
+  6 6 6 1 1 1
diff --git a/tests/data/example_ibrav0_error_multiplekeys.in b/tests/data/example_ibrav0_error_multiplekeys.in
new file mode 100644
index 0000000..894766c
--- /dev/null
+++ b/tests/data/example_ibrav0_error_multiplekeys.in
@@ -0,0 +1,38 @@
+&CONTROL
+  calculation = 'scf'
+  outdir = './out/'
+  prefix = 'aiida'
+  pseudo_dir = './pseudo/'
+  restart_mode = 'from_scratch'
+  tprnfor = .true.
+  tstress = .true.
+  tstress = .true.
+  verbosity = 'high'
+  wf_collect = .true.
+/
+&SYSTEM
+  ecutrho =   3.2000000000d+02
+  ecutwfc =   4.0000000000d+01
+  ibrav = 0
+  nat = 5
+  ntyp = 3
+/
+&ELECTRONS
+  conv_thr =   1.0000000000d-10
+/
+ATOMIC_SPECIES
+Ba     137.327 Ba.pbesol-spn-rrkjus_psl.0.2.3-tot-pslib030.UPF
+O      15.9994 O.pbesol-n-rrkjus_psl.0.1-tested-pslib030.UPF
+Ti     47.867 Ti.pbesol-spn-rrkjus_psl.0.2.3-tot-pslib030.UPF
+ATOMIC_POSITIONS angstrom
+Ba           0.0000000000       0.0000000000       0.0000000000 
+Ti           2.0000000000       2.0000000000       2.0000000000 
+O            2.0000000000       2.0000000000       0.0000000000 
+O            2.0000000000       0.0000000000       2.0000000000 
+O            0.0000000000       2.0000000000       2.0000000000 
+K_POINTS automatic
+2 2 2 0 0 0
+CELL_PARAMETERS angstrom
+      4.0000000000       0.0000000000       0.0000000000
+      0.0000000000       4.0000000000       0.0000000000
+      0.0000000000       0.0000000000       4.0000000000
diff --git a/tests/data/ref/example_comment_in_namelist.json b/tests/data/ref/example_comment_in_namelist.json
new file mode 100644
index 0000000..8cf1000
--- /dev/null
+++ b/tests/data/ref/example_comment_in_namelist.json
@@ -0,0 +1,104 @@
+{
+  "atomic_positions": {
+    "fixed_coords": [
+      [
+        false, 
+        false, 
+        false
+      ]
+    ], 
+    "names": [
+      "Si"
+    ], 
+    "positions": [
+      [
+        0.0, 
+        0.0, 
+        0.0
+      ]
+    ], 
+    "units": "bohr"
+  }, 
+  "atomic_species": {
+    "masses": [
+      28.086
+    ], 
+    "names": [
+      "Si"
+    ], 
+    "pseudo_file_names": [
+      "Si.pbe-n-rrkjus_psl.1.0.0.UPF"
+    ]
+  }, 
+  "cell": [
+    [
+      -10.787620176406609, 
+      10.787620176406609, 
+      10.787620176406609
+    ], 
+    [
+      10.787620176406609, 
+      -10.787620176406609, 
+      10.787620176406609
+    ], 
+    [
+      10.787620176406609, 
+      10.787620176406609, 
+      -10.787620176406609
+    ]
+  ], 
+  "cell_parameters": {
+    "cell": [
+      [
+        -1.0, 
+        1.0, 
+        1.0
+      ], 
+      [
+        1.0, 
+        -1.0, 
+        1.0
+      ], 
+      [
+        1.0, 
+        1.0, 
+        -1.0
+      ]
+    ], 
+    "units": "alat"
+  }, 
+  "k_points": {
+    "offset": [
+      0.5, 
+      0.5, 
+      0.5
+    ], 
+    "points": [
+      6, 
+      6, 
+      6
+    ], 
+    "type": "automatic"
+  }, 
+  "namelists": {
+    "CONTROL": {
+      "calculation": "scf", 
+      "prefix": "Si_exc1", 
+      "pseudo_dir": "pseudo/"
+    }, 
+    "SYSTEM": {
+      "celldm(1)": 20.385647759, 
+      "ecutwfc": 30, 
+      "ibrav": 0, 
+      "nat": 1, 
+      "ntyp": 1
+    }
+  }, 
+  "positions_angstrom": [
+    [
+      0.0, 
+      0.0, 
+      0.0
+    ]
+  ]
+}
\ No newline at end of file
diff --git a/tests/test_parsers.py b/tests/test_parsers.py
index 1f15095..b1a7917 100644
--- a/tests/test_parsers.py
+++ b/tests/test_parsers.py
@@ -197,9 +197,21 @@ def singletest(self, label, parser='pw'):
     ############################################################################
     ## Here start the tests
     ############################################################################
+    def test_example_comment_in_namelist(self):
+        self.singletest(label='example_comment_in_namelist')
+
     def test_example_ibrav0(self):
         self.singletest(label='example_ibrav0')
 
+    def test_example_ibrav0_error_multiplekeys(self):
+        # It should raise because there is twice the same key in a namelist
+        with self.assertRaises(ValueError) as exception_obj:
+            self.singletest(label='example_ibrav0_error_multiplekeys')
+
+        # It should complain about 'tstress' being found multiple times
+        the_exception = exception_obj.exception
+        self.assertIn("tstress", str(the_exception))
+
     def test_example_ibrav0_uppercaseunits(self):
         self.singletest(label='example_ibrav0_uppercaseunits')
 

From c053cb5191b64885dc67395827c2cfb35dc350fe Mon Sep 17 00:00:00 2001
From: Giovanni Pizzi <giovanni.pizzi@epfl.ch>
Date: Wed, 21 Nov 2018 16:00:36 +0100
Subject: [PATCH 2/2] Upping to v.1.1.2

---
 qe_tools/__init__.py | 2 +-
 setup.json           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/qe_tools/__init__.py b/qe_tools/__init__.py
index edbae03..ffe33eb 100644
--- a/qe_tools/__init__.py
+++ b/qe_tools/__init__.py
@@ -1,3 +1,3 @@
 from .parsers import PwInputFile, CpInputFile
 
-__version__ = "1.1.1"
+__version__ = "1.1.2"
diff --git a/setup.json b/setup.json
index af03d9e..e244619 100644
--- a/setup.json
+++ b/setup.json
@@ -32,5 +32,5 @@
     "license": "MIT License", 
     "name": "qe-tools", 
     "url": "https://github.com/aiidateam/qe-tools", 
-    "version": "1.1.1"
+    "version": "1.1.2"
 }
\ No newline at end of file