Skip to content

Commit bc61449

Browse files
authored
Fix 420 comments parsing (ansible-collections#766)
* fixes parsing of hash-symbols within quotes (ansible-collections#420) * prints line-number of issue when file can't be parsed * verify error-message in tests
1 parent 30dc7a3 commit bc61449

File tree

4 files changed

+236
-28
lines changed

4 files changed

+236
-28
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
bugfixes:
2+
- "postgresql_pg_hba - fixes #420 by properly handling hash-symbols in quotes (https://github.com/ansible-collections/community.postgresql/pull/766)"
3+
minor_changes:
4+
- "postgresql_pg_hba - show the number of the line with the issue if parsing a file fails (https://github.com/ansible-collections/community.postgresql/pull/766)"

plugins/modules/postgresql_pg_hba.py

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@
153153
author:
154154
- Sebastiaan Mannem (@sebasmannem)
155155
- Felix Hamme (@betanummeric)
156+
- Thomas Ziegler (@toydarian)
156157
'''
157158

158159
EXAMPLES = '''
@@ -273,8 +274,8 @@
273274
PG_HBA_HDR = ['type', 'db', 'usr', 'src', 'mask', 'method', 'options']
274275

275276
WHITESPACES_RE = re.compile(r'\s+')
276-
TOKEN_SPLIT_RE = re.compile(r'(?<=[\s"])')
277-
WHITESPACE_OR_QUOTE_RE = re.compile(r'[\s"]')
277+
TOKEN_SPLIT_RE = re.compile(r'(?<=[\s"#])')
278+
WHITESPACE_QUOTE_OR_COMMENT_RE = re.compile(r'[\s"#]')
278279
ONLY_SPACES_RE = re.compile(r"^\s+$")
279280
OPTION_RE = re.compile(r"([^=]+)=(.+)")
280281
IPV4_ADDR_RE = re.compile(r'^"?((\d{1,3}\.){3}\d{1,3})(/(\d{1,2}))?"?$')
@@ -328,12 +329,15 @@ def parse_hba_file(input_string):
328329
rules = []
329330
line_iter = iter(input_string.split("\n"))
330331
line = next(line_iter, None)
332+
this_line_nr = 1
333+
next_line_nr = 1
331334
while line is not None:
332335
# if that line continues, we just glue the next line onto the end until it ends
333336
# we can and have to do that, as continuation even applies within comments and quoted strings [sic]
334337
# https://www.postgresql.org/docs/current/auth-pg-hba-conf.html#AUTH-PG-HBA-CONF
335338
comment = None
336339
while line.endswith("\\"):
340+
next_line_nr += 1
337341
cont_line = next(line_iter, None)
338342
if cont_line is None:
339343
# we got a line continuation, but there was no more line
@@ -348,42 +352,46 @@ def parse_hba_file(input_string):
348352
parsed_line = "EMPTY"
349353
# handle "normal" lines
350354
else:
351-
# handle lines with comments
352-
sanitized_line = line
353-
if line.find('#') >= 0:
354-
comment = sanitized_line[sanitized_line.index("#"):]
355-
sanitized_line = sanitized_line[0:sanitized_line.index("#")]
356355
# remove continuation tokens
357-
sanitized_line = sanitized_line.replace("\\\n", "")
358-
tokens = tokenize(sanitized_line)
356+
sanitized_line = line.replace("\\\n", "")
357+
try:
358+
tokens = tokenize(sanitized_line)
359+
except TokenizerException as e:
360+
raise TokenizerException("Error in line {0}: {1}".format(this_line_nr, e.args[0]))
359361
parsed_line = tokens
362+
# a comment would always be the last token
363+
if parsed_line[-1].startswith("#"):
364+
comment = parsed_line[-1]
365+
parsed_line = parsed_line[:-1]
360366
# create Rule
361-
rules.append({"tokens": parsed_line, "line": line, "comment": comment})
367+
rules.append({"tokens": parsed_line, "line": line, "comment": comment, "line_nr": this_line_nr})
362368
line = next(line_iter, None)
369+
this_line_nr = next_line_nr + 1
370+
next_line_nr = this_line_nr
363371
return rules
364372

365373

366374
def tokenize(string):
367375
"""
368376
This function tokenizes a string respecting quotes. It needs to be fed a complete string where all quotes are
369377
properly closed (there needs to be an even amount of `"`) otherwise it raises an exception.
370-
You can, for example use this to tokenize a full line of a pg_hba-file (make sure to handle any escaped newlines or
371-
comments before) or a string of options.
378+
You can, for example use this to tokenize a full line of a pg_hba-file (make sure to handle any escaped newlines)
379+
or a string of options.
372380
:param string: A string to tokenize
373381
:return: The tokenized string as a list of strings
374382
"""
375383

376384
# We need to do this charade for splitting to be compatible with Python 3.6 which has been EOL for three years
377385
# at the time of writing. If you come across this after support for Python 3.6 has been dropped, please replace
378-
# WHITESPACE_OR_QUOTE_RE in the beginning of the file with `TOKEN_SPLIT_RE = re.compile(r'(?<=[\s"])')`
386+
# WHITESPACE_OR_QUOTE_RE in the beginning of the file with `TOKEN_SPLIT_RE = re.compile(r'(?<=[\s"#])')`
379387
# and the next 8 lines (including bare_tokens.append) with `bare_tokens = TOKEN_SPLIT_RE.split(string)`
380388
bare_tokens = []
381389
lastpos = 0
382-
nextmatch = WHITESPACE_OR_QUOTE_RE.search(string)
390+
nextmatch = WHITESPACE_QUOTE_OR_COMMENT_RE.search(string)
383391
while nextmatch:
384392
bare_tokens.append(string[lastpos:nextmatch.end()])
385393
lastpos = nextmatch.end()
386-
nextmatch = WHITESPACE_OR_QUOTE_RE.search(string, lastpos)
394+
nextmatch = WHITESPACE_QUOTE_OR_COMMENT_RE.search(string, lastpos)
387395
bare_tokens.append(string[lastpos:])
388396

389397
tokens = []
@@ -396,7 +404,7 @@ def tokenize(string):
396404
if state == "QUOTE_END":
397405
state = "START"
398406
# if the token consists of only spaces, we know for sure this symbol is finished
399-
if token == "" or ONLY_SPACES_RE.match(token):
407+
if token == "" or not token.strip():
400408
tokens.append(current_symbol.strip())
401409
current_symbol = ""
402410
continue
@@ -408,25 +416,37 @@ def tokenize(string):
408416
# we either start a new symbol or continue after a finished quote
409417
if state == "START":
410418
# outside of quotes, whitespaces are ignored
411-
if ONLY_SPACES_RE.match(token):
419+
if not token.strip():
412420
continue
413421

414422
current_symbol += token
415423
# we use endswith here, to correctly handle strings like 'somekey="somevalue"'
416424
# if there was a space before it, the quote will be alone, so that is not an issue
417425
if token.endswith("\""):
418426
state = "QUOTE"
427+
elif token.endswith("#"):
428+
# handle edge-case of a comment having no space before the #-symbol like "... md5#some comment"
429+
if not token.startswith("#"):
430+
current_symbol = current_symbol[:-1]
431+
tokens.append(current_symbol.strip())
432+
current_symbol = "#"
433+
state = "COMMENT"
419434
else:
420435
tokens.append(current_symbol.strip())
421436
current_symbol = ""
422437

438+
elif state == "COMMENT":
439+
current_symbol += token
440+
423441
# if we are inside a quoted string we consume and append tokens until the quoted string ends
424442
elif state == "QUOTE":
425443
current_symbol += token
426444
if token.endswith("\""):
427445
state = "QUOTE_END"
428446

429-
if state != "START":
447+
if state == "COMMENT":
448+
tokens.append(current_symbol)
449+
elif state == "QUOTE":
430450
raise TokenizerException("Unterminated quote")
431451

432452
return tokens
@@ -486,14 +506,18 @@ def read(self):
486506
if line["tokens"] == "COMMENT":
487507
self.comment.append(line["comment"])
488508
elif line["tokens"] != "EMPTY":
489-
if not line["comment"]:
490-
self._from_tokens(line["tokens"])
491-
else:
492-
if self.keep_comments_at_rules:
493-
self._from_tokens(line["tokens"], line["comment"])
494-
else:
495-
self.comment.append(line["comment"])
509+
try:
510+
if not line["comment"]:
496511
self._from_tokens(line["tokens"])
512+
else:
513+
if self.keep_comments_at_rules:
514+
self._from_tokens(line["tokens"], line["comment"])
515+
else:
516+
self.comment.append(line["comment"])
517+
self._from_tokens(line["tokens"])
518+
except PgHbaError as e:
519+
raise e.__class__("Error in line {0}: {1}".format(line["line_nr"], e.args[0]))
520+
497521
self.unchanged()
498522
self.preexisting_rules = dict(self.rules)
499523

@@ -1048,7 +1072,7 @@ def main():
10481072
ret = {'msgs': []}
10491073
try:
10501074
pg_hba = PgHba(dest, backup=backup, create=create, keep_comments_at_rules=keep_comments_at_rules)
1051-
except PgHbaError as error:
1075+
except (PgHbaError, TokenizerException) as error:
10521076
module.fail_json(msg='Error reading file:\n{0}'.format(error))
10531077

10541078
if overwrite:

tests/integration/targets/postgresql_pg_hba/tasks/postgresql_pg_hba_initial.yml

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
- { address: "", contype: "local", method: "ldap", options: "ldapserver=example.com ldapport=389 ldapprefix=\"cn=\"" }
8282
- { address: "red", contype: "hostssl", method: "cert", options: "clientcert=1 map=mymap" }
8383
- { address: "blue", contype: "hostssl", method: "cert", options: "clientcert=1 map=mymap" }
84+
- { address: "green", contype: "hostssl", method: "ldap", options: "ldapserver=example.com ldapport=389 ldapprefix=\"cn=\" ldapbindpasswd=\"#BROKEN\"" }
8485
register: pg_hba_options
8586

8687
- name: read pg_hba rules
@@ -154,6 +155,7 @@
154155
{ "db": "all", "method": "md5", "type": "local", "usr": "all" },
155156
{ "db": "all", "method": "md5", "src": "2001:db8::1/128", "type": "hostgssenc", "usr": "postgres" },
156157
{ "db": "all", "method": "cert", "src": "blue", "type": "hostssl", "usr": "+some", "options": "clientcert=1 map=mymap" },
158+
{ "db": "all", "method": "ldap", "src": "green", "type": "hostssl", "usr": "+some", "options": "ldapserver=example.com ldapport=389 ldapprefix=\"cn=\" ldapbindpasswd=\"#BROKEN\"" },
157159
{ "db": "all", "method": "cert", "src": "red", "type": "hostssl", "usr": "+some", "options": "clientcert=1 map=mymap" },
158160
{ "db": "all", "method": "md5", "src": "127.0.0.1/32", "type": "host", "usr": "all" },
159161
{ "db": "all", "method": "md5", "src": "2001:db8::1/128", "type": "hostnogssenc", "usr": "all" },
@@ -269,3 +271,93 @@
269271
state: present
270272
contype: host
271273
create: true
274+
275+
- name: create file that contains edge-cases
276+
copy:
277+
dest: /tmp/edgecase_test_pg_hba.conf
278+
content: |
279+
# full line comment
280+
local all all ident # comment
281+
282+
hostssl all all 192.168.0.0/24 md5
283+
hostssl all all \
284+
10.10.0.0/16 md5
285+
hostssl all all 10.11.0.0/16 md5#comment
286+
hostssl all all 10.12.0.0/16 ldap ldapserver=example.com ldapport=389 ldapprefix="cn=" ldapbindpasswd="#BROKEN"
287+
hostssl all all 10.13.0.0/16 radius radiusservers="server1,server2" radiussecrets="""secret one"",""secret two"""
288+
289+
- name: check that there are no errors while parsing the file
290+
postgresql_pg_hba:
291+
dest: /tmp/edgecase_test_pg_hba.conf
292+
register: pg_hba
293+
294+
- debug:
295+
var: pg_hba.pg_hba
296+
297+
- assert:
298+
that:
299+
- 'pg_hba.pg_hba == [
300+
{"db": "all", "method": "ident", "type": "local", "usr": "all"},
301+
{"db": "all", "method": "md5", "src": "192.168.0.0/24", "type": "hostssl", "usr": "all"},
302+
{"db": "all", "method": "md5", "src": "10.10.0.0/16", "type": "hostssl", "usr": "all"},
303+
{"db": "all", "method": "md5", "src": "10.11.0.0/16", "type": "hostssl", "usr": "all"},
304+
{"db": "all", "method": "ldap", "options": "ldapserver=example.com ldapport=389 ldapprefix=\"cn=\" ldapbindpasswd=\"#BROKEN\"", "src": "10.12.0.0/16", "type": "hostssl", "usr": "all"},
305+
{"db": "all", "method": "radius", "options": "radiusservers=\"server1,server2\" radiussecrets=\"\"\"secret one\"\",\"\"secret two\"\"\"", "src": "10.13.0.0/16", "type": "hostssl", "usr": "all"}
306+
]'
307+
308+
- name: create faulty file
309+
copy:
310+
dest: /tmp/faulty_test_pg_hba.conf
311+
content: |-
312+
local all all ident # comment
313+
hostssl all all \
314+
315+
- name: check that parsing failed (invalid continuation)
316+
postgresql_pg_hba:
317+
dest: /tmp/faulty_test_pg_hba.conf
318+
register: faulty_pg_hba
319+
ignore_errors: true
320+
321+
- debug:
322+
var: faulty_pg_hba
323+
- assert:
324+
that:
325+
- faulty_pg_hba is failed
326+
- faulty_pg_hba.msg is search("The last line ended with a '\\\\' \(line continuation\)\.")
327+
328+
- name: create faulty file
329+
copy:
330+
dest: /tmp/faulty_test_pg_hba.conf
331+
content: |
332+
local all all ident # comment
333+
hostssl all all 192.168.0.0/24 ldap ldapserver=example.com ldapport=389 ldapprefix="cn=" ldapbindpasswd="#BROKEN
334+
335+
- name: check that parsing failed (unterminated quote)
336+
postgresql_pg_hba:
337+
dest: /tmp/faulty_test_pg_hba.conf
338+
register: faulty_pg_hba
339+
ignore_errors: true
340+
341+
- assert:
342+
that:
343+
- faulty_pg_hba is failed
344+
- faulty_pg_hba.msg is search("Error in line 2")
345+
346+
- name: create faulty file
347+
copy:
348+
dest: /tmp/faulty_test_pg_hba.conf
349+
content: |-
350+
local all all ident # comment
351+
hostssl all all
352+
hostssl all all 192.168.0.0/24 md5
353+
354+
- name: check that parsing failed (too few symbols)
355+
postgresql_pg_hba:
356+
dest: /tmp/faulty_test_pg_hba.conf
357+
register: faulty_pg_hba
358+
ignore_errors: true
359+
360+
- assert:
361+
that:
362+
- faulty_pg_hba is failed
363+
- faulty_pg_hba.msg is search("Error in line 2")

0 commit comments

Comments
 (0)