@@ -153,6 +153,7 @@
 author:
 - Sebastiaan Mannem (@sebasmannem)
 - Felix Hamme (@betanummeric)
+- Thomas Ziegler (@toydarian)
 '''
 
 EXAMPLES = '''
@@ -273,8 +274,8 @@
 PG_HBA_HDR = ['type', 'db', 'usr', 'src', 'mask', 'method', 'options']
 
 WHITESPACES_RE = re.compile(r'\s+')
-TOKEN_SPLIT_RE = re.compile(r'(?<=[\s"])')
-WHITESPACE_OR_QUOTE_RE = re.compile(r'[\s"]')
+TOKEN_SPLIT_RE = re.compile(r'(?<=[\s"#])')
+WHITESPACE_QUOTE_OR_COMMENT_RE = re.compile(r'[\s"#]')
 ONLY_SPACES_RE = re.compile(r"^\s+$")
 OPTION_RE = re.compile(r"([^=]+)=(.+)")
 IPV4_ADDR_RE = re.compile(r'^"?((\d{1,3}\.){3}\d{1,3})(/(\d{1,2}))?"?$')
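Note on the two new regexes: `#` joins whitespace and `"` in the delimiter set, so comment markers now survive splitting and reach the tokenizer instead of being stripped up front. A standalone sketch of the lookbehind split (the sample line is invented; zero-width `re.split` needs Python 3.7+, which is exactly why the module keeps the manual search loop further down):

```python
import re

TOKEN_SPLIT_RE = re.compile(r'(?<=[\s"#])')  # same pattern as above

# Each delimiter stays attached to the token it terminates, so quotes
# and '#' markers remain visible to the tokenizer's state machine.
line = 'host all all 127.0.0.1/32 md5 # local nets'
print(TOKEN_SPLIT_RE.split(line))
# ['host ', 'all ', 'all ', '127.0.0.1/32 ', 'md5 ', '#', ' ', 'local ', 'nets']
```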
@@ -328,12 +329,15 @@ def parse_hba_file(input_string):
     rules = []
     line_iter = iter(input_string.split("\n"))
     line = next(line_iter, None)
+    this_line_nr = 1
+    next_line_nr = 1
     while line is not None:
         # if that line continues, we just glue the next line onto the end until it ends
         # we can and have to do that, as continuation even applies within comments and quoted strings [sic]
         # https://www.postgresql.org/docs/current/auth-pg-hba-conf.html#AUTH-PG-HBA-CONF
         comment = None
         while line.endswith("\\"):
+            next_line_nr += 1
             cont_line = next(line_iter, None)
             if cont_line is None:
                 # we got a line continuation, but there was no more line
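The new `this_line_nr`/`next_line_nr` pair lets error messages and the new `line_nr` field point at the first physical line of a logical rule, even when backslash continuations glue several physical lines together. A condensed sketch of that bookkeeping with a hypothetical two-rule input (the real loop above also guards against a dangling continuation at end of file):

```python
raw = "local all all trust\nhost all all \\\n127.0.0.1/32 md5"
line_iter = iter(raw.split("\n"))
line = next(line_iter, None)
this_line_nr = 1
next_line_nr = 1
while line is not None:
    while line.endswith("\\"):
        next_line_nr += 1            # one more physical line consumed
        line += "\n" + next(line_iter)
    print(this_line_nr, repr(line.replace("\\\n", "")))
    line = next(line_iter, None)
    this_line_nr = next_line_nr + 1  # first physical line of the next rule
    next_line_nr = this_line_nr
# 1 'local all all trust'
# 2 'host all all 127.0.0.1/32 md5'
```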
@@ -348,42 +352,46 @@ def parse_hba_file(input_string):
             parsed_line = "EMPTY"
         # handle "normal" lines
         else:
-            # handle lines with comments
-            sanitized_line = line
-            if line.find('#') >= 0:
-                comment = sanitized_line[sanitized_line.index("#"):]
-                sanitized_line = sanitized_line[0:sanitized_line.index("#")]
             # remove continuation tokens
-            sanitized_line = sanitized_line.replace("\\\n", "")
-            tokens = tokenize(sanitized_line)
+            sanitized_line = line.replace("\\\n", "")
+            try:
+                tokens = tokenize(sanitized_line)
+            except TokenizerException as e:
+                raise TokenizerException("Error in line {0}: {1}".format(this_line_nr, e.args[0]))
             parsed_line = tokens
+            # a comment would always be the last token
+            if parsed_line[-1].startswith("#"):
+                comment = parsed_line[-1]
+                parsed_line = parsed_line[:-1]
         # create Rule
-        rules.append({"tokens": parsed_line, "line": line, "comment": comment})
+        rules.append({"tokens": parsed_line, "line": line, "comment": comment, "line_nr": this_line_nr})
         line = next(line_iter, None)
+        this_line_nr = next_line_nr + 1
+        next_line_nr = this_line_nr
     return rules
 
 
 def tokenize(string):
     """
     This function tokenizes a string respecting quotes. It needs to be fed a complete string where all quotes are
     properly closed (there needs to be an even amount of `"`) otherwise it raises an exception.
-    You can, for example use this to tokenize a full line of a pg_hba-file (make sure to handle any escaped newlines or
-    comments before) or a string of options.
+    You can, for example, use this to tokenize a full line of a pg_hba-file (make sure to handle any escaped newlines)
+    or a string of options.
     :param string: A string to tokenize
     :return: The tokenized string as a list of strings
     """
 
     # We need to do this charade for splitting to be compatible with Python 3.6 which has been EOL for three years
     # at the time of writing. If you come across this after support for Python 3.6 has been dropped, please replace
-    # WHITESPACE_OR_QUOTE_RE in the beginning of the file with `TOKEN_SPLIT_RE = re.compile(r'(?<=[\s"])')`
+    # WHITESPACE_QUOTE_OR_COMMENT_RE in the beginning of the file with `TOKEN_SPLIT_RE = re.compile(r'(?<=[\s"#])')`
     # and the next 8 lines (including bare_tokens.append) with `bare_tokens = TOKEN_SPLIT_RE.split(string)`
     bare_tokens = []
     lastpos = 0
-    nextmatch = WHITESPACE_OR_QUOTE_RE.search(string)
+    nextmatch = WHITESPACE_QUOTE_OR_COMMENT_RE.search(string)
     while nextmatch:
         bare_tokens.append(string[lastpos:nextmatch.end()])
         lastpos = nextmatch.end()
-        nextmatch = WHITESPACE_OR_QUOTE_RE.search(string, lastpos)
+        nextmatch = WHITESPACE_QUOTE_OR_COMMENT_RE.search(string, lastpos)
     bare_tokens.append(string[lastpos:])
 
     tokens = []
@@ -396,7 +404,7 @@ def tokenize(string):
         if state == "QUOTE_END":
             state = "START"
             # if the token consists of only spaces, we know for sure this symbol is finished
-            if token == "" or ONLY_SPACES_RE.match(token):
+            if token == "" or not token.strip():
                 tokens.append(current_symbol.strip())
                 current_symbol = ""
                 continue
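`not token.strip()` is a drop-in replacement for `ONLY_SPACES_RE.match(token)` here: a whitespace-only string strips to the falsy empty string, and the empty string itself is covered too (which `^\s+$` alone would miss). A quick illustration:

```python
for token in ("", "   ", "\t", "md5"):
    print(repr(token), not token.strip())
# '' True
# '   ' True
# '\t' True
# 'md5' False
```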
@@ -408,25 +416,37 @@ def tokenize(string):
         # we either start a new symbol or continue after a finished quote
         if state == "START":
             # outside of quotes, whitespaces are ignored
-            if ONLY_SPACES_RE.match(token):
+            if not token.strip():
                 continue
 
             current_symbol += token
             # we use endswith here, to correctly handle strings like 'somekey="somevalue"'
             # if there was a space before it, the quote will be alone, so that is not an issue
             if token.endswith("\""):
                 state = "QUOTE"
+            elif token.endswith("#"):
+                # handle edge-case of a comment having no space before the #-symbol like "... md5#some comment"
+                if not token.startswith("#"):
+                    current_symbol = current_symbol[:-1]
+                    tokens.append(current_symbol.strip())
+                current_symbol = "#"
+                state = "COMMENT"
             else:
                 tokens.append(current_symbol.strip())
                 current_symbol = ""
 
+        elif state == "COMMENT":
+            current_symbol += token
+
         # if we are inside a quoted string we consume and append tokens until the quoted string ends
         elif state == "QUOTE":
             current_symbol += token
             if token.endswith("\""):
                 state = "QUOTE_END"
 
-    if state != "START":
+    if state == "COMMENT":
+        tokens.append(current_symbol)
+    elif state == "QUOTE":
         raise TokenizerException("Unterminated quote")
 
     return tokens
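With the new `COMMENT` state, a trailing comment comes back as a single final token, marker included, which `parse_hba_file` then peels off into the rule's `comment` field. The expected results below are hand-traced from the state machine above, not taken from the module's test suite:

```python
print(tokenize('host all all 127.0.0.1/32 md5 # local nets'))
# ['host', 'all', 'all', '127.0.0.1/32', 'md5', '# local nets']

# the edge case handled by token.endswith("#"): no space before the marker
print(tokenize('host all all 127.0.0.1/32 md5# local nets'))
# ['host', 'all', 'all', '127.0.0.1/32', 'md5', '# local nets']
```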
@@ -486,14 +506,18 @@ def read(self):
             if line["tokens"] == "COMMENT":
                 self.comment.append(line["comment"])
             elif line["tokens"] != "EMPTY":
-                if not line["comment"]:
-                    self._from_tokens(line["tokens"])
-                else:
-                    if self.keep_comments_at_rules:
-                        self._from_tokens(line["tokens"], line["comment"])
-                    else:
-                        self.comment.append(line["comment"])
+                try:
+                    if not line["comment"]:
                         self._from_tokens(line["tokens"])
+                    else:
+                        if self.keep_comments_at_rules:
+                            self._from_tokens(line["tokens"], line["comment"])
+                        else:
+                            self.comment.append(line["comment"])
+                            self._from_tokens(line["tokens"])
+                except PgHbaError as e:
+                    raise e.__class__("Error in line {0}: {1}".format(line["line_nr"], e.args[0]))
+
         self.unchanged()
         self.preexisting_rules = dict(self.rules)
 
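The `raise e.__class__(...)` idiom re-raises whatever `PgHbaError` subclass `_from_tokens` threw, with the line number prepended, so callers that catch a specific subclass still match. A self-contained illustration (the subclass name is invented for this demo):

```python
class PgHbaError(Exception):
    pass

class PgHbaRuleError(PgHbaError):  # hypothetical subclass, for the demo only
    pass

try:
    try:
        raise PgHbaRuleError("invalid connection type 'hosts'")
    except PgHbaError as e:
        # rebuild the *same* subclass with a line-prefixed message
        raise e.__class__("Error in line {0}: {1}".format(3, e.args[0]))
except PgHbaRuleError as err:
    print(err)  # Error in line 3: invalid connection type 'hosts'
```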
@@ -1048,7 +1072,7 @@ def main():
     ret = {'msgs': []}
     try:
         pg_hba = PgHba(dest, backup=backup, create=create, keep_comments_at_rules=keep_comments_at_rules)
-    except PgHbaError as error:
+    except (PgHbaError, TokenizerException) as error:
         module.fail_json(msg='Error reading file:\n{0}'.format(error))
 
     if overwrite: