From 50dd41bdf1da566b9898fec7e666909caee7bd1d Mon Sep 17 00:00:00 2001 From: Oleksandr Volha Date: Fri, 10 Nov 2023 15:43:49 +0200 Subject: [PATCH] splunk fixes --- .../app/converter/backends/splunk/parsers/splunk.py | 4 ++-- siem-converter/app/converter/backends/splunk/tokenizer.py | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/siem-converter/app/converter/backends/splunk/parsers/splunk.py b/siem-converter/app/converter/backends/splunk/parsers/splunk.py index 43c03033..f1cd16ad 100644 --- a/siem-converter/app/converter/backends/splunk/parsers/splunk.py +++ b/siem-converter/app/converter/backends/splunk/parsers/splunk.py @@ -45,8 +45,8 @@ def _parse_log_sources(self, query: str) -> Tuple[Dict[str, List[str]], str]: log_sources.setdefault(source_type, []) pattern = self.log_source_pattern.replace('___source_type___', source_type) while search := re.search(pattern, query, flags=re.IGNORECASE): - results = search.groupdict() - value = results.get("value") + group_dict = search.groupdict() + value = group_dict.get("d_q_value") or group_dict.get("value") log_sources.setdefault(source_type, []).append(value) pos_start = search.start() pos_end = search.end() diff --git a/siem-converter/app/converter/backends/splunk/tokenizer.py b/siem-converter/app/converter/backends/splunk/tokenizer.py index 6d02e449..cfbe66ee 100644 --- a/siem-converter/app/converter/backends/splunk/tokenizer.py +++ b/siem-converter/app/converter/backends/splunk/tokenizer.py @@ -29,8 +29,9 @@ class SplunkTokenizer(QueryTokenizer): num_value_pattern = r"(?P\d+(?:\.\d+)*)\s*" double_quotes_value_pattern = r'"(?P(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*' single_quotes_value_pattern = r"'(?P(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{\}\s]|\\\'|\\)*)'\s*" - _value_pattern = fr"{num_value_pattern}|{double_quotes_value_pattern}|{single_quotes_value_pattern}" - multi_value_pattern = r"""\((?P[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]*)\)""" + no_quotes_value = r"(?P(?:[:a-zA-Z\*0-9=+%#\-_/,\.\\$&^@!])+)\s*" + _value_pattern = fr"{num_value_pattern}|{no_quotes_value}|{double_quotes_value_pattern}|{single_quotes_value_pattern}" + multi_value_pattern = r"""\((?P[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]+)\)""" keyword_pattern = double_quotes_value_pattern multi_value_operators = ("in",) @@ -40,6 +41,9 @@ def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.E if num_value := get_match_group(match, group_name='num_value'): return operator, num_value + elif no_q_value := get_match_group(match, group_name='no_q_value'): + return operator, no_q_value + elif d_q_value := get_match_group(match, group_name='d_q_value'): return operator, d_q_value