From 45f81a7b2f59c6c57b33f2b40f408fbce00e27ca Mon Sep 17 00:00:00 2001 From: "nazar.gesyk" Date: Fri, 5 Jan 2024 14:47:09 +0200 Subject: [PATCH 1/3] Added escaping logic support --- .../translator/core/custom_types/values.py | 14 +++++++ .../app/translator/core/escape_manager.py | 24 +++++++++++ .../translator/core/models/escape_details.py | 7 ++++ translator/app/translator/core/render.py | 7 ++++ translator/app/translator/core/tokenizer.py | 5 ++- .../translator/platforms/athena/tokenizer.py | 15 +++---- .../platforms/base/lucene/escape_manager.py | 12 ++++++ .../platforms/base/lucene/renders/lucene.py | 23 +++++------ .../platforms/base/lucene/tokenizer.py | 32 ++++++++------- .../platforms/base/spl/escape_manager.py | 12 ++++++ .../platforms/base/spl/renders/spl.py | 22 +++++----- .../platforms/base/spl/tokenizer.py | 26 +++++++----- .../platforms/chronicle/escape_manager.py | 13 ++++++ .../platforms/chronicle/renders/chronicle.py | 41 ++++++++++++++----- .../chronicle/renders/chronicle_rule.py | 20 ++++----- .../platforms/chronicle/tokenizer.py | 33 ++++++++------- .../elasticsearch/renders/elasticsearch.py | 9 ---- .../platforms/logscale/escape_manager.py | 12 ++++++ .../platforms/logscale/renders/logscale.py | 11 +---- .../platforms/logscale/tokenizer.py | 16 ++++---- .../platforms/microsoft/escape_manager.py | 12 ++++++ .../microsoft/renders/microsoft_sentinel.py | 14 ++++--- .../platforms/microsoft/tokenizer.py | 30 +++++++++----- .../opensearch/renders/opensearch.py | 20 ++++----- .../platforms/qradar/escape_manager.py | 12 ++++++ .../platforms/qradar/renders/qradar.py | 39 ++++++++++++------ .../translator/platforms/qradar/tokenizer.py | 18 ++++---- 27 files changed, 338 insertions(+), 161 deletions(-) create mode 100644 translator/app/translator/core/custom_types/values.py create mode 100644 translator/app/translator/core/escape_manager.py create mode 100644 translator/app/translator/core/models/escape_details.py create mode 100644 translator/app/translator/platforms/base/lucene/escape_manager.py create mode 100644 translator/app/translator/platforms/base/spl/escape_manager.py create mode 100644 translator/app/translator/platforms/chronicle/escape_manager.py create mode 100644 translator/app/translator/platforms/logscale/escape_manager.py create mode 100644 translator/app/translator/platforms/microsoft/escape_manager.py create mode 100644 translator/app/translator/platforms/qradar/escape_manager.py diff --git a/translator/app/translator/core/custom_types/values.py b/translator/app/translator/core/custom_types/values.py new file mode 100644 index 00000000..786da50a --- /dev/null +++ b/translator/app/translator/core/custom_types/values.py @@ -0,0 +1,14 @@ +from app.translator.tools.custom_enum import CustomEnum + + +class ValueType(CustomEnum): + value = "value" + number_value = "num_value" + double_quotes_value = "d_q_value" + single_quotes_value = "s_q_value" + back_quotes_value = "b_q_value" + no_quotes_value = "no_q_value" + bool_value = "bool_value" + regular_expression_value = "re_value" + greater_than_or_equal = "gte_value" + less_than_or_equal = "lte_value" \ No newline at end of file diff --git a/translator/app/translator/core/escape_manager.py b/translator/app/translator/core/escape_manager.py new file mode 100644 index 00000000..da7f4faa --- /dev/null +++ b/translator/app/translator/core/escape_manager.py @@ -0,0 +1,24 @@ +import re +from abc import ABC +from typing import Union + +from app.translator.core.custom_types.values import ValueType +from app.translator.core.models.escape_details import EscapeDetails + + +class EscapeManager(ABC): + escape_map: dict[str, EscapeDetails] = {} + + def escape(self, value: Union[str, int], value_type: str = ValueType.value) -> Union[str, int]: + if isinstance(value, int): + return value + if escape_details := self.escape_map.get(value_type): + symbols_pattern = re.compile(escape_details.pattern) + value = symbols_pattern.sub(escape_details.escape_symbols, value) + return value + + def remove_escape(self, value: Union[str, int]) -> Union[str, int]: + if isinstance(value, int): + return value + value = value.encode().decode("unicode_escape") + return value diff --git a/translator/app/translator/core/models/escape_details.py b/translator/app/translator/core/models/escape_details.py new file mode 100644 index 00000000..79f6d71a --- /dev/null +++ b/translator/app/translator/core/models/escape_details.py @@ -0,0 +1,7 @@ +from dataclasses import dataclass + + +@dataclass +class EscapeDetails: + pattern: str = None + escape_symbols: str = "\\\\\g<1>" diff --git a/translator/app/translator/core/render.py b/translator/app/translator/core/render.py index d8f7d0a7..f88e9952 100644 --- a/translator/app/translator/core/render.py +++ b/translator/app/translator/core/render.py @@ -21,6 +21,8 @@ from typing import Union, List, Dict from app.translator.const import DEFAULT_VALUE_TYPE +from app.translator.core.custom_types.values import ValueType +from app.translator.core.escape_manager import EscapeManager from app.translator.core.exceptions.core import NotImplementedException, StrictPlatformException from app.translator.core.exceptions.parser import UnsupportedOperatorException from app.translator.core.functions import PlatformFunctions @@ -34,6 +36,7 @@ class BaseQueryFieldValue(ABC): details: PlatformDetails = None + escape_manager: EscapeManager = None def __init__(self, or_token): self.field_value = { @@ -84,6 +87,10 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: raise NotImplementedException + def apply_value(self, value: Union[str, int], case_type: str = ValueType.value) -> Union[str, int]: + updated_value = self.escape_manager.escape(value, case_type) + return updated_value + def apply_field_value(self, field, operator, value): if modifier_function := self.field_value.get(operator.token_type): return modifier_function(field, value) diff --git a/translator/app/translator/core/tokenizer.py b/translator/app/translator/core/tokenizer.py index 540f4847..8f7ab6a2 100644 --- a/translator/app/translator/core/tokenizer.py +++ b/translator/app/translator/core/tokenizer.py @@ -20,6 +20,8 @@ import re from typing import Tuple, Union, List, Any, Optional, Type +from app.translator.core.custom_types.values import ValueType +from app.translator.core.escape_manager import EscapeManager from app.translator.core.exceptions.parser import ( UnsupportedOperatorException, TokenizerGeneralException, @@ -60,6 +62,7 @@ class QueryTokenizer(BaseTokenizer): multi_value_delimiter = "," wildcard_symbol = None + escape_manager: EscapeManager = None def __init_subclass__(cls, **kwargs): cls._validate_re_patterns() @@ -100,7 +103,7 @@ def search_operator(self, query, field_name) -> str: return operator def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]: - return operator, get_match_group(match, group_name='value') + return operator, get_match_group(match, group_name=ValueType.value) @staticmethod def clean_multi_value(value: Union[int, str]) -> Union[int, str]: diff --git a/translator/app/translator/platforms/athena/tokenizer.py b/translator/app/translator/platforms/athena/tokenizer.py index 09230f94..0e67349b 100644 --- a/translator/app/translator/platforms/athena/tokenizer.py +++ b/translator/app/translator/platforms/athena/tokenizer.py @@ -19,6 +19,7 @@ import re from typing import Tuple, Any +from app.translator.core.custom_types.values import ValueType from app.translator.core.models.identifier import Identifier from app.translator.core.tokenizer import QueryTokenizer from app.translator.core.custom_types.tokens import OperatorType @@ -41,11 +42,11 @@ class AthenaTokenizer(QueryTokenizer): } field_pattern = r'(?P"[a-zA-Z\._\-\s]+"|[a-zA-Z\._\-]+)' - num_value_pattern = r"(?P\d+(?:\.\d+)*)\s*" - bool_value_pattern = r"(?Ptrue|false)\s*" - single_quotes_value_pattern = r"""'(?P(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*)'""" + num_value_pattern = fr"(?P<{ValueType.number_value}>\d+(?:\.\d+)*)\s*" + bool_value_pattern = fr"(?P<{ValueType.bool_value}>true|false)\s*" + single_quotes_value_pattern = fr"""'(?P<{ValueType.single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*)'""" _value_pattern = fr"{num_value_pattern}|{bool_value_pattern}|{single_quotes_value_pattern}" - multi_value_pattern = r"""\((?P\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*')*)\)""" + multi_value_pattern = fr"""\((?P<{ValueType.value}>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*')*)\)""" wildcard_symbol = "%" @@ -54,13 +55,13 @@ def should_process_value_wildcard_symbols(operator: str) -> bool: return operator.lower() in ("like",) def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]: - if (num_value := get_match_group(match, group_name='num_value')) is not None: + if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: return operator, num_value - elif (bool_value := get_match_group(match, group_name='bool_value')) is not None: + elif (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None: return operator, bool_value - elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None: + elif (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None: return operator, s_q_value return super().get_operator_and_value(match, operator) diff --git a/translator/app/translator/platforms/base/lucene/escape_manager.py b/translator/app/translator/platforms/base/lucene/escape_manager.py new file mode 100644 index 00000000..06882e4c --- /dev/null +++ b/translator/app/translator/platforms/base/lucene/escape_manager.py @@ -0,0 +1,12 @@ +from app.translator.core.custom_types.values import ValueType +from app.translator.core.escape_manager import EscapeManager +from app.translator.core.models.escape_details import EscapeDetails + + +class LuceneEscapeManager(EscapeManager): + escape_map = { + ValueType.value: EscapeDetails(pattern=r'([_!@#$%^&*=+()\[\]{}|;:\'",.<>?/`~\-\s\\])', escape_symbols=r"\\\1") + } + + +lucene_escape_manager = LuceneEscapeManager() diff --git a/translator/app/translator/platforms/base/lucene/renders/lucene.py b/translator/app/translator/platforms/base/lucene/renders/lucene.py index ef81bf37..d3f016a2 100644 --- a/translator/app/translator/platforms/base/lucene/renders/lucene.py +++ b/translator/app/translator/platforms/base/lucene/renders/lucene.py @@ -21,16 +21,15 @@ from app.translator.const import DEFAULT_VALUE_TYPE from app.translator.core.render import BaseQueryRender from app.translator.core.render import BaseQueryFieldValue +from app.translator.platforms.base.lucene.escape_manager import lucene_escape_manager class LuceneFieldValue(BaseQueryFieldValue): - - def apply_value(self, value: Union[str, int]): - return value + escape_manager = lucene_escape_manager def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): - values = self.or_token.join(self.apply_value(f'{v}') for v in value) + values = self.or_token.join(f'{self.apply_value(v)}' for v in value) return f"{field}:({values})" return f'{field}:{self.apply_value(value)}' @@ -48,29 +47,29 @@ def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): - values = self.or_token.join(self.apply_value(f'{v}') for v in value) + values = self.or_token.join(f'{self.apply_value(v)}' for v in value) return f"NOT ({field} = ({values})" return f'NOT ({field} = {self.apply_value(value)})' def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): - values = self.or_token.join(self.apply_value(f'*{v}*') for v in value) + values = self.or_token.join(f'*{self.apply_value(v)}*' for v in value) return f"{field}:({values})" - prepared_value = self.apply_value(f"*{value}*") + prepared_value = f"*{self.apply_value(value)}*" return f'{field}:{prepared_value}' def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): - values = self.or_token.join(self.apply_value(f'*{v}') for v in value) + values = self.or_token.join(f'*{self.apply_value(v)}' for v in value) return f"{field}:({values})" - prepared_value = self.apply_value(f"*{value}") + prepared_value = f"*{self.apply_value(value)}" return f'{field}:{prepared_value}' def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): - values = self.or_token.join(self.apply_value(f'{v}*') for v in value) + values = self.or_token.join(f'{self.apply_value(v)}*' for v in value) return f"{field}:({values})" - prepared_value = self.apply_value(f"{value}*") + prepared_value = f"{self.apply_value(value)}*" return f'{field}:{prepared_value}' def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: @@ -81,7 +80,7 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})" - return self.apply_value(f"*{value}*") + return f"*{self.apply_value(value)}*" class LuceneQueryRender(BaseQueryRender): diff --git a/translator/app/translator/platforms/base/lucene/tokenizer.py b/translator/app/translator/platforms/base/lucene/tokenizer.py index e2d08b37..85eb48a2 100644 --- a/translator/app/translator/platforms/base/lucene/tokenizer.py +++ b/translator/app/translator/platforms/base/lucene/tokenizer.py @@ -19,12 +19,14 @@ from typing import Tuple, Union, List, Any +from app.translator.core.custom_types.values import ValueType from app.translator.core.exceptions.parser import TokenizerGeneralException from app.translator.core.mixins.logic import ANDLogicOperatorMixin from app.translator.core.models.field import Keyword, Field from app.translator.core.models.identifier import Identifier from app.translator.core.tokenizer import QueryTokenizer from app.translator.core.custom_types.tokens import OperatorType +from app.translator.platforms.base.lucene.escape_manager import lucene_escape_manager from app.translator.tools.utils import get_match_group @@ -41,19 +43,21 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin): field_pattern = r"(?P[a-zA-Z\.\-_]+)" match_operator_pattern = r"(?:___field___\s*(?P:\[\*\sTO|:\[|:<|:>|:))\s*" _num_value_pattern = r"\d+(?:\.\d+)*" - num_value_pattern = fr"(?P{_num_value_pattern})\s*" - double_quotes_value_pattern = r'"(?P(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*' - no_quotes_value_pattern = r"(?P(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*" - re_value_pattern = r"/(?P[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\[\]\s?]+)/\s*" - gte_value_pattern = fr"\[\s*(?P{_num_value_pattern})\s+TO\s+\*\s*\]" - lte_value_pattern = fr"\[\s*\*\s+TO\s+(?P{_num_value_pattern})\s*\]" + num_value_pattern = fr"(?P<{ValueType.number_value}>{_num_value_pattern})\s*" + double_quotes_value_pattern = fr'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s]|\\\"|\\)*)"\s*' + no_quotes_value_pattern = fr"(?P<{ValueType.no_quotes_value}>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*" + re_value_pattern = fr"/(?P<{ValueType.regular_expression_value}>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{{\}}\[\]\s?]+)/\s*" + gte_value_pattern = fr"\[\s*(?P<{ValueType.greater_than_or_equal}>{_num_value_pattern})\s+TO\s+\*\s*\]" + lte_value_pattern = fr"\[\s*\*\s+TO\s+(?P<{ValueType.less_than_or_equal}>{_num_value_pattern})\s*\]" range_value_pattern = fr"{gte_value_pattern}|{lte_value_pattern}" _value_pattern = fr"{num_value_pattern}|{re_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}|{range_value_pattern}" - keyword_pattern = r"(?P(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{|\\\}|\\\:|\\)+)(?:\s+|\)|$)" + keyword_pattern = fr"(?P<{ValueType.no_quotes_value}>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{{|\\\}}|\\\:|\\)+)(?:\s+|\)|$)" - multi_value_pattern = r"""\((?P[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)""" + multi_value_pattern = fr"""\((?P<{ValueType.value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)""" multi_value_check_pattern = r"___field___\s*___operator___\s*\(" + escape_manager = lucene_escape_manager + wildcard_symbol = "*" @staticmethod @@ -69,22 +73,22 @@ def clean_quotes(value: Union[str, int]): return value def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]: - if (num_value := get_match_group(match, group_name='num_value')) is not None: + if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: return operator, num_value - elif (re_value := get_match_group(match, group_name='re_value')) is not None: + elif (re_value := get_match_group(match, group_name=ValueType.regular_expression_value)) is not None: return OperatorType.REGEX, re_value - elif (n_q_value := get_match_group(match, group_name='n_q_value')) is not None: + elif (n_q_value := get_match_group(match, group_name=ValueType.no_quotes_value)) is not None: return operator, n_q_value - elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None: + elif (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: return operator, d_q_value - elif (gte_value := get_match_group(match, group_name='gte_value')) is not None: + elif (gte_value := get_match_group(match, group_name=ValueType.greater_than_or_equal)) is not None: return OperatorType.GTE, gte_value - elif (lte_value := get_match_group(match, group_name='lte_value')) is not None: + elif (lte_value := get_match_group(match, group_name=ValueType.less_than_or_equal)) is not None: return OperatorType.LTE, lte_value return super().get_operator_and_value(match, operator) diff --git a/translator/app/translator/platforms/base/spl/escape_manager.py b/translator/app/translator/platforms/base/spl/escape_manager.py new file mode 100644 index 00000000..bd6f4136 --- /dev/null +++ b/translator/app/translator/platforms/base/spl/escape_manager.py @@ -0,0 +1,12 @@ +from app.translator.core.custom_types.values import ValueType +from app.translator.core.escape_manager import EscapeManager +from app.translator.core.models.escape_details import EscapeDetails + + +class SplEscapeManager(EscapeManager): + escape_map = { + ValueType.value: EscapeDetails(pattern='("|(? str: if isinstance(value, list): return f"({self.or_token.join([self.equal_modifier(field=field, value=v) for v in value])})" - return f'{field}="{value}"' + return f'{field}="{self.apply_value(value)}"' def less_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{field}<"{value}"' + return f'{field}<"{self.apply_value(value)}"' def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{field}<="{value}"' + return f'{field}<="{self.apply_value(value)}"' def greater_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{field}>"{value}"' + return f'{field}>"{self.apply_value(value)}"' def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{field}>="{value}"' + return f'{field}>="{self.apply_value(value)}"' def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.not_equal_modifier(field=field, value=v) for v in value])})" - return f'{field}!="{value}"' + return f'{field}!="{self.apply_value(value)}"' def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.contains_modifier(field=field, value=v) for v in value])})" - return f'{field}="*{value}*"' + return f'{field}="*{self.apply_value(value)}*"' def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.endswith_modifier(field=field, value=v) for v in value])})" - return f'{field}="*{value}"' + return f'{field}="*{self.apply_value(value)}"' def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.startswith_modifier(field=field, value=v) for v in value])})" - return f'{field}="{value}*"' + return f'{field}="{self.apply_value(value)}*"' def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})" - return f'"{value}"' + return f'"{self.apply_value(value)}"' def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: raise UnsupportedRenderMethod(platform_name=self.details.name, method="Regex Expression") diff --git a/translator/app/translator/platforms/base/spl/tokenizer.py b/translator/app/translator/platforms/base/spl/tokenizer.py index 7c0a295b..0ef3977b 100644 --- a/translator/app/translator/platforms/base/spl/tokenizer.py +++ b/translator/app/translator/platforms/base/spl/tokenizer.py @@ -19,11 +19,13 @@ import re from typing import Tuple, Any, List, Union +from app.translator.core.custom_types.values import ValueType from app.translator.core.mixins.logic import ANDLogicOperatorMixin from app.translator.core.models.field import Field, Keyword from app.translator.core.models.identifier import Identifier from app.translator.core.tokenizer import QueryTokenizer from app.translator.core.custom_types.tokens import OperatorType +from app.translator.platforms.base.spl.escape_manager import spl_escape_manager from app.translator.tools.utils import get_match_group @@ -39,28 +41,30 @@ class SplTokenizer(QueryTokenizer, ANDLogicOperatorMixin): multi_value_operators_map = {"in": OperatorType.EQ} field_pattern = r"(?P[a-zA-Z\.\-_\{\}]+)" - num_value_pattern = r"(?P\d+(?:\.\d+)*)(?=$|\s|\))" - double_quotes_value_pattern = r'"(?P(?:[:a-zA-Z\*0-9=+%#\-_/,;\'\.$&^@!\]\[\(\)\{\}\s]|\\\"|\\)*)"\s*' - single_quotes_value_pattern = r"'(?P(?:[:a-zA-Z\*0-9=+%#\-_/,;\"\.$&^@!\(\)\{\}\s]|\\\'|\\)*)'\s*" - no_quotes_value_pattern = r"(?P(?:[:a-zA-Z\*0-9+%#\-_/,\.$&^@!]|\\\s|\\=|\\!=|\\<|\\<=|\\>|\\>=|\\\\)+)(?=$|\s|\))" + num_value_pattern = fr"(?P<{ValueType.number_value}>\d+(?:\.\d+)*)(?=$|\s|\))" + double_quotes_value_pattern = fr'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,;\'\.<>$&^@!\]\[\(\)\{{\}}\s]|\\\"|\\)*)"\s*' + single_quotes_value_pattern = fr"'(?P<{ValueType.single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,;\"\.<>$&^@!\(\)\{{\}}\s]|\\\'|\\)*)'\s*" + no_quotes_value_pattern = fr"(?P<{ValueType.no_quotes_value}>(?:[:a-zA-Z\*0-9+%#\-_/,\.$&^@!]|\\\s|\\=|\\!=|\\<|\\<=|\\>|\\>=|\\\\)+)(?=$|\s|\))" _value_pattern = fr"{num_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}|{single_quotes_value_pattern}" - multi_value_pattern = r"""\((?P[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,;.$&^@!\{\}\(\s]+)\)""" + multi_value_pattern = fr"""\((?P<{ValueType.value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,;.$&^@!\{{\}}\(\s]+)\)""" keyword_pattern = fr"{double_quotes_value_pattern}|{no_quotes_value_pattern}" wildcard_symbol = "*" + escape_manager = spl_escape_manager + def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]: - if (num_value := get_match_group(match, group_name='num_value')) is not None: + if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: return operator, num_value - elif (no_q_value := get_match_group(match, group_name='no_q_value')) is not None: + elif (no_q_value := get_match_group(match, group_name=ValueType.no_quotes_value)) is not None: return operator, no_q_value - elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None: - return operator, d_q_value + elif (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: + return operator, self.escape_manager.remove_escape(d_q_value) - elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None: - return operator, s_q_value + elif (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None: + return operator, self.escape_manager.remove_escape(s_q_value) return super().get_operator_and_value(match) diff --git a/translator/app/translator/platforms/chronicle/escape_manager.py b/translator/app/translator/platforms/chronicle/escape_manager.py new file mode 100644 index 00000000..ed9f9263 --- /dev/null +++ b/translator/app/translator/platforms/chronicle/escape_manager.py @@ -0,0 +1,13 @@ +from app.translator.core.custom_types.values import ValueType +from app.translator.core.escape_manager import EscapeManager +from app.translator.core.models.escape_details import EscapeDetails + + +class ChronicleEscapeManager(EscapeManager): + escape_map = { + ValueType.value: EscapeDetails(pattern='([\\\\|"])'), + ValueType.regular_expression_value: EscapeDetails(pattern='([\\\\|/(")\\[\\]{}.^$+<>!?])') + } + + +chronicle_escape_manager = ChronicleEscapeManager() diff --git a/translator/app/translator/platforms/chronicle/renders/chronicle.py b/translator/app/translator/platforms/chronicle/renders/chronicle.py index 281134dc..9eaa08f0 100644 --- a/translator/app/translator/platforms/chronicle/renders/chronicle.py +++ b/translator/app/translator/platforms/chronicle/renders/chronicle.py @@ -19,9 +19,11 @@ from typing import List, Union from app.translator.const import DEFAULT_VALUE_TYPE +from app.translator.core.custom_types.values import ValueType from app.translator.core.mapping import SourceMapping from app.translator.core.models.functions.base import Function from app.translator.platforms.chronicle.const import chronicle_query_details +from app.translator.platforms.chronicle.escape_manager import chronicle_escape_manager from app.translator.platforms.chronicle.mapping import ChronicleMappings, chronicle_mappings from app.translator.core.exceptions.render import UnsupportedRenderMethod from app.translator.core.models.platform_details import PlatformDetails @@ -30,52 +32,71 @@ class ChronicleFieldValue(BaseQueryFieldValue): details: PlatformDetails = chronicle_query_details + escape_manager = chronicle_escape_manager @staticmethod def apply_field(field): return field + def apply_value(self, value: Union[str, int], value_type: str = ValueType.value) -> Union[str, int]: + if isinstance(value, str): + if "*" in value: + return self.apply_asterics_value(value) + value = self.clean_str_value(value) + return super().apply_value(value, value_type) + + def apply_asterics_value(self, value: str) -> str: + value = value.replace(r"\\*", "*") + updated_value = super().apply_value(value, ValueType.regular_expression_value) + updated_value = updated_value.replace("*", ".*") + return updated_value + + def clean_str_value(self, value: str) -> str: + if value.endswith("/"): + value = value.rstrip("/") + return value + def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.equal_modifier(field=field, value=v) for v in value)})" - return f'{self.apply_field(field)} = "{value}" nocase' + return f'{self.apply_field(field)} = "{self.apply_value(value)}" nocase' def less_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{self.apply_field(field)} < "{value}" nocase' + return f'{self.apply_field(field)} < "{self.apply_value(value)}" nocase' def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{self.apply_field(field)} <= "{value}" nocase' + return f'{self.apply_field(field)} <= "{self.apply_value(value)}" nocase' def greater_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{self.apply_field(field)} > "{value}" nocase' + return f'{self.apply_field(field)} > "{self.apply_value(value)}" nocase' def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{self.apply_field(field)} >= "{value}" nocase' + return f'{self.apply_field(field)} >= "{self.apply_value(value)}" nocase' def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.not_equal_modifier(field=field, value=v) for v in value])})" - return f'{self.apply_field(field)} != "{value}" nocase' + return f'{self.apply_field(field)} != "{self.apply_value(value)}" nocase' def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})" - return f'{self.apply_field(field)} = /.*{value}.*/ nocase' + return f'{self.apply_field(field)} = /.*{self.apply_value(value)}.*/ nocase' def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.endswith_modifier(field=field, value=v) for v in value)})" - return f'{self.apply_field(field)} = /.*{value}$/ nocase' + return f'{self.apply_field(field)} = /.*{self.apply_value(value)}$/ nocase' def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.startswith_modifier(field=field, value=v) for v in value)})" - return f'{self.apply_field(field)} = /^{value}.*/ nocase' + return f'{self.apply_field(field)} = /^{self.apply_value(value)}.*/ nocase' def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})" - return f'{self.apply_field(field)} = /{value}/ nocase' + return f'{self.apply_field(field)} = /{self.apply_asterics_value(value)}/ nocase' def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: raise UnsupportedRenderMethod(platform_name=self.details.name, method="Keywords") diff --git a/translator/app/translator/platforms/chronicle/renders/chronicle_rule.py b/translator/app/translator/platforms/chronicle/renders/chronicle_rule.py index 06e912c7..8ed55fab 100644 --- a/translator/app/translator/platforms/chronicle/renders/chronicle_rule.py +++ b/translator/app/translator/platforms/chronicle/renders/chronicle_rule.py @@ -38,39 +38,39 @@ class ChronicleRuleFieldValue(ChronicleFieldValue): def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.equal_modifier(field=field, value=v) for v in value)})" - return f'{self.apply_field(field)} = "{value}"' + return f'{self.apply_field(field)} = "{self.apply_value(value)}"' def less_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{self.apply_field(field)} < "{value}"' + return f'{self.apply_field(field)} < "{self.apply_value(value)}"' def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{self.apply_field(field)} <= "{value}"' + return f'{self.apply_field(field)} <= "{self.apply_value(value)}"' def greater_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{self.apply_field(field)} > "{value}"' + return f'{self.apply_field(field)} > "{self.apply_value(value)}"' def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - return f'{self.apply_field(field)} >= "{value}"' + return f'{self.apply_field(field)} >= "{self.apply_value(value)}"' def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.not_equal_modifier(field=field, value=v) for v in value])})" - return f'{self.apply_field(field)} != "{value}"' + return f'{self.apply_field(field)} != "{self.apply_value(value)}"' def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})" - return f're.regex({self.apply_field(field)}, `.*{value}.*`)' + return f're.regex({self.apply_field(field)}, `.*{self.apply_value(value)}.*`)' def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.endswith_modifier(field=field, value=v) for v in value)})" - return f're.regex({self.apply_field(field)}, `.*{value}`)' + return f're.regex({self.apply_field(field)}, `.*{self.apply_value(value)}`)' def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.startswith_modifier(field=field, value=v) for v in value)})" - return f're.regex({self.apply_field(field)}, `{value}.*`)' + return f're.regex({self.apply_field(field)}, `{self.apply_value(value)}.*`)' @staticmethod def apply_field(field): @@ -79,7 +79,7 @@ def apply_field(field): def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})" - return f're.regex({self.apply_field(field)}, `{value}`)' + return f're.regex({self.apply_field(field)}, `{self.apply_asterics_value(value)}`)' class ChronicleSecurityRuleRender(ChronicleQueryRender): diff --git a/translator/app/translator/platforms/chronicle/tokenizer.py b/translator/app/translator/platforms/chronicle/tokenizer.py index b633ba7c..dd64500a 100644 --- a/translator/app/translator/platforms/chronicle/tokenizer.py +++ b/translator/app/translator/platforms/chronicle/tokenizer.py @@ -19,9 +19,11 @@ import re from typing import Tuple, Any +from app.translator.core.custom_types.values import ValueType from app.translator.core.exceptions.parser import TokenizerGeneralException from app.translator.core.tokenizer import QueryTokenizer from app.translator.core.custom_types.tokens import OperatorType +from app.translator.platforms.chronicle.escape_manager import chronicle_escape_manager from app.translator.tools.utils import get_match_group @@ -36,25 +38,26 @@ class ChronicleQueryTokenizer(QueryTokenizer): } field_pattern = r"(?P[a-zA-Z0-9\._]+)" - num_value_pattern = r"(?P\d+(?:\.\d+)*)\s*" - bool_value_pattern = r"(?Ptrue|false)\s*" - double_quotes_value_pattern = r'"(?P(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\\\)*)"\s*(?:nocase)?' - re_value_pattern = r"/(?P(?:\\\/|[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?])+)/\s*(?:nocase)?" + num_value_pattern = fr"(?P<{ValueType.number_value}>\d+(?:\.\d+)*)\s*" + bool_value_pattern = fr"(?P<{ValueType.bool_value}>true|false)\s*" + double_quotes_value_pattern = fr'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s]|\\\"|\\\\)*)"\s*(?:nocase)?' + re_value_pattern = fr"/(?P<{ValueType.regular_expression_value}>(?:\\\/|[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{{\}}\s?])+)/\s*(?:nocase)?" _value_pattern = fr"{num_value_pattern}|{bool_value_pattern}|{double_quotes_value_pattern}|{re_value_pattern}" + escape_manager = chronicle_escape_manager wildcard_symbol = ".*" def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]: - if (num_value := get_match_group(match, group_name='num_value')) is not None: + if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: return operator, num_value - elif (bool_value := get_match_group(match, group_name='bool_value')) is not None: + elif (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None: return operator, bool_value - elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None: - return operator, d_q_value + elif (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: + return operator, self.escape_manager.remove_escape(d_q_value) - elif (re_value := get_match_group(match, group_name='re_value')) is not None: + elif (re_value := get_match_group(match, group_name=ValueType.regular_expression_value)) is not None: return OperatorType.REGEX, re_value return super().get_operator_and_value(match, operator) @@ -70,8 +73,8 @@ class ChronicleRuleTokenizer(ChronicleQueryTokenizer): field_pattern = r"(?P[$a-zA-Z0-9\._]+)" regex_field_regex = r"re\.regex\((?P[$a-zA-Z\._]+)," - double_quotes_value_pattern = r'"(?P(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\\\)*)"' - back_quotes_value_pattern = r'`(?P(?:[:a-zA-Z\*0-9=+%#\-_/,\'\"\\\.$&^@!\(\)\{\}\s])*)`' + double_quotes_value_pattern = fr'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s]|\\\"|\\\\)*)"' + back_quotes_value_pattern = fr'`(?P<{ValueType.back_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\"\\\.$&^@!\(\)\{{\}}\s])*)`' regex_value_regex = fr"{double_quotes_value_pattern}|{back_quotes_value_pattern}\s*\)\s*(?:nocase)?\s*" def search_field_value(self, query): @@ -102,10 +105,10 @@ def search_field_value(self, query): return super().search_field_value(query=query) def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]: - if (d_q_value := get_match_group(match, group_name='d_q_value')) is not None: - return operator, d_q_value + if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: + return operator, self.escape_manager.remove_escape(d_q_value) - elif (b_q_value := get_match_group(match, group_name='b_q_value')) is not None: - return operator, b_q_value + elif (b_q_value := get_match_group(match, group_name=ValueType.back_quotes_value)) is not None: + return operator, self.escape_manager.remove_escape(b_q_value) return super().get_operator_and_value(match, operator) diff --git a/translator/app/translator/platforms/elasticsearch/renders/elasticsearch.py b/translator/app/translator/platforms/elasticsearch/renders/elasticsearch.py index ef4447d8..ce10f32a 100644 --- a/translator/app/translator/platforms/elasticsearch/renders/elasticsearch.py +++ b/translator/app/translator/platforms/elasticsearch/renders/elasticsearch.py @@ -16,8 +16,6 @@ limitations under the License. ----------------------------------------------------------------- """ -from typing import Union - from app.translator.platforms.base.lucene.renders.lucene import LuceneQueryRender, LuceneFieldValue from app.translator.platforms.elasticsearch.const import elasticsearch_lucene_query_details from app.translator.platforms.elasticsearch.mapping import ElasticSearchMappings, elasticsearch_mappings @@ -27,13 +25,6 @@ class ElasticSearchFieldValue(LuceneFieldValue): details: PlatformDetails = elasticsearch_lucene_query_details - def apply_value(self, value: Union[str, int]): - if isinstance(value, int): - return value - if " " in value: - return f'"{value}"'.replace(" ", r"\ ") - return value - class ElasticSearchQueryRender(LuceneQueryRender): details: PlatformDetails = elasticsearch_lucene_query_details diff --git a/translator/app/translator/platforms/logscale/escape_manager.py b/translator/app/translator/platforms/logscale/escape_manager.py new file mode 100644 index 00000000..cd5132e0 --- /dev/null +++ b/translator/app/translator/platforms/logscale/escape_manager.py @@ -0,0 +1,12 @@ +from app.translator.core.custom_types.values import ValueType +from app.translator.core.escape_manager import EscapeManager +from app.translator.core.models.escape_details import EscapeDetails + + +class LogscaleEscapeManager(EscapeManager): + escape_map = { + ValueType.value: EscapeDetails(pattern='(\+|\\\\|{|\[|\*|"|\(|\)|/(?![*?\\\\]))') + } + + +logscale_escape_manager = LogscaleEscapeManager() diff --git a/translator/app/translator/platforms/logscale/renders/logscale.py b/translator/app/translator/platforms/logscale/renders/logscale.py index 55117c93..f851e94a 100644 --- a/translator/app/translator/platforms/logscale/renders/logscale.py +++ b/translator/app/translator/platforms/logscale/renders/logscale.py @@ -16,12 +16,11 @@ limitations under the License. ----------------------------------------------------------------- """ -import re - from typing import Union from app.translator.const import DEFAULT_VALUE_TYPE from app.translator.platforms.logscale.const import logscale_query_details +from app.translator.platforms.logscale.escape_manager import logscale_escape_manager from app.translator.platforms.logscale.functions import LogScaleFunctions, log_scale_functions from app.translator.platforms.logscale.mapping import LogScaleMappings, logscale_mappings from app.translator.core.mapping import SourceMapping @@ -32,13 +31,7 @@ class LogScaleFieldValue(BaseQueryFieldValue): details: PlatformDetails = logscale_query_details - - def apply_value(self, value: Union[str, int]) -> str: - if isinstance(value, str) and '"' in value: - value = re.sub(r'(? str: if not field_name.isalpha(): diff --git a/translator/app/translator/platforms/logscale/tokenizer.py b/translator/app/translator/platforms/logscale/tokenizer.py index 209a5498..ee606141 100644 --- a/translator/app/translator/platforms/logscale/tokenizer.py +++ b/translator/app/translator/platforms/logscale/tokenizer.py @@ -19,11 +19,13 @@ import re from typing import Tuple, Any, List, Union +from app.translator.core.custom_types.values import ValueType from app.translator.core.mixins.logic import ANDLogicOperatorMixin from app.translator.core.models.field import Keyword, Field from app.translator.core.models.identifier import Identifier from app.translator.core.custom_types.tokens import GroupType, LogicalOperatorType, OperatorType from app.translator.core.tokenizer import QueryTokenizer +from app.translator.platforms.logscale.escape_manager import logscale_escape_manager from app.translator.tools.utils import get_match_group @@ -38,22 +40,22 @@ class LogScaleTokenizer(QueryTokenizer, ANDLogicOperatorMixin): } field_pattern = r"(?P[a-zA-Z\._\-]+)" - num_value_pattern = r"(?P\d+(?:\.\d+)*)\s*" - double_quotes_value_pattern = r'"(?P(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*' - re_value_pattern = r"/(?P[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?]+)/i?\s*" + num_value_pattern = fr"(?P<{ValueType.number_value}>\d+(?:\.\d+)*)\s*" + double_quotes_value_pattern = fr'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s]|\\\"|\\)*)"\s*' + re_value_pattern = fr"/(?P<{ValueType.regular_expression_value}>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{{\}}\s?]+)/i?\s*" _value_pattern = fr"""{num_value_pattern}|{re_value_pattern}|{double_quotes_value_pattern}""" keyword_pattern = double_quotes_value_pattern - + escape_manager = logscale_escape_manager wildcard_symbol = "*" def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]: - if (num_value := get_match_group(match, group_name='num_value')) is not None: + if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: return operator, num_value - elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None: + elif (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: return operator, d_q_value - elif (re_value := get_match_group(match, group_name='re_value')) is not None: + elif (re_value := get_match_group(match, group_name=ValueType.regular_expression_value)) is not None: return OperatorType.REGEX, re_value return super().get_operator_and_value(match, operator) diff --git a/translator/app/translator/platforms/microsoft/escape_manager.py b/translator/app/translator/platforms/microsoft/escape_manager.py new file mode 100644 index 00000000..57ab7c15 --- /dev/null +++ b/translator/app/translator/platforms/microsoft/escape_manager.py @@ -0,0 +1,12 @@ +from app.translator.core.custom_types.values import ValueType +from app.translator.core.escape_manager import EscapeManager +from app.translator.core.models.escape_details import EscapeDetails + + +class MicrosoftEscapeManager(EscapeManager): + escape_map = { + ValueType.value: EscapeDetails(pattern='(?:\\\\)?(")') + } + + +microsoft_escape_manager = MicrosoftEscapeManager() diff --git a/translator/app/translator/platforms/microsoft/renders/microsoft_sentinel.py b/translator/app/translator/platforms/microsoft/renders/microsoft_sentinel.py index 7982504b..ab05d8ac 100644 --- a/translator/app/translator/platforms/microsoft/renders/microsoft_sentinel.py +++ b/translator/app/translator/platforms/microsoft/renders/microsoft_sentinel.py @@ -20,6 +20,7 @@ from app.translator.const import DEFAULT_VALUE_TYPE from app.translator.platforms.microsoft.const import microsoft_sentinel_query_details +from app.translator.platforms.microsoft.escape_manager import microsoft_escape_manager from app.translator.platforms.microsoft.functions import MicrosoftFunctions, microsoft_sentinel_functions from app.translator.platforms.microsoft.mapping import MicrosoftSentinelMappings, microsoft_sentinel_mappings from app.translator.core.mapping import LogSourceSignature @@ -29,6 +30,7 @@ class MicrosoftSentinelFieldValue(BaseQueryFieldValue): details: PlatformDetails = microsoft_sentinel_query_details + escape_manager = microsoft_escape_manager @staticmethod def __escape_value(value: Union[int, str]) -> Union[int, str]: @@ -41,34 +43,34 @@ def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: prepared_values = ", ".join(f"@'{self.__escape_value(v)}'" for v in value) operator = "in~" if all(isinstance(v, str) for v in value) else "in" return f"{field} {operator} ({prepared_values})" - return f"{field} == {value}" + return f"{field} == {self.apply_value(value)}" def less_modifier(self, field: str, value: Union[int, str]) -> str: if isinstance(value, int): return f"{field} < {value}" - return f"{field} < '{value}'" + return f"{field} < '{self.apply_value(value)}'" def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: if isinstance(value, int): return f"{field} <= {value}" - return f"{field} <= '{value}'" + return f"{field} <= '{self.apply_value(value)}'" def greater_modifier(self, field: str, value: Union[int, str]) -> str: if isinstance(value, int): return f"{field} > {value}" - return f"{field} > '{value}'" + return f"{field} > '{self.apply_value(value)}'" def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: if isinstance(value, int): return f"{field} >= {value}" - return f"{field} >= '{value}'" + return f"{field} >= '{self.apply_value(value)}'" def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.not_equal_modifier(field=field, value=v) for v in value])})" if isinstance(value, int): return f"{field} !~ {value}" - return f"{field} !~ '{value}'" + return f"{field} !~ '{self.apply_value(value)}'" def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): diff --git a/translator/app/translator/platforms/microsoft/tokenizer.py b/translator/app/translator/platforms/microsoft/tokenizer.py index 622a84fe..559c0339 100644 --- a/translator/app/translator/platforms/microsoft/tokenizer.py +++ b/translator/app/translator/platforms/microsoft/tokenizer.py @@ -19,9 +19,11 @@ import re from typing import Tuple, Any, Union +from app.translator.core.custom_types.values import ValueType from app.translator.core.mixins.operator import OperatorBasedMixin from app.translator.core.tokenizer import QueryTokenizer from app.translator.core.custom_types.tokens import OperatorType +from app.translator.platforms.microsoft.escape_manager import microsoft_escape_manager from app.translator.tools.utils import get_match_group @@ -45,27 +47,33 @@ class MicrosoftSentinelTokenizer(QueryTokenizer, OperatorBasedMixin): } field_pattern = r"(?P[a-zA-Z\.\-_]+)" - bool_value_pattern = r"(?Ptrue|false)\s*" - num_value_pattern = r"(?P\d+(?:\.\d+)*)\s*" - double_quotes_value_pattern = r'@?"(?P(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\\\)*)"\s*' - single_quotes_value_pattern = r"@?'(?P(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{\}\s]|\\\'|\\\\)*)'\s*" + bool_value_pattern = fr"(?P<{ValueType.bool_value}>true|false)\s*" + num_value_pattern = fr"(?P<{ValueType.number_value}>\d+(?:\.\d+)*)\s*" + double_quotes_value_pattern = fr'(?P<{ValueType.double_quotes_value}>@?"(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s]|\\\"|\\\\)*)"\s*' + single_quotes_value_pattern = fr"(?P<{ValueType.single_quotes_value}>@?'(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{{\}}\s]|\\\'|\\\\)*)'\s*" str_value_pattern = fr"""{double_quotes_value_pattern}|{single_quotes_value_pattern}""" _value_pattern = fr"""{bool_value_pattern}|{num_value_pattern}|{str_value_pattern}""" - multi_value_pattern = r"""\((?P[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]+)\)""" + multi_value_pattern = fr"""\((?P<{ValueType.value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]+)\)""" keyword_pattern = fr"\*\s+contains\s+(?:{str_value_pattern})" + escape_manager = microsoft_escape_manager + def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]: - if (num_value := get_match_group(match, group_name='num_value')) is not None: + if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: return operator, num_value - elif (bool_value := get_match_group(match, group_name='bool_value')) is not None: + elif (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None: return operator, bool_value - elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None: - return operator, d_q_value + elif (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: + if d_q_value.startswith("@"): + return operator, d_q_value.lstrip("@").lstrip('"') + return operator, self.escape_manager.remove_escape(d_q_value.lstrip('"')) - elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None: - return operator, s_q_value + elif (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None: + if s_q_value.startswith("@"): + return operator, s_q_value.lstrip("@").lstrip("'") + return operator, self.escape_manager.remove_escape(s_q_value.lstrip("'")) return super().get_operator_and_value(match, operator) diff --git a/translator/app/translator/platforms/opensearch/renders/opensearch.py b/translator/app/translator/platforms/opensearch/renders/opensearch.py index 823e0ecb..b145cf84 100644 --- a/translator/app/translator/platforms/opensearch/renders/opensearch.py +++ b/translator/app/translator/platforms/opensearch/renders/opensearch.py @@ -30,9 +30,9 @@ class OpenSearchFieldValue(LuceneFieldValue): def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): - values = self.or_token.join(f'"{v}"' for v in value) + values = self.or_token.join(f'"{self.apply_value(v)}"' for v in value) return f"{field}:({values})" - return f'{field}:"{value}"' + return f'{field}:"{self.apply_value(value)}"' def less_modifier(self, field: str, value: Union[int, str]) -> str: return f'{field}:<"{self.apply_value(value)}"' @@ -48,27 +48,27 @@ def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): - values = self.or_token.join(self.apply_value(f'"{v}"') for v in value) + values = self.or_token.join(f'"{self.apply_value(v)}"' for v in value) return f"NOT ({field} = ({values})" return f'NOT ({field} = "{self.apply_value(value)}")' def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): - values = self.or_token.join(f'"*{v}*"' for v in value) + values = self.or_token.join(f'"*{self.apply_value(v)}*"' for v in value) return f"{field}:({values})" - return f'{field}:"*{value}*"' + return f'{field}:"*{self.apply_value(value)}*"' def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): - values = self.or_token.join(f'"*{v}"' for v in value) + values = self.or_token.join(f'"*{self.apply_value(v)}"' for v in value) return f"{field}:({values})" - return f'{field}:"*{value}"' + return f'{field}:"*{self.apply_value(value)}"' def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): - values = self.or_token.join(f'"{v}*"' for v in value) + values = self.or_token.join(f'"{self.apply_value(v)}*"' for v in value) return f"{field}:({values})" - return f'{field}:"{value}*"' + return f'{field}:"{self.apply_value(value)}*"' def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): @@ -78,7 +78,7 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})" - return f'"*{value}*"' + return f'"*{self.apply_value(value)}*"' class OpenSearchQueryRender(LuceneQueryRender): diff --git a/translator/app/translator/platforms/qradar/escape_manager.py b/translator/app/translator/platforms/qradar/escape_manager.py new file mode 100644 index 00000000..18462b01 --- /dev/null +++ b/translator/app/translator/platforms/qradar/escape_manager.py @@ -0,0 +1,12 @@ +from app.translator.core.custom_types.values import ValueType +from app.translator.core.escape_manager import EscapeManager +from app.translator.core.models.escape_details import EscapeDetails + + +class QradarEscapeManager(EscapeManager): + escape_map = { + ValueType.value: EscapeDetails(pattern="([\\\])") + } + + +qradar_escape_manager = QradarEscapeManager() diff --git a/translator/app/translator/platforms/qradar/renders/qradar.py b/translator/app/translator/platforms/qradar/renders/qradar.py index 26367002..15f61a21 100644 --- a/translator/app/translator/platforms/qradar/renders/qradar.py +++ b/translator/app/translator/platforms/qradar/renders/qradar.py @@ -18,10 +18,12 @@ """ from typing import Union, List -from app.translator.const import DEFAULT_VALUE_TYPE +from app.translator.core.custom_types.values import ValueType from app.translator.core.mapping import SourceMapping from app.translator.core.models.functions.base import Function +from app.translator.const import DEFAULT_VALUE_TYPE from app.translator.platforms.qradar.const import qradar_query_details +from app.translator.platforms.qradar.escape_manager import qradar_escape_manager from app.translator.platforms.qradar.mapping import QradarLogSourceSignature, QradarMappings, qradar_mappings from app.translator.core.models.platform_details import PlatformDetails from app.translator.core.render import BaseQueryRender, BaseQueryFieldValue @@ -29,58 +31,71 @@ class QradarFieldValue(BaseQueryFieldValue): details: PlatformDetails = qradar_query_details + escape_manager = qradar_escape_manager + + def apply_value(self, value: Union[str, int], value_type: str = ValueType.value) -> Union[str, int]: + if isinstance(value, str): + value = value.replace("\\'", "%").replace("'", '"') + if value.endswith("\\\\%"): + value = value.replace("\\\\%", "\\%") + return super().apply_value(value, value_type) + + def _apply_value(self, value: Union[str, int]) -> Union[str, int]: + if isinstance(value, str) and "\\" in value: + return value + return self.apply_value(value) def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.equal_modifier(field=field, value=v) for v in value])})" if field == "UTF8(payload)": - return f"UTF8(payload) ILIKE '{value}'" + return f"UTF8(payload) ILIKE '{self.apply_value(value)}'" if isinstance(value, int): return f'"{field}"={value}' - return f'"{field}"=\'{value}\'' + return f'"{field}"=\'{self._apply_value(value)}\'' def less_modifier(self, field: str, value: Union[int, str]) -> str: if isinstance(value, int): return f'"{field}"<{value}' - return f'"{field}"<\'{value}\'' + return f'"{field}"<\'{self._apply_value(value)}\'' def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: if isinstance(value, int): return f'"{field}"<={value}' - return f'"{field}"<=\'{value}\'' + return f'"{field}"<=\'{self._apply_value(value)}\'' def greater_modifier(self, field: str, value: Union[int, str]) -> str: if isinstance(value, int): return f'"{field}">{value}' - return f'"{field}">\'{value}\'' + return f'"{field}">\'{self._apply_value(value)}\'' def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: if isinstance(value, int): return f'"{field}">={value}' - return f'"{field}">=\'{value}\'' + return f'"{field}">=\'{self._apply_value(value)}\'' def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.not_equal_modifier(field=field, value=v) for v in value])})" if isinstance(value, int): return f'"{field}"!={value}' - return f'"{field}"!=\'{value}\'' + return f'"{field}"!=\'{self._apply_value(value)}\'' def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})" - return f'"{field}" ILIKE \'%{value}%\'' + return f'"{field}" ILIKE \'%{self._apply_value(value)}%\'' def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.endswith_modifier(field=field, value=v) for v in value)})" - return f'"{field}" ILIKE \'%{value}\'' + return f'"{field}" ILIKE \'%{self._apply_value(value)}\'' def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.startswith_modifier(field=field, value=v) for v in value)})" - return f'"{field}" ILIKE \'{value}%\'' + return f'"{field}" ILIKE \'{self._apply_value(value)}%\'' def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): @@ -90,7 +105,7 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})" - return f'UTF8(payload) ILIKE "%{value}%"' + return f"UTF8(payload) ILIKE '%{self.apply_value(value)}%'" class QradarQueryRender(BaseQueryRender): diff --git a/translator/app/translator/platforms/qradar/tokenizer.py b/translator/app/translator/platforms/qradar/tokenizer.py index a98001e8..b50dd031 100644 --- a/translator/app/translator/platforms/qradar/tokenizer.py +++ b/translator/app/translator/platforms/qradar/tokenizer.py @@ -19,15 +19,18 @@ import re from typing import Tuple, Any +from app.translator.core.custom_types.values import ValueType from app.translator.platforms.qradar.const import UTF8_PAYLOAD_PATTERN, SINGLE_QUOTES_VALUE_PATTERN, NUM_VALUE_PATTERN from app.translator.core.models.field import Keyword from app.translator.core.models.identifier import Identifier from app.translator.core.tokenizer import QueryTokenizer from app.translator.core.custom_types.tokens import OperatorType +from app.translator.platforms.qradar.escape_manager import qradar_escape_manager from app.translator.tools.utils import get_match_group class QradarTokenizer(QueryTokenizer): + single_value_operators_map = { "=": OperatorType.EQ, "<=": OperatorType.LTE, @@ -45,10 +48,11 @@ class QradarTokenizer(QueryTokenizer): } field_pattern = r'(?P"[a-zA-Z\._\-\s]+"|[a-zA-Z\._\-]+)' - bool_value_pattern = r"(?Ptrue|false)\s*" + bool_value_pattern = fr"(?P<{ValueType.bool_value}>true|false)\s*" _value_pattern = fr"{NUM_VALUE_PATTERN}|{bool_value_pattern}|{SINGLE_QUOTES_VALUE_PATTERN}" - multi_value_pattern = r"""\((?P[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]*)\)""" + multi_value_pattern = fr"""\((?P<{ValueType.value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]*)\)""" keyword_pattern = fr"{UTF8_PAYLOAD_PATTERN}\s+(?:like|LIKE|ilike|ILIKE)\s+{SINGLE_QUOTES_VALUE_PATTERN}" + escape_manager = qradar_escape_manager wildcard_symbol = "%" @@ -57,14 +61,14 @@ def should_process_value_wildcard_symbols(operator: str) -> bool: return operator.lower() in ("like", "ilike") def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]: - if (num_value := get_match_group(match, group_name='num_value')) is not None: + if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: return operator, num_value - elif (bool_value := get_match_group(match, group_name='bool_value')) is not None: - return operator, bool_value + elif (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None: + return operator, self.escape_manager.remove_escape(bool_value) - elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None: - return operator, s_q_value + elif (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None: + return operator, self.escape_manager.remove_escape(s_q_value) return super().get_operator_and_value(match, operator) From ebb8585e39d79e99cd41d42bff0def55fccb3828 Mon Sep 17 00:00:00 2001 From: "nazar.gesyk" Date: Fri, 5 Jan 2024 14:53:50 +0200 Subject: [PATCH 2/3] Added escaping logic support --- translator/app/translator/core/custom_types/values.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/translator/app/translator/core/custom_types/values.py b/translator/app/translator/core/custom_types/values.py index 786da50a..8d1ff906 100644 --- a/translator/app/translator/core/custom_types/values.py +++ b/translator/app/translator/core/custom_types/values.py @@ -11,4 +11,4 @@ class ValueType(CustomEnum): bool_value = "bool_value" regular_expression_value = "re_value" greater_than_or_equal = "gte_value" - less_than_or_equal = "lte_value" \ No newline at end of file + less_than_or_equal = "lte_value" From 5b57082f599dcfd8aa1a5f0a133ab99d9635b67d Mon Sep 17 00:00:00 2001 From: "nazar.gesyk" Date: Fri, 5 Jan 2024 15:11:07 +0200 Subject: [PATCH 3/3] Added escaping logic support --- translator/app/translator/core/render.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/translator/app/translator/core/render.py b/translator/app/translator/core/render.py index f88e9952..7416082e 100644 --- a/translator/app/translator/core/render.py +++ b/translator/app/translator/core/render.py @@ -87,8 +87,8 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: raise NotImplementedException - def apply_value(self, value: Union[str, int], case_type: str = ValueType.value) -> Union[str, int]: - updated_value = self.escape_manager.escape(value, case_type) + def apply_value(self, value: Union[str, int], value_type: str = ValueType.value) -> Union[str, int]: + updated_value = self.escape_manager.escape(value, value_type) return updated_value def apply_field_value(self, field, operator, value):