diff --git a/uncoder-core/app/translator/core/custom_types/functions.py b/uncoder-core/app/translator/core/custom_types/functions.py index 13f172d0..9fc6f845 100644 --- a/uncoder-core/app/translator/core/custom_types/functions.py +++ b/uncoder-core/app/translator/core/custom_types/functions.py @@ -5,16 +5,29 @@ class FunctionType(CustomEnum): avg = "avg" count = "count" distinct_count = "distinct_count" + max = "max" + min = "min" + sum = "sum" + + divide = "divide" + earliest = "earliest" + latest = "latest" + + lower = "lower" + upper = "upper" + + compare_boolean = "compare_boolean" + + ipv4_is_in_range = "ipv4_is_in_range" + + bin = "bin" eval = "eval" fields = "fields" - latest = "latest" - max = "max" - min = "min" rename = "rename" search = "search" - sort = "sort" + sort_limit = "sort_limit" stats = "stats" - sum = "sum" table = "table" + timeframe = "timeframe" values = "values" diff --git a/uncoder-core/app/translator/core/custom_types/tokens.py b/uncoder-core/app/translator/core/custom_types/tokens.py index 0b945627..663e5d33 100644 --- a/uncoder-core/app/translator/core/custom_types/tokens.py +++ b/uncoder-core/app/translator/core/custom_types/tokens.py @@ -33,6 +33,12 @@ class GroupType(CustomEnum): STR_SEARCH_OPERATORS = ( - OperatorType.CONTAINS, OperatorType.NOT_CONTAINS, OperatorType.ENDSWITH, OperatorType.NOT_ENDSWITH, - OperatorType.STARTSWITH, OperatorType.NOT_STARTSWITH, OperatorType.REGEX, OperatorType.NOT_REGEX + OperatorType.CONTAINS, + OperatorType.NOT_CONTAINS, + OperatorType.ENDSWITH, + OperatorType.NOT_ENDSWITH, + OperatorType.STARTSWITH, + OperatorType.NOT_STARTSWITH, + OperatorType.REGEX, + OperatorType.NOT_REGEX, ) diff --git a/uncoder-core/app/translator/core/functions.py b/uncoder-core/app/translator/core/functions.py index 4a75df2b..21ddd0ff 100644 --- a/uncoder-core/app/translator/core/functions.py +++ b/uncoder-core/app/translator/core/functions.py @@ -100,7 +100,10 @@ class PlatformFunctionsManager: def __init__(self): 
self._parsers_map: dict[str, HigherOrderFunctionParser] = {} self._renders_map: dict[str, FunctionRender] = {} + self._in_query_renders_map: dict[str, FunctionRender] = {} self._names_map: dict[str, str] = {} + self._order_to_render: dict[str, int] = {} + self._render_to_prefix_functions: list[str] = [] def post_init_configure(self, platform_render: PlatformQueryRender) -> None: raise NotImplementedError @@ -121,6 +124,12 @@ def get_render(self, generic_func_name: str) -> FunctionRender: raise NotSupportedFunctionException + def get_in_query_render(self, generic_func_name: str) -> FunctionRender: + if INIT_FUNCTIONS and (render := self._in_query_renders_map.get(generic_func_name)): + return render + + raise NotSupportedFunctionException + def get_generic_func_name(self, platform_func_name: str) -> Optional[str]: if INIT_FUNCTIONS and (generic_func_name := self._names_map.get(platform_func_name)): return generic_func_name @@ -131,6 +140,20 @@ def get_platform_func_name(self, generic_func_name: str) -> Optional[str]: if INIT_FUNCTIONS: return self._inverted_names_map.get(generic_func_name) + @property + def order_to_render(self) -> dict[str, int]: + if INIT_FUNCTIONS: + return self._order_to_render + + return {} + + @property + def render_to_prefix_functions(self) -> list[str]: + if INIT_FUNCTIONS: + return self._render_to_prefix_functions + + return [] + class PlatformFunctions: manager: PlatformFunctionsManager = PlatformFunctionsManager() @@ -158,18 +181,27 @@ def parse(self, query: str) -> ParsedFunctions: invalid=invalid, ) + def _sort_functions_to_render(self, functions: list[Function]) -> list[Function]: + return sorted(functions, key=lambda func: self.manager.order_to_render.get(func.name, 0)) + def render(self, functions: list[Function], source_mapping: SourceMapping) -> RenderedFunctions: rendered = "" + rendered_prefix = "" not_supported = [] + functions = self._sort_functions_to_render(functions) for func in functions: try: func_render = 
self.manager.get_render(func.name) - rendered += self.wrap_function_with_delimiter(func_render.render(func, source_mapping)) + _rendered = func_render.render(func, source_mapping) + if func.name in self.manager.render_to_prefix_functions: + rendered_prefix += _rendered + else: + rendered += self.wrap_function_with_delimiter(_rendered) except NotSupportedFunctionException: not_supported.append(func.raw) not_supported = [self.wrap_function_with_delimiter(func.strip()) for func in not_supported] - return RenderedFunctions(rendered=rendered, not_supported=not_supported) + return RenderedFunctions(rendered_prefix=rendered_prefix, rendered=rendered, not_supported=not_supported) def wrap_function_with_delimiter(self, func: str) -> str: return f" {self.function_delimiter} {func}" diff --git a/uncoder-core/app/translator/core/mapping.py b/uncoder-core/app/translator/core/mapping.py index 5fb8956a..0ecccbc1 100644 --- a/uncoder-core/app/translator/core/mapping.py +++ b/uncoder-core/app/translator/core/mapping.py @@ -82,6 +82,7 @@ def __init__( class BasePlatformMappings: skip_load_default_mappings: bool = True + extend_default_mapping_with_all_fields: bool = False def __init__(self, platform_dir: str): self._loader = LoaderFileMappings() @@ -116,6 +117,9 @@ def prepare_mapping(self) -> dict[str, SourceMapping]: if self.skip_load_default_mappings: source_mappings[DEFAULT_MAPPING_NAME] = default_mapping + if self.extend_default_mapping_with_all_fields: + source_mappings[DEFAULT_MAPPING_NAME].fields_mapping.update(default_mapping.fields_mapping) + return source_mappings @staticmethod diff --git a/uncoder-core/app/translator/core/models/field.py b/uncoder-core/app/translator/core/models/field.py index fc18196e..10b661b0 100644 --- a/uncoder-core/app/translator/core/models/field.py +++ b/uncoder-core/app/translator/core/models/field.py @@ -1,11 +1,16 @@ from typing import Optional, Union -from app.translator.core.custom_types.tokens import OperatorType, STR_SEARCH_OPERATORS +from 
app.translator.core.custom_types.tokens import STR_SEARCH_OPERATORS, OperatorType from app.translator.core.mapping import DEFAULT_MAPPING_NAME, SourceMapping from app.translator.core.models.identifier import Identifier from app.translator.core.str_value_manager import StrValue +class Alias: + def __init__(self, name: str): + self.name = name + + class Field: def __init__(self, source_name: str): self.source_name = source_name @@ -33,8 +38,18 @@ def set_generic_names_map(self, source_mappings: list[SourceMapping], default_ma class FieldValue: - def __init__(self, source_name: str, operator: Identifier, value: Union[int, str, StrValue, list, tuple]): + def __init__( + self, + source_name: str, + operator: Identifier, + value: Union[int, str, StrValue, list, tuple], + is_alias: bool = False, + ): self.field = Field(source_name=source_name) + self.alias = None + if is_alias: + self.alias = Alias(name=source_name) + self.operator = operator self.values = [] self.__add_value(value) @@ -49,13 +64,21 @@ def __add_value(self, value: Optional[Union[int, str, StrValue, list, tuple]]) - if value and isinstance(value, (list, tuple)): for v in value: self.__add_value(v) - elif value and isinstance(value, str) and value.isnumeric() and self.operator.token_type not in STR_SEARCH_OPERATORS: + elif ( + value + and isinstance(value, str) + and value.isnumeric() + and self.operator.token_type not in STR_SEARCH_OPERATORS + ): self.values.append(int(value)) elif value is not None and isinstance(value, (int, str)): self.values.append(value) def __repr__(self): - return f"{self.field.source_name} {self.operator.token_type} {self.values}" + if self.field: + return f"{self.field.source_name} {self.operator.token_type} {self.values}" + + return f"{self.alias.name} {self.operator.token_type} {self.values}" class Keyword: diff --git a/uncoder-core/app/translator/core/models/functions/base.py b/uncoder-core/app/translator/core/models/functions/base.py index 28a29842..187a92c2 100644 --- 
a/uncoder-core/app/translator/core/models/functions/base.py +++ b/uncoder-core/app/translator/core/models/functions/base.py @@ -1,18 +1,17 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Union +from typing import Optional, Union -from app.translator.core.models.field import Field, FieldValue, Keyword +from app.translator.core.models.field import Alias, Field, FieldValue, Keyword from app.translator.core.models.identifier import Identifier @dataclass class Function: name: str = None - args: list[Union[Field, FieldValue, Keyword, Function, Identifier]] = field(default_factory=list) - as_clause: str = None - by_clauses: list[Field] = field(default_factory=list) + args: list[Union[Alias, Field, FieldValue, Keyword, Function, Identifier, str, bool]] = field(default_factory=list) + alias: Optional[Alias] = None raw: str = "" @@ -21,9 +20,11 @@ class ParsedFunctions: functions: list[Function] = field(default_factory=list) not_supported: list[str] = field(default_factory=list) invalid: list[str] = field(default_factory=list) + aliases: dict[str, Function] = field(default_factory=dict) @dataclass class RenderedFunctions: + rendered_prefix: str = "" rendered: str = "" not_supported: list[str] = field(default_factory=list) diff --git a/uncoder-core/app/translator/core/models/functions/bin.py b/uncoder-core/app/translator/core/models/functions/bin.py new file mode 100644 index 00000000..a54884e6 --- /dev/null +++ b/uncoder-core/app/translator/core/models/functions/bin.py @@ -0,0 +1,27 @@ +from dataclasses import dataclass +from typing import Optional + +from app.translator.core.custom_types.functions import FunctionType +from app.translator.core.models.field import Field +from app.translator.core.models.functions.base import Function +from app.translator.tools.custom_enum import CustomEnum + + +class SpanType(CustomEnum): + days = "days" + hours = "hours" + minutes = "minutes" + + +@dataclass +class Span: + value: str = "1" + 
type_: str = SpanType.days + + +@dataclass +class BinFunction(Function): + name: str = FunctionType.bin + span: Optional[Span] = None + field: Optional[Field] = None + bins: Optional[int] = None diff --git a/uncoder-core/app/translator/core/models/functions/eval.py b/uncoder-core/app/translator/core/models/functions/eval.py index 755a2773..6e32449f 100644 --- a/uncoder-core/app/translator/core/models/functions/eval.py +++ b/uncoder-core/app/translator/core/models/functions/eval.py @@ -2,14 +2,14 @@ from typing import Union from app.translator.core.custom_types.functions import FunctionType -from app.translator.core.models.field import Field +from app.translator.core.models.field import Alias, Field from app.translator.core.models.functions.base import Function from app.translator.core.models.identifier import Identifier @dataclass class EvalArg: - field_: Field = None + field_: Union[Alias, Field] = None expression: list[Union[Field, Function, Identifier, int, float, str]] = field(default_factory=list) diff --git a/uncoder-core/app/translator/core/models/functions/group_by.py b/uncoder-core/app/translator/core/models/functions/group_by.py new file mode 100644 index 00000000..04b3d4e6 --- /dev/null +++ b/uncoder-core/app/translator/core/models/functions/group_by.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass, field +from typing import Union + +from app.translator.core.custom_types.functions import FunctionType +from app.translator.core.models.field import Alias, Field +from app.translator.core.models.functions.base import Function + + +@dataclass +class GroupByFunction(Function): + name: str = FunctionType.stats + args: list[Function] = field(default_factory=list) + by_clauses: list[Union[Alias, Field]] = field(default_factory=list) + filter_: Function = None diff --git a/uncoder-core/app/translator/core/models/functions/rename.py b/uncoder-core/app/translator/core/models/functions/rename.py index 20a4c123..06455e05 100644 ---
a/uncoder-core/app/translator/core/models/functions/rename.py +++ b/uncoder-core/app/translator/core/models/functions/rename.py @@ -1,14 +1,14 @@ from dataclasses import dataclass from app.translator.core.custom_types.functions import FunctionType -from app.translator.core.models.field import Field +from app.translator.core.models.field import Alias, Field from app.translator.core.models.functions.base import Function @dataclass class RenameArg: field_: Field = None - alias: str = None + alias: Alias = None @dataclass diff --git a/uncoder-core/app/translator/core/models/functions/sort.py b/uncoder-core/app/translator/core/models/functions/sort.py index 97e95e33..e35646dc 100644 --- a/uncoder-core/app/translator/core/models/functions/sort.py +++ b/uncoder-core/app/translator/core/models/functions/sort.py @@ -1,7 +1,8 @@ from dataclasses import dataclass +from typing import Union from app.translator.core.custom_types.functions import FunctionType -from app.translator.core.models.field import Field +from app.translator.core.models.field import Alias, Field from app.translator.core.models.functions.base import Function from app.translator.tools.custom_enum import CustomEnum @@ -13,12 +14,13 @@ class SortOrder(CustomEnum): @dataclass class SortArg: - field: Field = None + field: Union[Alias, Field] = None + function: Function = None sort_order: str = SortOrder.asc @dataclass -class SortFunction(Function): - name: str = FunctionType.sort +class SortLimitFunction(Function): + name: str = FunctionType.sort_limit args: list[SortArg] = None limit: str = None diff --git a/uncoder-core/app/translator/core/models/functions/timeframe.py b/uncoder-core/app/translator/core/models/functions/timeframe.py new file mode 100644 index 00000000..b9fedc82 --- /dev/null +++ b/uncoder-core/app/translator/core/models/functions/timeframe.py @@ -0,0 +1,18 @@ +from dataclasses import dataclass + +from app.translator.core.custom_types.functions import FunctionType +from 
app.translator.core.models.functions.base import Function +from app.translator.tools.custom_enum import CustomEnum + + +class TimeFrameType(CustomEnum): + days = "days" + hours = "hours" + minutes = "minutes" + + +@dataclass +class TimeFrameFunction(Function): + name: str = FunctionType.timeframe + timeframe_value: str = "1" + timeframe_type: str = TimeFrameType.days diff --git a/uncoder-core/app/translator/core/render.py b/uncoder-core/app/translator/core/render.py index a6fcbcb5..81dec670 100644 --- a/uncoder-core/app/translator/core/render.py +++ b/uncoder-core/app/translator/core/render.py @@ -33,7 +33,7 @@ from app.translator.core.models.identifier import Identifier from app.translator.core.models.platform_details import PlatformDetails from app.translator.core.models.query_container import MetaInfoContainer, RawQueryContainer, TokenizedQueryContainer -from app.translator.core.str_value_manager import StrValueManager +from app.translator.core.str_value_manager import StrValue, StrValueManager from app.translator.core.tokenizer import TOKEN_TYPE @@ -64,6 +64,40 @@ def __init__(self, or_token: str): } self.or_token = f" {or_token} " + @staticmethod + def _get_value_type(field_name: str, value: Union[int, str, StrValue], value_type: Optional[str] = None) -> str: # noqa: ARG004 + return value_type or ValueType.value + + @staticmethod + def _wrap_str_value(value: str) -> str: + return value + + def _pre_process_value( + self, field: str, value: Union[int, str, StrValue], value_type: str = ValueType.value, wrap_str: bool = False + ) -> Union[int, str]: + value_type = self._get_value_type(field, value, value_type) + if isinstance(value, StrValue): + value = self.str_value_manager.from_container_to_str(value, value_type) + return self._wrap_str_value(value) if wrap_str else value + if isinstance(value, str): + value = self.str_value_manager.escape_manager.escape(value, value_type) + return self._wrap_str_value(value) if wrap_str else value + return value + + def 
_pre_process_values_list( + self, field: str, values: list[Union[int, str, StrValue]], value_type: str = ValueType.value + ) -> list[str]: + processed = [] + for val in values: + val_type = self._get_value_type(field, val, value_type) + if isinstance(val, StrValue): + processed.append(self.str_value_manager.from_container_to_str(val, val_type)) + elif isinstance(val, str): + processed.append(self.str_value_manager.escape_manager.escape(val, val_type)) + else: + processed.append(str(val)) + return processed + def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: # noqa: ARG002 raise NotImplementedException @@ -167,7 +201,7 @@ def __init__(self): LogicalOperatorType.NOT: f" {self.not_token} ", } - def generate_prefix(self, log_source_signature: LogSourceSignature) -> str: + def generate_prefix(self, log_source_signature: LogSourceSignature, functions_prefix: str = "") -> str: # noqa: ARG002 if str(log_source_signature): return f"{log_source_signature!s} {self.and_token}" return "" @@ -189,21 +223,29 @@ def map_field(self, field: Field, source_mapping: SourceMapping) -> list[str]: def apply_token(self, token: Union[FieldValue, Keyword, Identifier], source_mapping: SourceMapping) -> str: if isinstance(token, FieldValue): - mapped_fields = self.map_field(token.field, source_mapping) - if len(mapped_fields) > 1: - return self.group_token % self.operator_map[LogicalOperatorType.OR].join( - [ - self.field_value_map.apply_field_value(field=field, operator=token.operator, value=token.value) - for field in mapped_fields - ] - ) + if token.alias: + field_name = token.alias.name + else: + mapped_fields = self.map_field(token.field, source_mapping) + if len(mapped_fields) > 1: + return self.group_token % self.operator_map[LogicalOperatorType.OR].join( + [ + self.field_value_map.apply_field_value( + field=field, operator=token.operator, value=token.value + ) + for field in mapped_fields + ] + ) - return self.field_value_map.apply_field_value( -
field=mapped_fields[0], operator=token.operator, value=token.value - ) + field_name = mapped_fields[0] + return self.field_value_map.apply_field_value(field=field_name, operator=token.operator, value=token.value) + + if isinstance(token, Function): + func_render = self.platform_functions.manager.get_in_query_render(token.name) + return func_render.render(token, source_mapping) if isinstance(token, Keyword): - return self.field_value_map.apply_field_value(field=None, operator=token.operator, value=token.value) + return self.field_value_map.apply_field_value(field="", operator=token.operator, value=token.value) if token.token_type in LogicalOperatorType: return self.operator_map.get(token.token_type) @@ -285,13 +327,18 @@ def generate_raw_log_fields(self, fields: list[Field], source_mapping: SourceMap defined_raw_log_fields = [] for field in fields: mapped_field = source_mapping.fields_mapping.get_platform_field_name(generic_field_name=field.source_name) + if not mapped_field: + generic_field_name = field.get_generic_field_name(source_mapping.source_id) + mapped_field = source_mapping.fields_mapping.get_platform_field_name( + generic_field_name=generic_field_name + ) if not mapped_field and self.is_strict_mapping: raise StrictPlatformException(field_name=field.source_name, platform_name=self.details.name) if mapped_field not in source_mapping.raw_log_fields: continue field_prefix = self.raw_log_field_pattern.format(field=mapped_field) defined_raw_log_fields.append(field_prefix) - return "\n".join(defined_raw_log_fields) + return "\n".join(dict.fromkeys(defined_raw_log_fields)) def _generate_from_tokenized_query_container(self, query_container: TokenizedQueryContainer) -> str: queries_map = {} @@ -299,7 +346,8 @@ def _generate_from_tokenized_query_container(self, query_container: TokenizedQue source_mappings = self._get_source_mappings(query_container.meta_info.source_mapping_ids) for source_mapping in source_mappings: - prefix =
self.generate_prefix(source_mapping.log_source_signature) + rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping) + prefix = self.generate_prefix(source_mapping.log_source_signature, rendered_functions.rendered_prefix) try: if source_mapping.raw_log_fields: defined_raw_log_fields = self.generate_raw_log_fields( @@ -311,7 +359,6 @@ def _generate_from_tokenized_query_container(self, query_container: TokenizedQue errors.append(err) continue else: - rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping) not_supported_functions = query_container.functions.not_supported + rendered_functions.not_supported finalized_query = self.finalize_query( prefix=prefix, diff --git a/uncoder-core/app/translator/core/tokenizer.py b/uncoder-core/app/translator/core/tokenizer.py index 264cd98a..45486ef1 100644 --- a/uncoder-core/app/translator/core/tokenizer.py +++ b/uncoder-core/app/translator/core/tokenizer.py @@ -18,7 +18,7 @@ import re from abc import ABC, abstractmethod -from typing import Any, ClassVar, Union +from typing import Any, ClassVar, Optional, Union from app.translator.core.custom_types.tokens import GroupType, LogicalOperatorType, OperatorType from app.translator.core.custom_types.values import ValueType @@ -32,13 +32,14 @@ from app.translator.core.models.field import Field, FieldValue, Keyword from app.translator.core.models.functions.base import Function from app.translator.core.models.functions.eval import EvalArg +from app.translator.core.models.functions.group_by import GroupByFunction from app.translator.core.models.functions.rename import RenameArg from app.translator.core.models.functions.sort import SortArg from app.translator.core.models.identifier import Identifier from app.translator.core.str_value_manager import StrValue, StrValueManager from app.translator.tools.utils import get_match_group -TOKEN_TYPE = Union[FieldValue, Keyword, Identifier] +TOKEN_TYPE = 
Union[FieldValue, Keyword, Identifier, Field] class BaseTokenizer(ABC): @@ -112,8 +113,13 @@ def search_operator(self, query: str, field_name: str) -> str: operator = operator_search.group("operator") return operator.strip(" ") - def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> tuple[str, Any]: - return operator, get_match_group(match, group_name=ValueType.value) + def get_operator_and_value( + self, + match: re.Match, + mapped_operator: str = OperatorType.EQ, + operator: Optional[str] = None, # noqa: ARG002 + ) -> tuple[str, Any]: + return mapped_operator, get_match_group(match, group_name=ValueType.value) @staticmethod def clean_multi_value(value: str) -> str: @@ -125,7 +131,7 @@ def clean_multi_value(value: str) -> str: def search_single_value(self, query: str, operator: str, field_name: str) -> tuple[str, str, Union[str, StrValue]]: field_value_match = self._get_field_value_match(query, operator, field_name, self.value_pattern) - mapped_operator, value = self.get_operator_and_value(field_value_match, self.map_operator(operator)) + mapped_operator, value = self.get_operator_and_value(field_value_match, self.map_operator(operator), operator) if self.should_process_value_wildcards(operator): mapped_operator, value = self.process_value_wildcards(value, mapped_operator) @@ -177,7 +183,7 @@ def get_field_value_pattern(self, operator: str, field_name: str, value_pattern: return field_value_pattern.replace("___value___", value_pattern) @staticmethod - def should_process_value_wildcards(operator: str) -> bool: # noqa: ARG004 + def should_process_value_wildcards(operator: Optional[str]) -> bool: # noqa: ARG004 return True def process_value_wildcards( @@ -313,7 +319,7 @@ def tokenize(self, query: str) -> list[Union[FieldValue, Keyword, Identifier]]: @staticmethod def filter_tokens( - tokens: list[TOKEN_TYPE], token_type: Union[type[FieldValue], type[Keyword], type[Identifier]] + tokens: list[TOKEN_TYPE], token_type: 
Union[type[FieldValue], type[Field], type[Keyword], type[Identifier]] ) -> list[TOKEN_TYPE]: return [token for token in tokens if isinstance(token, token_type)] @@ -325,16 +331,21 @@ def get_field_tokens_from_func_args( if isinstance(arg, Field): result.append(arg) elif isinstance(arg, FieldValue): - result.append(arg.field) - elif isinstance(arg, Function): + if not arg.alias or arg.alias.name != arg.field.source_name: + result.append(arg.field) + elif isinstance(arg, GroupByFunction): result.extend(self.get_field_tokens_from_func_args(args=arg.args)) result.extend(self.get_field_tokens_from_func_args(args=arg.by_clauses)) - elif isinstance(arg, SortArg): + result.extend(self.get_field_tokens_from_func_args(args=[arg.filter_])) + elif isinstance(arg, Function): + result.extend(self.get_field_tokens_from_func_args(args=arg.args)) + elif isinstance(arg, SortArg) and isinstance(arg.field, Field): result.append(arg.field) elif isinstance(arg, RenameArg): result.append(arg.field_) elif isinstance(arg, EvalArg): - result.append(arg.field_) + if isinstance(arg.field_, Field): + result.append(arg.field_) result.extend(self.get_field_tokens_from_func_args(args=arg.expression)) return result diff --git a/uncoder-core/app/translator/mappings/platforms/qradar/default.yml b/uncoder-core/app/translator/mappings/platforms/qradar/default.yml index a51b46fa..7efabc4e 100644 --- a/uncoder-core/app/translator/mappings/platforms/qradar/default.yml +++ b/uncoder-core/app/translator/mappings/platforms/qradar/default.yml @@ -16,13 +16,16 @@ field_mapping: src-hostname: SrcHost src-port: SourcePort src-ip: - - sourceip - - source_ip - - SourceIP + - sourceip + - source_ip + - SourceIP + - sourceIP dst-ip: - DestinationIP - destinationip - destination_ip + - destinationIP + - destinationaddress User: userName CommandLine: Command Protocol: IPProtocol diff --git a/uncoder-core/app/translator/platforms/base/aql/const.py b/uncoder-core/app/translator/platforms/base/aql/const.py index 
267ead7d..063c6d78 100644 --- a/uncoder-core/app/translator/platforms/base/aql/const.py +++ b/uncoder-core/app/translator/platforms/base/aql/const.py @@ -1,3 +1,5 @@ UTF8_PAYLOAD_PATTERN = r"UTF8\(payload\)" NUM_VALUE_PATTERN = r"(?P\d+(?:\.\d+)*)" -SINGLE_QUOTES_VALUE_PATTERN = r"""'(?P(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*)'""" +SINGLE_QUOTES_VALUE_PATTERN = r"""'(?P(?:[:a-zA-Z\*0-9=+%#\-\/\\|,;_<>`~".$&^@!?\(\)\{\}\[\]\s]|'')*)'""" +TABLE_PATTERN = r"\s+FROM\s+[a-zA-Z.\-*]+" +TABLE_GROUP_PATTERN = r"\s+FROM\s+(?P[a-zA-Z.\-*]+)" diff --git a/uncoder-core/app/translator/platforms/base/aql/escape_manager.py b/uncoder-core/app/translator/platforms/base/aql/escape_manager.py index fd172ba2..7eb64c22 100644 --- a/uncoder-core/app/translator/platforms/base/aql/escape_manager.py +++ b/uncoder-core/app/translator/platforms/base/aql/escape_manager.py @@ -1,8 +1,18 @@ +from typing import ClassVar + +from app.translator.core.custom_types.values import ValueType from app.translator.core.escape_manager import EscapeManager +from app.translator.core.models.escape_details import EscapeDetails class AQLEscapeManager(EscapeManager): - ... + escape_map: ClassVar[dict[str, list[EscapeDetails]]] = { + ValueType.value: [EscapeDetails(pattern=r"(')", escape_symbols=r"'\1")], + ValueType.regex_value: [ + EscapeDetails(pattern=r"([$^*+()\[\]{}|.?\-\\])", escape_symbols=r"\\\1"), + EscapeDetails(pattern=r"(')", escape_symbols=r"'\1") + ] + } aql_escape_manager = AQLEscapeManager() diff --git a/uncoder-core/app/translator/platforms/base/aql/functions/__init__.py b/uncoder-core/app/translator/platforms/base/aql/functions/__init__.py new file mode 100644 index 00000000..5914c2d8 --- /dev/null +++ b/uncoder-core/app/translator/platforms/base/aql/functions/__init__.py @@ -0,0 +1,140 @@ +""" +Uncoder IO Commercial Edition License +----------------------------------------------------------------- +Copyright (c) 2024 SOC Prime, Inc. 
+ +This file is part of the Uncoder IO Commercial Edition ("CE") and is +licensed under the Uncoder IO Non-Commercial License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://github.com/UncoderIO/UncoderIO/blob/main/LICENSE + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +----------------------------------------------------------------- +""" + +import re +from typing import Optional, Union + +from app.translator.core.custom_types.functions import FunctionType +from app.translator.core.exceptions.functions import InvalidFunctionSignature, NotSupportedFunctionException +from app.translator.core.functions import PlatformFunctions +from app.translator.core.models.field import Field +from app.translator.core.models.functions.base import Function, ParsedFunctions +from app.translator.core.models.functions.sort import SortLimitFunction +from app.translator.platforms.base.aql.const import TABLE_PATTERN +from app.translator.platforms.base.aql.functions.const import func_aliases_ctx_var, AGGREGATION_FUNCTIONS_MAP +from app.translator.platforms.base.aql.functions.const import AQLFunctionType +from app.translator.platforms.base.aql.functions.manager import AQLFunctionsManager + + +class AQLFunctions(PlatformFunctions): + function_delimiter = "" + functions_pattern = r"\s(?P(group by|order by|last))" + manager = AQLFunctionsManager() + + def parse(self, query: str) -> tuple[str, ParsedFunctions]: + parsed = [] + not_supported = [] + invalid = [] + + query_prefix, query = re.split(TABLE_PATTERN, query, maxsplit=1, flags=re.IGNORECASE) + if not re.match(r"\s*SELECT\s+(?:UTF8\(payload\)|\*)", query_prefix, flags=re.IGNORECASE): + self._parse_function( + function_name=AQLFunctionType.fields, + function=query_prefix, + parsed=parsed, +
not_supported=not_supported, + invalid=invalid, + ) + aliases = self._parse_aliases(parsed=parsed) + self.__set_aliases_ctx_var(value=list(aliases.keys())) + if search := re.search(self.functions_pattern, query, flags=re.IGNORECASE): + agg_functions = query[search.start() :] + query = query[: search.start()] + self._parse_function( + function_name=AQLFunctionType.aggregation_data_parser, + function=agg_functions, + parsed=parsed, + not_supported=not_supported, + invalid=invalid, + ) + + if group_by_func := self.__filter_function_by_type(parsed, FunctionType.stats): + if table_func := self.__filter_function_by_type(parsed, FunctionType.table): + self.__group_by_post_processing(group_by_func, table_func) + else: + parsed = [func for func in parsed if func.name != FunctionType.stats] + not_supported.append(group_by_func.raw) + + parsed = self.__merge_sort_limit_functions(parsed) + self.__set_aliases_ctx_var(value=[]) + query = re.sub(r"[a-zA-Z0-9_\-\s]+WHERE", "", query, count=1, flags=re.IGNORECASE) + return query, ParsedFunctions(functions=parsed, not_supported=not_supported, invalid=invalid, aliases=aliases) + + @staticmethod + def __set_aliases_ctx_var(value: list[str]) -> None: + func_aliases_ctx_var.set(value) + + @staticmethod + def __filter_function_by_type(functions: list[Function], function_type: str) -> Optional[Function]: + for func in functions: + if func.name == function_type: + return func + + @staticmethod + def __group_by_post_processing(group_by_func: Function, table_func: Function) -> None: + agg_functions = [] + for index, arg in enumerate(table_func.args): + if isinstance(arg, Function) and arg.name in AGGREGATION_FUNCTIONS_MAP.values(): + agg_functions.append(arg) + table_func.args[index] = arg.alias + + group_by_func.args = agg_functions + + @staticmethod + def __merge_sort_limit_functions(functions: list[Function]) -> list[Function]: + indices = [] + funcs = [] + for index, func in enumerate(functions): + if func.name == FunctionType.sort_limit: +
func: SortLimitFunction + indices.append(index) + funcs.append(func) + + if len(funcs) == 2: # noqa: PLR2004 + funcs[1].args = funcs[1].args or funcs[0].args + funcs[1].limit = funcs[1].limit or funcs[0].limit + functions.pop(indices[0]) + + return functions + + def _parse_function( + self, function: str, function_name: str, parsed: list[Function], not_supported: list[str], invalid: list[str] + ) -> None: + try: + function_parser = self.manager.get_parser(function_name) + function_token = function_parser.parse(func_body=function, raw=function) + if isinstance(function_token, list): + parsed.extend(function_token) + else: + parsed.append(function_token) + except NotSupportedFunctionException: + not_supported.append(function) + except InvalidFunctionSignature: + invalid.append(function) + + @staticmethod + def _parse_aliases(parsed: list[Union[Field, Function]]) -> dict[str, Function]: + return { + arg.alias.name: arg + for function in parsed + for arg in function.args + if isinstance(arg, Function) and arg.alias + } + + +aql_functions = AQLFunctions() diff --git a/uncoder-core/app/translator/platforms/base/aql/functions/const.py b/uncoder-core/app/translator/platforms/base/aql/functions/const.py new file mode 100644 index 00000000..850ab55e --- /dev/null +++ b/uncoder-core/app/translator/platforms/base/aql/functions/const.py @@ -0,0 +1,42 @@ +from contextvars import ContextVar + +from app.translator.core.custom_types.functions import FunctionType +from app.translator.tools.custom_enum import CustomEnum + + +class AQLFunctionType(CustomEnum): + lower: str = "LOWER" + upper: str = "UPPER" + min: str = "MIN" + max: str = "MAX" + sum: str = "SUM" + avg: str = "AVG" + count: str = "COUNT" + distinct_count: str = "DISTINCTCOUNT" + last: str = "LAST" + fields: str = "fields" + aggregation_data_parser: str = "aggregation_data_parser" + + +class AQLSortOrderType(CustomEnum): + asc: str = "ASC" + desc: str = "DESC" + + +class AQLTimeFrameType(CustomEnum): + days = "DAYS" + 
hours = "HOURS" + minutes = "MINUTES" + + +func_aliases_ctx_var: ContextVar[list[str]] = ContextVar("func_aliases_ctx_var", default=[]) + + +AGGREGATION_FUNCTIONS_MAP = { + AQLFunctionType.avg: FunctionType.avg, + AQLFunctionType.count: FunctionType.count, + AQLFunctionType.distinct_count: FunctionType.distinct_count, + AQLFunctionType.max: FunctionType.max, + AQLFunctionType.min: FunctionType.min, + AQLFunctionType.sum: FunctionType.sum, +} diff --git a/uncoder-core/app/translator/platforms/base/aql/functions/manager.py b/uncoder-core/app/translator/platforms/base/aql/functions/manager.py new file mode 100644 index 00000000..45ea3c02 --- /dev/null +++ b/uncoder-core/app/translator/platforms/base/aql/functions/manager.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from app.translator.core.functions import PlatformFunctionsManager + + +class AQLFunctionsManager(PlatformFunctionsManager): + ... diff --git a/uncoder-core/app/translator/platforms/base/aql/mapping.py b/uncoder-core/app/translator/platforms/base/aql/mapping.py index a9aa1668..c0fb4b2f 100644 --- a/uncoder-core/app/translator/platforms/base/aql/mapping.py +++ b/uncoder-core/app/translator/platforms/base/aql/mapping.py @@ -28,11 +28,12 @@ def is_suitable( device_type_match = set(devicetype).issubset(self.device_types) if devicetype else None category_match = set(category).issubset(self.categories) if category else None qid_match = set(qid).issubset(self.qids) if qid else None - qid_event_category_match = set(qideventcategory).issubset(self.qid_event_categories) if qideventcategory else None + qid_event_category_match = ( + set(qideventcategory).issubset(self.qid_event_categories) if qideventcategory else None + ) return all( - condition for condition in ( - device_type_match, category_match, - qid_match, qid_event_category_match) + condition + for condition in (device_type_match, category_match, qid_match, qid_event_category_match) if condition is not None ) @@ -46,6 +47,9 @@ def 
extra_condition(self) -> str: class AQLMappings(BasePlatformMappings): + skip_load_default_mappings: bool = False + extend_default_mapping_with_all_fields: bool = True + def prepare_log_source_signature(self, mapping: dict) -> AQLLogSourceSignature: log_source = mapping.get("log_source", {}) default_log_source = mapping["default_log_source"] @@ -71,7 +75,7 @@ def get_suitable_source_mappings( continue log_source_signature: AQLLogSourceSignature = source_mapping.log_source_signature - if log_source_signature.is_suitable(devicetype, category, qid, qideventcategory): + if log_source_signature.is_suitable(devicetype, category, qid, qideventcategory): # noqa: SIM102 if source_mapping.fields_mapping.is_suitable(field_names): suitable_source_mappings.append(source_mapping) diff --git a/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py b/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py index 4cf4cb27..f911ea27 100644 --- a/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py +++ b/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py @@ -19,18 +19,22 @@ import re from typing import Union +from app.translator.core.exceptions.parser import TokenizerGeneralException +from app.translator.core.models.functions.base import ParsedFunctions from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer from app.translator.core.parser import PlatformQueryParser -from app.translator.platforms.base.aql.const import NUM_VALUE_PATTERN, SINGLE_QUOTES_VALUE_PATTERN +from app.translator.platforms.base.aql.const import NUM_VALUE_PATTERN, SINGLE_QUOTES_VALUE_PATTERN, TABLE_GROUP_PATTERN +from app.translator.platforms.base.aql.functions import AQLFunctions, aql_functions from app.translator.platforms.base.aql.log_source_map import LOG_SOURCE_FUNCTIONS_MAP from app.translator.platforms.base.aql.mapping import AQLMappings, aql_mappings -from app.translator.platforms.base.aql.tokenizer import AQLTokenizer +from 
app.translator.platforms.base.aql.tokenizer import AQLTokenizer, aql_tokenizer from app.translator.tools.utils import get_match_group class AQLQueryParser(PlatformQueryParser): - tokenizer = AQLTokenizer() + tokenizer: AQLTokenizer = aql_tokenizer mappings: AQLMappings = aql_mappings + platform_functions: AQLFunctions = aql_functions log_source_functions = ("LOGSOURCENAME", "LOGSOURCEGROUPNAME") log_source_function_pattern = r"\(?(?P___func_name___\([a-zA-Z]+\))(?:\s+like\s+|\s+ilike\s+|\s*=\s*)'(?P[%a-zA-Z\s]+)'\s*\)?\s+(?:and|or)?\s" # noqa: E501 @@ -46,8 +50,6 @@ class AQLQueryParser(PlatformQueryParser): rf"""___source_type___\s+in\s+\((?P(?:{str_value_pattern}(?:\s*,\s*)?)+)\)(?:\s+(?:and|or)\s+|\s+)?""" ) - table_pattern = r"\sFROM\s(?P
[a-zA-Z\.\-\*]+)\sWHERE\s" - def __clean_query(self, query: str) -> str: for func_name in self.log_source_functions: pattern = self.log_source_function_pattern.replace("___func_name___", func_name) @@ -59,27 +61,27 @@ def __clean_query(self, query: str) -> str: return query @staticmethod - def __parse_multi_value_log_source( - match: re.Match, query: str, pattern: str - ) -> tuple[str, Union[list[str], list[int]]]: + def __parse_multi_value_log_source(match: re.Match, query: str, pattern: str) -> tuple[str, list[str]]: value = match.group("value") pos_start = match.start() pos_end = match.end() query = query[:pos_start] + query[pos_end:] return query, re.findall(pattern, value) - def __map_log_source_value(self, logsource_key: str, value: Union[str, int]) -> tuple[str, Union[int, str]]: + @staticmethod + def __map_log_source_value(logsource_key: str, value: Union[str, int]) -> tuple[str, Union[int, str]]: if log_source_map := LOG_SOURCE_FUNCTIONS_MAP.get(logsource_key): return log_source_map.name, log_source_map.id_map.get(value, value) return logsource_key, value + @staticmethod + def __check_table(query: str) -> None: + if not re.search(TABLE_GROUP_PATTERN, query, flags=re.IGNORECASE): + raise TokenizerGeneralException + def __parse_log_sources(self, query: str) -> tuple[dict[str, Union[list[str], list[int]]], str]: log_sources = {} - if search := re.search(self.table_pattern, query, flags=re.IGNORECASE): - pos_end = search.end() - query = query[pos_end:] - for log_source_key in self.log_source_key_types: pattern = self.log_source_pattern.replace("___source_type___", log_source_key) while search := re.search(pattern, query, flags=re.IGNORECASE): @@ -105,16 +107,19 @@ def __parse_log_sources(self, query: str) -> tuple[dict[str, Union[list[str], li return log_sources, query - def _parse_query(self, text: str) -> tuple[str, dict[str, Union[list[str], list[int]]]]: + def _parse_query(self, text: str) -> tuple[str, dict[str, Union[list[str], list[int]]], 
ParsedFunctions]: query = self.__clean_query(text) + self.__check_table(query) + query, functions = self.platform_functions.parse(query) log_sources, query = self.__parse_log_sources(query) - return query, log_sources + return query, log_sources, functions def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer: - query, log_sources = self._parse_query(raw_query_container.query) + query, log_sources, functions = self._parse_query(raw_query_container.query) tokens, source_mappings = self.get_tokens_and_source_mappings(query, log_sources) fields_tokens = self.get_fields_tokens(tokens=tokens) + self.set_functions_fields_generic_names(functions=functions, source_mappings=source_mappings) meta_info = raw_query_container.meta_info meta_info.query_fields = fields_tokens meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings] - return TokenizedQueryContainer(tokens=tokens, meta_info=meta_info) + return TokenizedQueryContainer(tokens=tokens, meta_info=meta_info, functions=functions) diff --git a/uncoder-core/app/translator/platforms/base/aql/renders/aql.py b/uncoder-core/app/translator/platforms/base/aql/renders/aql.py index ba4caa10..6792d900 100644 --- a/uncoder-core/app/translator/platforms/base/aql/renders/aql.py +++ b/uncoder-core/app/translator/platforms/base/aql/renders/aql.py @@ -21,86 +21,102 @@ from app.translator.const import DEFAULT_VALUE_TYPE from app.translator.core.custom_types.values import ValueType from app.translator.core.render import BaseQueryFieldValue, PlatformQueryRender -from app.translator.platforms.base.aql.escape_manager import aql_escape_manager +from app.translator.core.str_value_manager import StrValue from app.translator.platforms.base.aql.mapping import AQLLogSourceSignature, AQLMappings, aql_mappings +from app.translator.platforms.base.aql.str_value_manager import aql_str_value_manager class AQLFieldValue(BaseQueryFieldValue): - escape_manager = aql_escape_manager + 
str_value_manager = aql_str_value_manager - def apply_value(self, value: Union[str, int], value_type: str = ValueType.value) -> Union[str, int]: # noqa: ARG002 - if isinstance(value, str): - value = value.replace("_", "__").replace("%", "%%").replace("\\'", "%").replace("'", '"') - if value.endswith("\\\\%"): - value = value.replace("\\\\%", "\\%") - return value - - def _apply_value(self, value: Union[str, int]) -> Union[str, int]: - if isinstance(value, str) and "\\" in value: - return value - return self.apply_value(value) + @staticmethod + def _wrap_str_value(value: str) -> str: + return f"'{value}'" def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.equal_modifier(field=field, value=v) for v in value])})" - if field == "UTF8(payload)": - return f"UTF8(payload) ILIKE '{self.apply_value(value)}'" - if isinstance(value, int): - return f'"{field}"={value}' - - return f"\"{field}\"='{self._apply_value(value)}'" + if isinstance(value, StrValue) and value.has_spec_symbols: + return self.__render_i_like(field, value) + return f'"{field}"={self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}' - def less_modifier(self, field: str, value: Union[int, str]) -> str: - if isinstance(value, int): - return f'"{field}"<{value}' - return f"\"{field}\"<'{self._apply_value(value)}'" + def less_modifier(self, field: str, value: Union[int, str, StrValue]) -> str: + return f'"{field}"<{self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}' - def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - if isinstance(value, int): - return f'"{field}"<={value}' - return f"\"{field}\"<='{self._apply_value(value)}'" + def less_or_equal_modifier(self, field: str, value: Union[int, str, StrValue]) -> str: + return f'"{field}"<={self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}' - def 
greater_modifier(self, field: str, value: Union[int, str]) -> str: - if isinstance(value, int): - return f'"{field}">{value}' - return f"\"{field}\">'{self._apply_value(value)}'" + def greater_modifier(self, field: str, value: Union[int, str, StrValue]) -> str: + return f'"{field}">{self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}' - def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - if isinstance(value, int): - return f'"{field}">={value}' - return f"\"{field}\">='{self._apply_value(value)}'" + def greater_or_equal_modifier(self, field: str, value: Union[int, str, StrValue]) -> str: + return f'"{field}">={self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}' def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.not_equal_modifier(field=field, value=v) for v in value])})" - if isinstance(value, int): - return f'"{field}"!={value}' - return f"\"{field}\"!='{self._apply_value(value)}'" + if isinstance(value, StrValue) and value.has_spec_symbols: + return self.__render_i_like(field, value, not_=True) + return f'"{field}"!={self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}' def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})" - return f"\"{field}\" ILIKE '%{self._apply_value(value)}%'" + return self.__render_i_like(field, value, startswith=True, endswith=True) def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.endswith_modifier(field=field, value=v) for v in value)})" - return f"\"{field}\" ILIKE '%{self._apply_value(value)}'" + return self.__render_i_like(field, value, endswith=True) def startswith_modifier(self, field: str, value: 
DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.startswith_modifier(field=field, value=v) for v in value)})" - return f"\"{field}\" ILIKE '{self._apply_value(value)}%'" + return self.__render_i_like(field, value, startswith=True) + + def __render_i_like( + self, + field: str, + value: DEFAULT_VALUE_TYPE, + startswith: bool = False, + endswith: bool = False, + not_: bool = False, + ) -> str: + prefix = "%" if endswith else "" + re_prefix = ".*" if endswith else "" + suffix = "%" if startswith else "" + re_suffix = ".*" if startswith else "" + if self.__has_special_symbols(value): + re_value = self._pre_process_value(field, value, value_type=ValueType.regex_value) + return self.__regex_modifier(field, f"{re_prefix}{re_value}{re_suffix}") + + value = self._pre_process_value(field, value, value_type=ValueType.value) + not_ = "NOT " if not_ else "" + return f"\"{field}\" {not_}ILIKE '{prefix}{value}{suffix}'" + + @staticmethod + def __has_special_symbols(value: DEFAULT_VALUE_TYPE) -> bool: + if any(char for char in str(value) if char in ("%", "_")): + return True + + return False + + @staticmethod + def __regex_modifier(field: str, value: str) -> str: + return f"\"{field}\" IMATCHES '{value}'" def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})" - return f"\"{field}\" IMATCHES '{value}'" + + if isinstance(value, StrValue): + value = self.str_value_manager.from_container_to_str(value, value_type=ValueType.regex_value) + return self.__regex_modifier(field, value) def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})" - return f"UTF8(payload) ILIKE '%{self.apply_value(value)}%'" + return self.__render_i_like("UTF8(payload)", value, startswith=True, endswith=True) class 
AQLQueryRender(PlatformQueryRender): @@ -113,7 +129,7 @@ class AQLQueryRender(PlatformQueryRender): field_value_map = AQLFieldValue(or_token=or_token) query_pattern = "{prefix} AND {query} {functions}" - def generate_prefix(self, log_source_signature: AQLLogSourceSignature) -> str: + def generate_prefix(self, log_source_signature: AQLLogSourceSignature, functions_prefix: str = "") -> str: # noqa: ARG002 table = str(log_source_signature) extra_condition = log_source_signature.extra_condition return f"SELECT UTF8(payload) FROM {table} WHERE {extra_condition}" diff --git a/uncoder-core/app/translator/platforms/base/aql/str_value_manager.py b/uncoder-core/app/translator/platforms/base/aql/str_value_manager.py new file mode 100644 index 00000000..a5f0abdf --- /dev/null +++ b/uncoder-core/app/translator/platforms/base/aql/str_value_manager.py @@ -0,0 +1,128 @@ +""" +Uncoder IO Community Edition License +----------------------------------------------------------------- +Copyright (c) 2024 SOC Prime, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+----------------------------------------------------------------- +""" +import copy +from typing import ClassVar + +from app.translator.core.custom_types.values import ValueType +from app.translator.core.str_value_manager import ( + CONTAINER_SPEC_SYMBOLS_MAP, + BaseSpecSymbol, + ReAnySymbol, + ReCaretSymbol, + ReCommaSymbol, + ReDigitalSymbol, + ReEndOfStrSymbol, + ReHyphenSymbol, + ReLeftCurlyBracket, + ReLeftParenthesis, + ReLeftSquareBracket, + ReOneOrMoreQuantifier, + ReOrOperator, + ReRightCurlyBracket, + ReRightParenthesis, + ReRightSquareBracket, + ReWhiteSpaceSymbol, + ReWordSymbol, + ReZeroOrMoreQuantifier, + ReZeroOrOneQuantifier, + SingleSymbolWildCard, + StrValue, + StrValueManager, + UnboundLenWildCard, +) +from app.translator.platforms.base.aql.escape_manager import aql_escape_manager + +RE_STR_SPEC_SYMBOLS_MAP = { + "?": ReZeroOrOneQuantifier, + "*": ReZeroOrMoreQuantifier, + "+": ReOneOrMoreQuantifier, + "^": ReCaretSymbol, + "$": ReEndOfStrSymbol, + ".": ReAnySymbol, + "[": ReLeftSquareBracket, + "]": ReRightSquareBracket, + "(": ReLeftParenthesis, + ")": ReRightParenthesis, + "{": ReLeftCurlyBracket, + "}": ReRightCurlyBracket, + "|": ReOrOperator, + ",": ReCommaSymbol, + "-": ReHyphenSymbol, +} +AQL_CONTAINER_SPEC_SYMBOLS_MAP = copy.copy(CONTAINER_SPEC_SYMBOLS_MAP) +AQL_CONTAINER_SPEC_SYMBOLS_MAP.update({SingleSymbolWildCard: "_", UnboundLenWildCard: "%"}) + + +class AQLStrValueManager(StrValueManager): + escape_manager = aql_escape_manager + container_spec_symbols_map: ClassVar[dict[type[BaseSpecSymbol], str]] = AQL_CONTAINER_SPEC_SYMBOLS_MAP + re_str_alpha_num_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = { + "w": ReWordSymbol, + "d": ReDigitalSymbol, + "s": ReWhiteSpaceSymbol, + } + re_str_spec_symbols_map = RE_STR_SPEC_SYMBOLS_MAP + str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = { + "_": SingleSymbolWildCard, + "%": UnboundLenWildCard, + } + + def from_str_to_container(self, value: str) -> StrValue: + split = 
[] + prev_char = None + for char in value: + if char in self.str_spec_symbols_map: + split.append(self.str_spec_symbols_map[char]()) + else: + if char == "'": + if prev_char == "'": + split.append(char) + prev_char = char + continue + split.append(char) + + prev_char = char + + return StrValue(value, self._concat(split)) + + def from_re_str_to_container(self, value: str) -> StrValue: + value = value.replace("''", "'") + return super().from_re_str_to_container(value) + + def from_container_to_str(self, container: StrValue, value_type: str = ValueType.value) -> str: + result = "" + for el in container.split_value: + if isinstance(el, str): + result += self.escape_manager.escape(el, value_type) + elif isinstance(el, BaseSpecSymbol): + if value_type == ValueType.regex_value: + if isinstance(el, SingleSymbolWildCard): + result += "." + continue + if isinstance(el, UnboundLenWildCard): + result += ".*" + continue + + if pattern := self.container_spec_symbols_map.get(type(el)): + result += pattern + + return result + + +aql_str_value_manager = AQLStrValueManager() diff --git a/uncoder-core/app/translator/platforms/base/aql/tokenizer.py b/uncoder-core/app/translator/platforms/base/aql/tokenizer.py index 39e46b5d..d2bfdfb7 100644 --- a/uncoder-core/app/translator/platforms/base/aql/tokenizer.py +++ b/uncoder-core/app/translator/platforms/base/aql/tokenizer.py @@ -16,15 +16,16 @@ ----------------------------------------------------------------- """ import re -from typing import Any, ClassVar, Union +from typing import ClassVar, Optional, Union from app.translator.core.custom_types.tokens import OperatorType from app.translator.core.custom_types.values import ValueType from app.translator.core.models.field import FieldValue, Keyword from app.translator.core.models.identifier import Identifier +from app.translator.core.str_value_manager import StrValue from app.translator.core.tokenizer import QueryTokenizer from app.translator.platforms.base.aql.const import 
NUM_VALUE_PATTERN, SINGLE_QUOTES_VALUE_PATTERN, UTF8_PAYLOAD_PATTERN -from app.translator.platforms.base.aql.escape_manager import aql_escape_manager +from app.translator.platforms.base.aql.str_value_manager import aql_str_value_manager from app.translator.tools.utils import get_match_group @@ -48,25 +49,33 @@ class AQLTokenizer(QueryTokenizer): _value_pattern = rf"{NUM_VALUE_PATTERN}|{bool_value_pattern}|{SINGLE_QUOTES_VALUE_PATTERN}" multi_value_pattern = rf"""\((?P<{ValueType.multi_value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]*)\)""" keyword_pattern = rf"{UTF8_PAYLOAD_PATTERN}\s+(?:like|LIKE|ilike|ILIKE)\s+{SINGLE_QUOTES_VALUE_PATTERN}" - escape_manager = aql_escape_manager wildcard_symbol = "%" + str_value_manager = aql_str_value_manager @staticmethod - def should_process_value_wildcards(operator: str) -> bool: - return operator.lower() in ("like", "ilike") + def should_process_value_wildcards(operator: Optional[str]) -> bool: + return operator and operator.lower() in ("like", "ilike") - def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> tuple[str, Any]: + def get_operator_and_value( + self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None + ) -> tuple[str, StrValue]: if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: - return operator, num_value + return mapped_operator, StrValue(num_value, split_value=[num_value]) if (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None: - return operator, self.escape_manager.remove_escape(bool_value) + return mapped_operator, StrValue(bool_value, split_value=[bool_value]) if (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None: - return operator, self.escape_manager.remove_escape(s_q_value) + if mapped_operator == OperatorType.REGEX: + return mapped_operator, self.str_value_manager.from_re_str_to_container(s_q_value) - return 
super().get_operator_and_value(match, operator) + if self.should_process_value_wildcards(operator): + return mapped_operator, self.str_value_manager.from_str_to_container(s_q_value) + + return mapped_operator, self.str_value_manager.from_str_to_container(s_q_value) + + return super().get_operator_and_value(match, mapped_operator, operator) def escape_field_name(self, field_name: str) -> str: return field_name.replace('"', r"\"").replace(" ", r"\ ") @@ -82,3 +91,6 @@ def search_keyword(self, query: str) -> tuple[Keyword, str]: keyword = Keyword(value=value.strip(self.wildcard_symbol)) pos = keyword_search.end() return keyword, query[pos:] + + +aql_tokenizer = AQLTokenizer() diff --git a/uncoder-core/app/translator/platforms/base/lucene/renders/lucene.py b/uncoder-core/app/translator/platforms/base/lucene/renders/lucene.py index 618a66e6..70760930 100644 --- a/uncoder-core/app/translator/platforms/base/lucene/renders/lucene.py +++ b/uncoder-core/app/translator/platforms/base/lucene/renders/lucene.py @@ -16,7 +16,7 @@ limitations under the License. 
----------------------------------------------------------------- """ -from typing import Union +from typing import Optional, Union from app.translator.const import DEFAULT_VALUE_TYPE from app.translator.core.custom_types.values import ValueType @@ -30,37 +30,13 @@ class LuceneFieldValue(BaseQueryFieldValue): str_value_manager = lucene_str_value_manager @staticmethod - def __get_value_type(field_name: str, value_type: str = ValueType.value) -> str: + def _get_value_type(field_name: str, value: Union[int, str, StrValue], value_type: Optional[str] = None) -> str: # noqa: ARG004 is_ip_field = field_name and (field_name.endswith(".ip") or field_name.endswith(".address")) if is_ip_field and value_type != ValueType.regex_value: return ValueType.ip return ValueType.value - def _pre_process_values_list( - self, field: str, values: list[Union[int, str, StrValue]], value_type: str = ValueType.value - ) -> list[str]: - value_type = self.__get_value_type(field, value_type) - processed = [] - for val in values: - if isinstance(val, StrValue): - processed.append(self.str_value_manager.from_container_to_str(val, value_type)) - elif isinstance(val, str): - processed.append(self.str_value_manager.escape_manager.escape(val, value_type)) - else: - processed.append(str(val)) - return processed - - def _pre_process_value( - self, field: str, value: Union[int, str, StrValue], value_type: str = ValueType.value - ) -> Union[int, str]: - value_type = self.__get_value_type(field, value_type) - if isinstance(value, StrValue): - return self.str_value_manager.from_container_to_str(value, value_type) - if isinstance(value, str): - return self.str_value_manager.escape_manager.escape(value, value_type) - return value - def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): values = self.or_token.join(self._pre_process_values_list(field, value)) @@ -135,5 +111,5 @@ class LuceneQueryRender(PlatformQueryRender): comment_symbol = "//" 
is_single_line_comment = True - def generate_prefix(self, log_source_signature: LuceneLogSourceSignature) -> str: # noqa: ARG002 + def generate_prefix(self, log_source_signature: LuceneLogSourceSignature, functions_prefix: str = "") -> str: # noqa: ARG002 return "" diff --git a/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py b/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py index 096e04bc..45fed5e4 100644 --- a/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py +++ b/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py @@ -16,7 +16,7 @@ ----------------------------------------------------------------- """ import re -from typing import ClassVar, Union +from typing import ClassVar, Optional, Union from app.translator.core.custom_types.tokens import OperatorType from app.translator.core.custom_types.values import ValueType @@ -76,26 +76,28 @@ def create_field_value(field_name: str, operator: Identifier, value: Union[str, def clean_multi_value(value: str) -> str: return value.strip('"') if value.startswith('"') and value.endswith('"') else value - def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> tuple[str, StrValue]: # noqa: PLR0911 + def get_operator_and_value( # noqa: PLR0911 + self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None + ) -> tuple[str, StrValue]: if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: - return operator, StrValue(num_value) + return mapped_operator, StrValue(num_value, split_value=[num_value]) if (re_value := get_match_group(match, group_name=ValueType.regex_value)) is not None: return OperatorType.REGEX, lucene_str_value_manager.from_re_str_to_container(re_value) if (n_q_value := get_match_group(match, group_name=ValueType.no_quotes_value)) is not None: - return operator, lucene_str_value_manager.from_str_to_container(n_q_value) + return mapped_operator, 
lucene_str_value_manager.from_str_to_container(n_q_value) if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: - return operator, lucene_str_value_manager.from_str_to_container(d_q_value) + return mapped_operator, lucene_str_value_manager.from_str_to_container(d_q_value) if (gte_value := get_match_group(match, group_name=ValueType.greater_than_or_equal)) is not None: - return OperatorType.GTE, StrValue(gte_value) + return OperatorType.GTE, StrValue(gte_value, split_value=[gte_value]) if (lte_value := get_match_group(match, group_name=ValueType.less_than_or_equal)) is not None: - return OperatorType.LTE, StrValue(lte_value) + return OperatorType.LTE, StrValue(lte_value, split_value=[lte_value]) - return super().get_operator_and_value(match, operator) + return super().get_operator_and_value(match, mapped_operator, operator) def group_values_by_operator(self, values: list[str], operator: str) -> dict[str, list[StrValue]]: mapped_operator = self.map_operator(operator) diff --git a/uncoder-core/app/translator/platforms/base/spl/tokenizer.py b/uncoder-core/app/translator/platforms/base/spl/tokenizer.py index fcb92227..8a030519 100644 --- a/uncoder-core/app/translator/platforms/base/spl/tokenizer.py +++ b/uncoder-core/app/translator/platforms/base/spl/tokenizer.py @@ -17,7 +17,7 @@ """ import re -from typing import Any, ClassVar, Union +from typing import Any, ClassVar, Optional, Union from app.translator.core.custom_types.tokens import OperatorType from app.translator.core.custom_types.values import ValueType @@ -60,20 +60,22 @@ class SplTokenizer(QueryTokenizer, ANDLogicOperatorMixin): escape_manager = spl_escape_manager - def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> tuple[str, Any]: + def get_operator_and_value( + self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None + ) -> tuple[str, Any]: if (num_value := get_match_group(match, 
group_name=ValueType.number_value)) is not None: - return operator, num_value + return mapped_operator, num_value if (no_q_value := get_match_group(match, group_name=ValueType.no_quotes_value)) is not None: - return operator, no_q_value + return mapped_operator, no_q_value if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: - return operator, self.escape_manager.remove_escape(d_q_value) + return mapped_operator, self.escape_manager.remove_escape(d_q_value) if (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None: - return operator, self.escape_manager.remove_escape(s_q_value) + return mapped_operator, self.escape_manager.remove_escape(s_q_value) - return super().get_operator_and_value(match) + return super().get_operator_and_value(match, mapped_operator, operator) def tokenize(self, query: str) -> list[Union[FieldValue, Keyword, Identifier]]: tokens = super().tokenize(query=query) diff --git a/uncoder-core/app/translator/platforms/base/sql/renders/sql.py b/uncoder-core/app/translator/platforms/base/sql/renders/sql.py index 2b8ab030..ebcb21af 100644 --- a/uncoder-core/app/translator/platforms/base/sql/renders/sql.py +++ b/uncoder-core/app/translator/platforms/base/sql/renders/sql.py @@ -80,6 +80,6 @@ class SqlQueryRender(PlatformQueryRender): comment_symbol = "--" is_single_line_comment = True - def generate_prefix(self, log_source_signature: LogSourceSignature) -> str: + def generate_prefix(self, log_source_signature: LogSourceSignature, functions_prefix: str = "") -> str: # noqa: ARG002 table = str(log_source_signature) if str(log_source_signature) else "eventlog" return f"SELECT * FROM {table}" diff --git a/uncoder-core/app/translator/platforms/base/sql/tokenizer.py b/uncoder-core/app/translator/platforms/base/sql/tokenizer.py index 3880c157..944d3c9b 100644 --- a/uncoder-core/app/translator/platforms/base/sql/tokenizer.py +++ 
b/uncoder-core/app/translator/platforms/base/sql/tokenizer.py @@ -17,7 +17,7 @@ """ import re -from typing import Any, ClassVar, Union +from typing import Any, ClassVar, Optional, Union from app.translator.core.custom_types.tokens import OperatorType from app.translator.core.custom_types.values import ValueType @@ -52,20 +52,22 @@ class SqlTokenizer(QueryTokenizer): wildcard_symbol = "%" @staticmethod - def should_process_value_wildcards(operator: str) -> bool: - return operator.lower() in ("like",) + def should_process_value_wildcards(operator: Optional[str]) -> bool: + return operator and operator.lower() in ("like",) - def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> tuple[str, Any]: + def get_operator_and_value( + self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None + ) -> tuple[str, Any]: if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: - return operator, num_value + return mapped_operator, num_value if (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None: - return operator, bool_value + return mapped_operator, bool_value if (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None: - return operator, s_q_value + return mapped_operator, s_q_value - return super().get_operator_and_value(match, operator) + return super().get_operator_and_value(match, mapped_operator, operator) @staticmethod def create_field_value(field_name: str, operator: Identifier, value: Union[str, list]) -> FieldValue: diff --git a/uncoder-core/app/translator/platforms/chronicle/tokenizer.py b/uncoder-core/app/translator/platforms/chronicle/tokenizer.py index 94a19673..5278da4a 100644 --- a/uncoder-core/app/translator/platforms/chronicle/tokenizer.py +++ b/uncoder-core/app/translator/platforms/chronicle/tokenizer.py @@ -17,7 +17,7 @@ """ import re -from typing import Any, ClassVar +from typing import Any, 
ClassVar, Optional from app.translator.core.custom_types.tokens import OperatorType from app.translator.core.custom_types.values import ValueType @@ -50,20 +50,22 @@ class ChronicleQueryTokenizer(QueryTokenizer): wildcard_symbol = ".*" - def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> tuple[str, Any]: + def get_operator_and_value( + self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None + ) -> tuple[str, Any]: if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: - return operator, num_value + return mapped_operator, num_value if (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None: - return operator, bool_value + return mapped_operator, bool_value if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: - return operator, self.escape_manager.remove_escape(d_q_value) + return mapped_operator, self.escape_manager.remove_escape(d_q_value) if (re_value := get_match_group(match, group_name=ValueType.regex_value)) is not None: return OperatorType.REGEX, re_value - return super().get_operator_and_value(match, operator) + return super().get_operator_and_value(match, mapped_operator, operator) def escape_field_name(self, field_name: str) -> str: symbols_to_check = [".", "_", "$"] @@ -88,7 +90,7 @@ class ChronicleRuleTokenizer(ChronicleQueryTokenizer): def search_field_value(self, query: str) -> tuple[FieldValue, str]: if regex_field_value_search := re.match(self.regex_field_value_pattern, query): field = regex_field_value_search.group("field") - operator, value = self.get_operator_and_value(regex_field_value_search, operator=OperatorType.REGEX) + operator, value = self.get_operator_and_value(regex_field_value_search, mapped_operator=OperatorType.REGEX) operator, value = self.process_value_wildcards(value=value, operator=OperatorType.REGEX) pos = regex_field_value_search.end() query = 
query[pos:] @@ -99,14 +101,16 @@ def search_field_value(self, query: str) -> tuple[FieldValue, str]: return super().search_field_value(query=query) - def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> tuple[str, Any]: + def get_operator_and_value( + self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None + ) -> tuple[str, Any]: if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: - return operator, self.escape_manager.remove_escape(d_q_value) + return mapped_operator, self.escape_manager.remove_escape(d_q_value) if (b_q_value := get_match_group(match, group_name=ValueType.back_quotes_value)) is not None: - return operator, self.escape_manager.remove_escape(b_q_value) + return mapped_operator, self.escape_manager.remove_escape(b_q_value) - return super().get_operator_and_value(match, operator) + return super().get_operator_and_value(match, mapped_operator, operator) def _check_field_value_match(self, query: str, white_space_pattern: str = r"\s+") -> bool: if re.match(self.regex_field_value_pattern, query, re.IGNORECASE): diff --git a/uncoder-core/app/translator/platforms/logrhythm_axon/renders/logrhythm_axon_query.py b/uncoder-core/app/translator/platforms/logrhythm_axon/renders/logrhythm_axon_query.py index 4bfc4749..9be24b73 100644 --- a/uncoder-core/app/translator/platforms/logrhythm_axon/renders/logrhythm_axon_query.py +++ b/uncoder-core/app/translator/platforms/logrhythm_axon/renders/logrhythm_axon_query.py @@ -210,7 +210,7 @@ class LogRhythmAxonQueryRender(PlatformQueryRender): is_single_line_comment = True is_strict_mapping = True - def generate_prefix(self, log_source_signature: LogSourceSignature) -> str: + def generate_prefix(self, log_source_signature: LogSourceSignature, functions_prefix: str = "") -> str: # noqa: ARG002 return str(log_source_signature) def apply_token(self, token: Union[FieldValue, Keyword, Identifier], source_mapping: 
SourceMapping) -> str: @@ -238,12 +238,7 @@ def apply_token(self, token: Union[FieldValue, Keyword, Identifier], source_mapp field=mapped_fields[0], operator=token.operator, value=token.value ) - if isinstance(token, Keyword): - return self.field_value_map.apply_field_value(field=None, operator=token.operator, value=token.value) - if token.token_type in LogicalOperatorType: - return self.operator_map.get(token.token_type) - - return token.token_type + return super().apply_token(token, source_mapping) def _generate_from_tokenized_query_container(self, query_container: TokenizedQueryContainer) -> str: queries_map = {} diff --git a/uncoder-core/app/translator/platforms/logscale/tokenizer.py b/uncoder-core/app/translator/platforms/logscale/tokenizer.py index 2b886759..c765c8a9 100644 --- a/uncoder-core/app/translator/platforms/logscale/tokenizer.py +++ b/uncoder-core/app/translator/platforms/logscale/tokenizer.py @@ -17,7 +17,7 @@ """ import re -from typing import Any, ClassVar, Union +from typing import Any, ClassVar, Optional, Union from app.translator.core.custom_types.tokens import LogicalOperatorType, OperatorType from app.translator.core.custom_types.values import ValueType @@ -50,17 +50,19 @@ class LogScaleTokenizer(QueryTokenizer, ANDLogicOperatorMixin): escape_manager = logscale_escape_manager wildcard_symbol = "*" - def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> tuple[str, Any]: + def get_operator_and_value( + self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None + ) -> tuple[str, Any]: if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: - return operator, num_value + return mapped_operator, num_value if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: - return operator, d_q_value + return mapped_operator, d_q_value if (re_value := get_match_group(match, group_name=ValueType.regex_value)) is not None: 
return OperatorType.REGEX, re_value - return super().get_operator_and_value(match, operator) + return super().get_operator_and_value(match, mapped_operator, operator) def _get_next_token(self, query: str) -> (list, str): query = query.strip("\n").strip(" ").strip("\n") diff --git a/uncoder-core/app/translator/platforms/microsoft/renders/microsoft_sentinel.py b/uncoder-core/app/translator/platforms/microsoft/renders/microsoft_sentinel.py index f86bdc0c..cb32443a 100644 --- a/uncoder-core/app/translator/platforms/microsoft/renders/microsoft_sentinel.py +++ b/uncoder-core/app/translator/platforms/microsoft/renders/microsoft_sentinel.py @@ -138,5 +138,5 @@ def __init__(self): super().__init__() self.platform_functions.manager.post_init_configure(self) - def generate_prefix(self, log_source_signature: LogSourceSignature) -> str: + def generate_prefix(self, log_source_signature: LogSourceSignature, functions_prefix: str = "") -> str: # noqa: ARG002 return str(log_source_signature) diff --git a/uncoder-core/app/translator/platforms/microsoft/tokenizer.py b/uncoder-core/app/translator/platforms/microsoft/tokenizer.py index 6d5ac603..85cf3316 100644 --- a/uncoder-core/app/translator/platforms/microsoft/tokenizer.py +++ b/uncoder-core/app/translator/platforms/microsoft/tokenizer.py @@ -17,7 +17,7 @@ """ import re -from typing import Any, ClassVar +from typing import Any, ClassVar, Optional from app.translator.core.custom_types.tokens import OperatorType from app.translator.core.mixins.operator import OperatorBasedMixin @@ -59,28 +59,30 @@ class MicrosoftSentinelTokenizer(QueryTokenizer, OperatorBasedMixin): escape_manager = microsoft_escape_manager - def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> tuple[str, Any]: # noqa: PLR0911 + def get_operator_and_value( # noqa: PLR0911 + self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None + ) -> tuple[str, Any]: if (num_value := get_match_group(match, 
group_name=MicrosoftValueType.number_value)) is not None: - return operator, num_value + return mapped_operator, num_value if (bool_value := get_match_group(match, group_name=MicrosoftValueType.bool_value)) is not None: - return operator, bool_value + return mapped_operator, bool_value if (d_q_value := get_match_group(match, group_name=MicrosoftValueType.double_quotes_value)) is not None: - return operator, self.escape_manager.remove_escape(d_q_value) + return mapped_operator, self.escape_manager.remove_escape(d_q_value) if (s_q_value := get_match_group(match, group_name=MicrosoftValueType.single_quotes_value)) is not None: - return operator, self.escape_manager.remove_escape(s_q_value) + return mapped_operator, self.escape_manager.remove_escape(s_q_value) group_name = MicrosoftValueType.verbatim_double_quotes_value if (v_d_q_value := get_match_group(match, group_name=group_name)) is not None: - return operator, v_d_q_value + return mapped_operator, v_d_q_value group_name = MicrosoftValueType.verbatim_single_quotes_value if (v_s_q_value := get_match_group(match, group_name=group_name)) is not None: - return operator, v_s_q_value + return mapped_operator, v_s_q_value - return super().get_operator_and_value(match, operator) + return super().get_operator_and_value(match, mapped_operator, operator) def clean_multi_value(self, value: str) -> str: value = value.strip(" ") diff --git a/uncoder-core/app/translator/platforms/palo_alto/escape_manager.py b/uncoder-core/app/translator/platforms/palo_alto/escape_manager.py index cfc121dc..ecb8c68d 100644 --- a/uncoder-core/app/translator/platforms/palo_alto/escape_manager.py +++ b/uncoder-core/app/translator/platforms/palo_alto/escape_manager.py @@ -5,7 +5,7 @@ from app.translator.core.models.escape_details import EscapeDetails -class XQLEscapeManager(EscapeManager): +class CortexXQLEscapeManager(EscapeManager): escape_map: ClassVar[dict[str, list[EscapeDetails]]] = { ValueType.regex_value: [ 
EscapeDetails(pattern=r'([_!@#$%^&*=+()\[\]{}|;:\'",.<>?/`~\-\s\\])', escape_symbols=r"\\\1") @@ -14,4 +14,4 @@ class XQLEscapeManager(EscapeManager): } -cortex_xql_escape_manager = XQLEscapeManager() +cortex_xql_escape_manager = CortexXQLEscapeManager() diff --git a/uncoder-core/app/translator/platforms/palo_alto/functions/__init__.py b/uncoder-core/app/translator/platforms/palo_alto/functions/__init__.py new file mode 100644 index 00000000..d6d51115 --- /dev/null +++ b/uncoder-core/app/translator/platforms/palo_alto/functions/__init__.py @@ -0,0 +1,9 @@ +from app.translator.core.functions import PlatformFunctions +from app.translator.platforms.palo_alto.functions.manager import CortexXQLFunctionsManager + + +class CortexXQLFunctions(PlatformFunctions): + manager = CortexXQLFunctionsManager() + + +cortex_xql_functions = CortexXQLFunctions() diff --git a/uncoder-core/app/translator/platforms/palo_alto/functions/const.py b/uncoder-core/app/translator/platforms/palo_alto/functions/const.py new file mode 100644 index 00000000..e6a87a49 --- /dev/null +++ b/uncoder-core/app/translator/platforms/palo_alto/functions/const.py @@ -0,0 +1,44 @@ +from app.translator.tools.custom_enum import CustomEnum + + +class CortexXQLFunctionType(CustomEnum): + avg = "avg" + count = "count" + count_distinct = "count_distinct" + min = "min" + max = "max" + sum = "sum" + + divide = "divide" + + lower = "lowercase" + upper = "uppercase" + + incidr = "incidr" + + alter = "alter" + bin = "bin" + comp = "comp" + config = "config" + fields = "fields" + filter = "filter" + limit = "limit" + sort = "sort" + timeframe = "timeframe" + + +class XqlSortOrderType(CustomEnum): + asc = "asc" + desc = "desc" + + +class XqlTimeFrameType(CustomEnum): + days = "d" + hours = "h" + minutes = "m" + + +class XqlSpanType(CustomEnum): + days = "d" + hours = "h" + minutes = "m" diff --git a/uncoder-core/app/translator/platforms/palo_alto/functions/manager.py 
b/uncoder-core/app/translator/platforms/palo_alto/functions/manager.py new file mode 100644 index 00000000..c92500a7 --- /dev/null +++ b/uncoder-core/app/translator/platforms/palo_alto/functions/manager.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from app.translator.core.functions import PlatformFunctionsManager + +if TYPE_CHECKING: + from app.translator.platforms.palo_alto.renders.cortex_xsiam import CortexXQLQueryRender + + +class CortexXQLFunctionsManager(PlatformFunctionsManager): + + def post_init_configure(self, platform_render: CortexXQLQueryRender) -> None: + ... diff --git a/uncoder-core/app/translator/platforms/palo_alto/mapping.py b/uncoder-core/app/translator/platforms/palo_alto/mapping.py index c3a22fd3..fc6a7797 100644 --- a/uncoder-core/app/translator/platforms/palo_alto/mapping.py +++ b/uncoder-core/app/translator/platforms/palo_alto/mapping.py @@ -9,7 +9,7 @@ ) -class CortexXSIAMLogSourceSignature(LogSourceSignature): +class CortexXQLLogSourceSignature(LogSourceSignature): def __init__(self, preset: Optional[list[str]], dataset: Optional[list[str]], default_source: dict): self.preset = preset self.dataset = dataset @@ -18,13 +18,14 @@ def __init__(self, preset: Optional[list[str]], dataset: Optional[list[str]], de def is_suitable(self, preset: str, dataset: str) -> bool: return preset == self.preset or dataset == self.dataset - def __prepare_log_source_for_render(self, logsource: Union[str, list[str]], model: str = "datamodel") -> str: + @staticmethod + def __prepare_log_source_for_render(logsource: Union[str, list[str]], model: str = "datamodel") -> str: if isinstance(logsource, list): return f"{model} in ({', '.join(source for source in logsource)})" return f"{model} = {logsource}" @property - def __datamodel_scheme(self): + def __datamodel_scheme(self) -> str: if datamodel := self._default_source.get("datamodel"): return f"{datamodel} " return "" @@ -39,17 +40,17 @@ def __str__(self) -> str: 
return "datamodel" -class CortexXSIAMMappings(BasePlatformMappings): +class CortexXQLMappings(BasePlatformMappings): skip_load_default_mappings: bool = False def update_default_source_mapping(self, default_mapping: SourceMapping, fields_mapping: FieldsMapping) -> None: ... - def prepare_log_source_signature(self, mapping: dict) -> CortexXSIAMLogSourceSignature: + def prepare_log_source_signature(self, mapping: dict) -> CortexXQLLogSourceSignature: preset = mapping.get("log_source", {}).get("preset") dataset = mapping.get("log_source", {}).get("dataset") default_log_source = mapping["default_log_source"] - return CortexXSIAMLogSourceSignature(preset=preset, dataset=dataset, default_source=default_log_source) + return CortexXQLLogSourceSignature(preset=preset, dataset=dataset, default_source=default_log_source) def get_suitable_source_mappings( self, field_names: list[str], preset: Optional[str], dataset: Optional[str] @@ -59,7 +60,7 @@ def get_suitable_source_mappings( if source_mapping.source_id == DEFAULT_MAPPING_NAME: continue - log_source_signature: CortexXSIAMLogSourceSignature = source_mapping.log_source_signature + log_source_signature: CortexXQLLogSourceSignature = source_mapping.log_source_signature if (preset or dataset) and log_source_signature.is_suitable(preset=preset, dataset=dataset): if source_mapping.fields_mapping.is_suitable(field_names): suitable_source_mappings.append(source_mapping) @@ -72,4 +73,4 @@ def get_suitable_source_mappings( return suitable_source_mappings -cortex_xsiam_mappings = CortexXSIAMMappings(platform_dir="palo_alto_cortex") +cortex_xql_mappings = CortexXQLMappings(platform_dir="palo_alto_cortex") diff --git a/uncoder-core/app/translator/platforms/palo_alto/renders/cortex_xsiam.py b/uncoder-core/app/translator/platforms/palo_alto/renders/cortex_xsiam.py index f7ed6ae2..b0d1e0f7 100644 --- a/uncoder-core/app/translator/platforms/palo_alto/renders/cortex_xsiam.py +++ 
b/uncoder-core/app/translator/platforms/palo_alto/renders/cortex_xsiam.py @@ -16,74 +16,102 @@ limitations under the License. ----------------------------------------------------------------- """ -from typing import Union +from typing import Optional, Union from app.translator.const import DEFAULT_VALUE_TYPE from app.translator.core.custom_types.values import ValueType -from app.translator.core.exceptions.render import UnsupportedRenderMethod from app.translator.core.models.platform_details import PlatformDetails from app.translator.core.render import BaseQueryFieldValue, PlatformQueryRender +from app.translator.core.str_value_manager import StrValue from app.translator.managers import render_manager from app.translator.platforms.palo_alto.const import cortex_xql_query_details -from app.translator.platforms.palo_alto.escape_manager import cortex_xql_escape_manager +from app.translator.platforms.palo_alto.functions import CortexXQLFunctions, cortex_xql_functions from app.translator.platforms.palo_alto.mapping import ( - CortexXSIAMLogSourceSignature, - CortexXSIAMMappings, - cortex_xsiam_mappings, + CortexXQLLogSourceSignature, + CortexXQLMappings, + cortex_xql_mappings, ) +from app.translator.platforms.palo_alto.str_value_manager import cortex_xql_str_value_manager -class CortexXSIAMFieldValue(BaseQueryFieldValue): +class CortexXQLFieldValue(BaseQueryFieldValue): details: PlatformDetails = cortex_xql_query_details - escape_manager = cortex_xql_escape_manager + str_value_manager = cortex_xql_str_value_manager + + @staticmethod + def _get_value_type(field_name: str, value: Union[int, str, StrValue], value_type: Optional[str] = None) -> str: # noqa: ARG004 + if value_type: + return value_type + + if isinstance(value, StrValue) and value.has_spec_symbols: + return ValueType.regex_value + + return ValueType.value + + @staticmethod + def _wrap_str_value(value: str) -> str: + return f'"{value}"' def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if 
isinstance(value, list): - values = ", ".join(f'"{self.apply_value(v)}"' for v in value) + values = ", ".join( + f"{self._pre_process_value(field, v, value_type=ValueType.value, wrap_str=True)}" for v in value + ) return f"{field} in ({values})" - if isinstance(value, int): - return f"{field} = {value}" - return f'{field} = "{self.apply_value(value)}"' + + return f"{field} = {self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}" def less_modifier(self, field: str, value: Union[int, str]) -> str: - return f"{field} < {value}" + return f"{field} < {self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}" def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - return f"{field} <= {value}" + return f"{field} <= {self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}" def greater_modifier(self, field: str, value: Union[int, str]) -> str: - return f"{field} > {value}" + return f"{field} > {self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}" def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - return f"{field} >= {value}" + return f"{field} >= {self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}" def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.not_equal_modifier(field=field, value=v) for v in value])})" - return f'{field} != "{self.apply_value(value)}"' + return f"{field} != {self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}" def contains_modifier(self, field: str, value: Union[list, str]) -> str: if isinstance(value, list): return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})" if value.endswith("\\"): - return f'{field} ~= ".*{self.apply_value(value, value_type=ValueType.regex_value)}.*"' - return f'{field} contains 
"{self.apply_value(value)}"' + return f'{field} ~= ".*{self._pre_process_value(field, value, value_type=ValueType.regex_value)}.*"' + return f"{field} contains {self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True)}" + + def not_contains_modifier(self, field: str, value: Union[list, str]) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})" + if value.endswith("\\"): + return f'{field} !~= ".*{self._pre_process_value(field, value, value_type=ValueType.regex_value)}.*"' + return f"{field} not contains {self._pre_process_value(field, value, ValueType.value, wrap_str=True)}" def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.endswith_modifier(field=field, value=v) for v in value)})" - return f'{field} ~= ".*{self.apply_value(value, value_type=ValueType.regex_value)}"' + return f'{field} ~= ".*{self._pre_process_value(field, value, value_type=ValueType.regex_value)}"' def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): clause = self.or_token.join(self.startswith_modifier(field=field, value=v) for v in value) return f"({clause})" - return f'{field} ~= "{self.apply_value(value, value_type=ValueType.regex_value)}.*"' + return f'{field} ~= "{self._pre_process_value(field, value, value_type=ValueType.regex_value)}.*"' def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})" - return f'{field} ~= "{self.apply_value(value, value_type=ValueType.regex_value)}"' + return f"{field} ~= {self._pre_process_value(field ,value, value_type=ValueType.regex_value, wrap_str=True)}" + + def not_regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return 
f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})" + return f"{field} !~= {self._pre_process_value(field ,value, value_type=ValueType.regex_value, wrap_str=True)}" def is_none(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): @@ -95,31 +123,37 @@ def is_not_none(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: return f"({self.or_token.join(self.is_not_none(field=field, value=v) for v in value)})" return f"{field} != null" - def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: # noqa: ARG002 + def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})" if value.endswith("\\"): - return f'_raw_log ~= ".*{self.apply_value(value, value_type=ValueType.regex_value)}.*"' - return f'_raw_log contains "{self.apply_value(value)}"' + return f'_raw_log ~= ".*{self._pre_process_value(field ,value, value_type=ValueType.regex_value)}.*"' + return f"_raw_log contains {self._pre_process_value(field ,value, value_type=ValueType.value, wrap_str=True)}" @render_manager.register class CortexXQLQueryRender(PlatformQueryRender): details: PlatformDetails = cortex_xql_query_details - mappings: CortexXSIAMMappings = cortex_xsiam_mappings + mappings: CortexXQLMappings = cortex_xql_mappings is_strict_mapping = True raw_log_field_pattern = ( '| alter {field} = regextract(to_json_string(action_evtlog_data_fields)->{field}{{}}, "\\"(.*)\\"")' ) + platform_functions: CortexXQLFunctions = cortex_xql_functions or_token = "or" and_token = "and" not_token = "not" - field_value_map = CortexXSIAMFieldValue(or_token=or_token) + field_value_map = CortexXQLFieldValue(or_token=or_token) query_pattern = "{prefix} | filter {query} {functions}" comment_symbol = "//" is_single_line_comment = False - def generate_prefix(self, log_source_signature: CortexXSIAMLogSourceSignature) -> str: - return 
str(log_source_signature) + def __init__(self): + super().__init__() + self.platform_functions.manager.post_init_configure(self) + + def generate_prefix(self, log_source_signature: CortexXQLLogSourceSignature, functions_prefix: str = "") -> str: + functions_prefix = f"{functions_prefix} | " if functions_prefix else "" + return f"{functions_prefix}{log_source_signature}" diff --git a/uncoder-core/app/translator/platforms/palo_alto/str_value_manager.py b/uncoder-core/app/translator/platforms/palo_alto/str_value_manager.py new file mode 100644 index 00000000..7a454d13 --- /dev/null +++ b/uncoder-core/app/translator/platforms/palo_alto/str_value_manager.py @@ -0,0 +1,55 @@ +""" +Uncoder IO Community Edition License +----------------------------------------------------------------- +Copyright (c) 2024 SOC Prime, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+----------------------------------------------------------------- +""" +import copy + +from app.translator.core.custom_types.values import ValueType +from app.translator.core.str_value_manager import ( + CONTAINER_SPEC_SYMBOLS_MAP, + BaseSpecSymbol, + SingleSymbolWildCard, + StrValue, + StrValueManager, + UnboundLenWildCard, +) +from app.translator.platforms.palo_alto.escape_manager import cortex_xql_escape_manager + +CORTEX_XQL_CONTAINER_SPEC_SYMBOLS_MAP = copy.copy(CONTAINER_SPEC_SYMBOLS_MAP) +CORTEX_XQL_CONTAINER_SPEC_SYMBOLS_MAP.update({SingleSymbolWildCard: ".*", UnboundLenWildCard: ".*"}) + + +class CortexXQLStrValueManager(StrValueManager): + escape_manager = cortex_xql_escape_manager + container_spec_symbols_map = CORTEX_XQL_CONTAINER_SPEC_SYMBOLS_MAP + + def from_container_to_str(self, container: StrValue, value_type: str = ValueType.value) -> str: + result = "" + for el in container.split_value: + if isinstance(el, str): + result += self.escape_manager.escape(el, value_type) + elif isinstance(el, BaseSpecSymbol): + if value_type == ValueType.value: + if isinstance(el, (SingleSymbolWildCard, UnboundLenWildCard)): + result += "*" + elif pattern := self.container_spec_symbols_map.get(type(el)): + result += pattern + + return result + + +cortex_xql_str_value_manager = CortexXQLStrValueManager() diff --git a/uncoder-core/app/translator/platforms/sigma/str_value_manager.py b/uncoder-core/app/translator/platforms/sigma/str_value_manager.py index 95c8b95e..c73115e7 100644 --- a/uncoder-core/app/translator/platforms/sigma/str_value_manager.py +++ b/uncoder-core/app/translator/platforms/sigma/str_value_manager.py @@ -2,10 +2,13 @@ Uncoder IO Community Edition License ----------------------------------------------------------------- Copyright (c) 2024 SOC Prime, Inc. + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,7 +16,6 @@ limitations under the License. ----------------------------------------------------------------- """ - from app.translator.core.str_value_manager import ( ReAnySymbol, ReCaretSymbol, @@ -86,6 +88,9 @@ def from_str_to_container(self, value: str) -> StrValue: prev_char = char + if prev_char == "\\": + split.append(prev_char) + return StrValue(value, self._concat(split))