From 3529a433168c9d3191090c531635f5d6b8d6acee Mon Sep 17 00:00:00 2001
From: Oleksandr Volha
Date: Mon, 8 Jan 2024 13:52:46 +0200
Subject: [PATCH 1/3] field class refactoring

---
 translator/app/translator/core/functions.py  | 20 +-----
 translator/app/translator/core/mapping.py    |  4 ++
 .../app/translator/core/mixins/logic.py      | 19 +++---
 .../app/translator/core/models/field.py      | 52 +++++++--------
 .../translator/core/models/functions/base.py |  4 +-
 translator/app/translator/core/parser.py     | 14 ++--
 translator/app/translator/core/render.py     | 23 ++++---
 translator/app/translator/core/tokenizer.py  | 64 +++++++++----------
 .../translator/platforms/athena/tokenizer.py |  7 +-
 .../platforms/base/lucene/tokenizer.py       |  9 ++-
 .../platforms/base/spl/tokenizer.py          |  4 +-
 .../platforms/chronicle/tokenizer.py         |  7 +-
 .../platforms/logscale/tokenizer.py          | 21 ++----
 .../translator/platforms/qradar/tokenizer.py |  8 +--
 .../platforms/sigma/models/compiler.py       |  6 +-
 .../platforms/sigma/models/modifiers.py      |  9 ++-
 .../platforms/sigma/parsers/sigma.py         | 11 ++--
 .../platforms/sigma/renders/sigma.py         | 10 +--
 .../translator/platforms/sigma/tokenizer.py  |  5 +-
 19 files changed, 134 insertions(+), 163 deletions(-)

diff --git a/translator/app/translator/core/functions.py b/translator/app/translator/core/functions.py
index 0ebae670..eed11f98 100644
--- a/translator/app/translator/core/functions.py
+++ b/translator/app/translator/core/functions.py
@@ -1,21 +1,3 @@
-"""
-Uncoder IO Commercial Edition License
------------------------------------------------------------------
-Copyright (c) 2023 SOC Prime, Inc.
-
-This file is part of the Uncoder IO Commercial Edition ("CE") and is
-licensed under the Uncoder IO Non-Commercial License (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    https://github.com/UncoderIO/UncoderIO/blob/main/LICENSE
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
------------------------------------------------------------------
-"""
-
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
@@ -58,7 +40,7 @@ def concat_kwargs(kwargs: Dict[str, str]) -> str:
 
     @staticmethod
     def map_field(field: Field, source_mapping: SourceMapping) -> str:
-        generic_field_name = field.generic_names_map[source_mapping.source_id]
+        generic_field_name = field.get_generic_field_name(source_mapping.source_id)
         mapped_field = source_mapping.fields_mapping.get_platform_field_name(generic_field_name=generic_field_name)
         if isinstance(mapped_field, list):
             mapped_field = mapped_field[0]

diff --git a/translator/app/translator/core/mapping.py b/translator/app/translator/core/mapping.py
index e3e9f72a..89a45127 100644
--- a/translator/app/translator/core/mapping.py
+++ b/translator/app/translator/core/mapping.py
@@ -122,3 +122,7 @@ def get_suitable_source_mappings(self, *args, **kwargs) -> List[SourceMapping]:
 
     def get_source_mapping(self, source_id: str) -> Optional[SourceMapping]:
         return self._source_mappings.get(source_id)
+
+    @property
+    def default_mapping(self) -> SourceMapping:
+        return self._source_mappings[DEFAULT_MAPPING_NAME]

diff --git a/translator/app/translator/core/mixins/logic.py b/translator/app/translator/core/mixins/logic.py
index cf9f959a..b5497e6d 100644
--- a/translator/app/translator/core/mixins/logic.py
+++ b/translator/app/translator/core/mixins/logic.py
@@ -1,26 +1,29 @@
 from typing import List, Union
 
-from app.translator.core.models.field import Field, Keyword
-from app.translator.core.models.identifier import Identifier
 from app.translator.core.custom_types.tokens import LogicalOperatorType, GroupType
+from app.translator.core.models.field import FieldValue, Keyword
+from app.translator.core.models.identifier import Identifier
 
 
 class ANDLogicOperatorMixin:
 
     @staticmethod
-    def get_missed_and_token_indices(tokens: List[Union[Field, Keyword, Identifier]]) -> List[int]:
+    def get_missed_and_token_indices(tokens: List[Union[FieldValue, Keyword, Identifier]]) -> List[int]:
         missed_and_indices = []
         for index in range(len(tokens) - 1):
             token = tokens[index]
             next_token = tokens[index + 1]
-            if (isinstance(token, (Field, Keyword))
-                    and not (isinstance(next_token, Identifier) and (
-                    next_token.token_type in LogicalOperatorType
-                    or next_token.token_type == GroupType.R_PAREN))):
+            if ((isinstance(token, (FieldValue, Keyword))
+                    or isinstance(token, Identifier) and token.token_type == GroupType.R_PAREN)
+                    and not (isinstance(next_token, Identifier)
+                             and (next_token.token_type
+                                  in (LogicalOperatorType.AND, LogicalOperatorType.OR, GroupType.R_PAREN)))):
                 missed_and_indices.append(index + 1)
         return list(reversed(missed_and_indices))
 
-    def add_and_token_if_missed(self, tokens: List[Union[Field, Keyword, Identifier]]) -> List[Union[Field, Keyword, Identifier]]:
+    def add_and_token_if_missed(self,
+                                tokens: List[Union[FieldValue, Keyword, Identifier]]
+                                ) -> List[Union[FieldValue, Keyword, Identifier]]:
         indices = self.get_missed_and_token_indices(tokens=tokens)
         for index in indices:
             tokens.insert(index, Identifier(token_type=LogicalOperatorType.AND))
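
Note: the reworked condition above also lets an implicit AND follow a closing parenthesis, not only a FieldValue or Keyword. A minimal sketch of the effect, assuming the enum members referenced in the hunk and made-up field names and values:

    # Hypothetical illustration, not part of the patch.
    from app.translator.core.custom_types.tokens import GroupType, LogicalOperatorType, OperatorType
    from app.translator.core.mixins.logic import ANDLogicOperatorMixin
    from app.translator.core.models.field import FieldValue
    from app.translator.core.models.identifier import Identifier

    eq = Identifier(token_type=OperatorType.EQ)
    tokens = [
        Identifier(token_type=GroupType.L_PAREN),
        FieldValue(source_name="EventID", operator=eq, value="4688"),
        Identifier(token_type=GroupType.R_PAREN),
        FieldValue(source_name="Image", operator=eq, value="cmd.exe"),  # no explicit AND before this token
    ]
    # The mixin is normally mixed into a tokenizer; a bare instance works for the demo.
    tokens = ANDLogicOperatorMixin().add_and_token_if_missed(tokens=tokens)
    # An AND identifier is now inserted between the R_PAREN token and the second
    # FieldValue -- a case the old Field-based check missed.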
diff --git a/translator/app/translator/core/models/field.py b/translator/app/translator/core/models/field.py
index 57cafcb0..2491e5bf 100644
--- a/translator/app/translator/core/models/field.py
+++ b/translator/app/translator/core/models/field.py
@@ -1,16 +1,36 @@
 from typing import Union, Optional
 
+from app.translator.core.mapping import SourceMapping, DEFAULT_MAPPING_NAME
 from app.translator.core.models.identifier import Identifier
 from app.translator.core.custom_types.tokens import OperatorType
 
 
 class Field:
-    def __init__(self, source_name: str, operator: Identifier = None, value: Union[int, str, list, tuple] = None):
+    def __init__(self, source_name: str):
+        self.source_name = source_name
+        self.__generic_names_map = {}
+
+    def get_generic_field_name(self, source_id: str) -> Optional[str]:
+        return self.__generic_names_map.get(source_id)
+
+    def set_generic_names_map(self, source_mappings: list[SourceMapping], default_mapping: SourceMapping) -> None:
+        generic_names_map = {
+            source_mapping.source_id: source_mapping.fields_mapping.get_generic_field_name(self.source_name)
+            for source_mapping in source_mappings
+        }
+        if DEFAULT_MAPPING_NAME not in generic_names_map:
+            fields_mapping = default_mapping.fields_mapping
+            generic_names_map[DEFAULT_MAPPING_NAME] = fields_mapping.get_generic_field_name(self.source_name)
+
+        self.__generic_names_map = generic_names_map
+
+
+class FieldValue:
+    def __init__(self, source_name: str, operator: Identifier, value: Union[int, str, list, tuple]):
+        self.field = Field(source_name=source_name)
         self.operator = operator
         self.values = []
         self.__add_value(value)
-        self.source_name = source_name  # input translation field name
-        self.generic_names_map = {}
 
     @property
     def value(self):
@@ -30,31 +50,7 @@ def __add__(self, other):
         self.values.append(other)
 
     def __repr__(self):
-        if self.operator:
-            return f"{self.source_name} {self.operator.token_type} {self.values}"
-
-        return f"{self.source_name}"
-
-    def __eq__(self, other):
-        if isinstance(other, Field):
-            return self._hash == other._hash
-        """For OR operator check"""
-        if self.source_name == other.source_name and self.operator == other.operator:
-            return True
-        return False
-
-    def __neq__(self, other):
-        """For AND operator check"""
-        if self.source_name != other.source_name:
-            return True
-        return False
-
-    @property
-    def _hash(self):
-        return hash(str(self))
-
-    def __hash__(self):
-        return hash(str(self))
+        return f"{self.field.source_name} {self.operator.token_type} {self.values}"
 
 
 class Keyword:
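
With the split above, Field carries only the source field name plus a private per-source map of generic names, while FieldValue owns the operator and values and exposes its Field through the `field` attribute. A short sketch of the new surface, with a made-up source id:

    # Hypothetical illustration, not part of the patch.
    from app.translator.core.custom_types.tokens import OperatorType
    from app.translator.core.models.field import FieldValue
    from app.translator.core.models.identifier import Identifier

    fv = FieldValue(source_name="EventID", operator=Identifier(token_type=OperatorType.EQ), value="4688")
    print(fv.field.source_name)  # EventID
    print(fv.values)             # ['4688'] -- a scalar value is wrapped into the values list

    # After a parser calls fv.field.set_generic_names_map(source_mappings, default_mapping),
    # renders resolve per-platform names through the new accessor:
    fv.field.get_generic_field_name("windows_security_event")  # mapped name, or None if unmapped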
diff --git a/translator/app/translator/core/models/functions/base.py b/translator/app/translator/core/models/functions/base.py
index 8fa70f10..881b5942 100644
--- a/translator/app/translator/core/models/functions/base.py
+++ b/translator/app/translator/core/models/functions/base.py
@@ -3,14 +3,14 @@
 from dataclasses import dataclass, field
 from typing import List, Union
 
-from app.translator.core.models.field import Field, Keyword
+from app.translator.core.models.field import Field, FieldValue, Keyword
 from app.translator.core.models.identifier import Identifier
 
 
 @dataclass
 class Function:
     name: str = None
-    args: List[Union[Field, Keyword, Function, Identifier]] = field(default_factory=list)
+    args: List[Union[Field, FieldValue, Keyword, Function, Identifier]] = field(default_factory=list)
     as_clause: str = None
     by_clauses: List[Field] = field(default_factory=list)

diff --git a/translator/app/translator/core/parser.py b/translator/app/translator/core/parser.py
index c80002ae..f28aecde 100644
--- a/translator/app/translator/core/parser.py
+++ b/translator/app/translator/core/parser.py
@@ -21,7 +21,7 @@
 
 from app.translator.core.functions import PlatformFunctions
 from app.translator.core.mapping import BasePlatformMappings, SourceMapping
-from app.translator.core.models.field import Field
+from app.translator.core.models.field import FieldValue
 from app.translator.core.models.functions.base import ParsedFunctions
 from app.translator.core.models.platform_details import PlatformDetails
 from app.translator.core.models.parser_output import SiemContainer, MetaInfoContainer
@@ -50,15 +50,15 @@ def get_tokens_and_source_mappings(self,
         if not query:
             raise TokenizerGeneralException("Can't translate empty query. Please provide more details")
         tokens = self.tokenizer.tokenize(query=query)
-        field_tokens = self.tokenizer.filter_tokens(tokens, Field)
+        field_tokens = [token.field for token in self.tokenizer.filter_tokens(tokens, FieldValue)]
         field_names = [field.source_name for field in field_tokens]
-        suitable_source_mappings = self.mappings.get_suitable_source_mappings(field_names=field_names, **log_sources)
-        self.tokenizer.set_field_generic_names_map(field_tokens, suitable_source_mappings, self.mappings)
+        source_mappings = self.mappings.get_suitable_source_mappings(field_names=field_names, **log_sources)
+        self.tokenizer.set_field_tokens_generic_names_map(field_tokens, source_mappings, self.mappings.default_mapping)
 
-        return tokens, suitable_source_mappings
+        return tokens, source_mappings
 
     def set_functions_fields_generic_names(self,
                                            functions: ParsedFunctions,
                                            source_mappings: List[SourceMapping]) -> None:
-        field_tokens = self.tokenizer.filter_function_tokens(tokens=functions.functions)
-        self.tokenizer.set_field_generic_names_map(field_tokens, source_mappings, self.mappings)
+        field_tokens = self.tokenizer.get_field_tokens_from_func_args(args=functions.functions)
+        self.tokenizer.set_field_tokens_generic_names_map(field_tokens, source_mappings, self.mappings.default_mapping)
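
For callers, the parser now yields FieldValue tokens whose Field objects already carry their generic names, and the tokenizer only needs the default source mapping rather than the whole mappings object. A hypothetical end-to-end use of the refactored API (the concrete platform parser instance and the log source values are made up):

    # Hypothetical illustration, not part of the patch.
    from app.translator.core.models.field import FieldValue

    tokens, source_mappings = parser.get_tokens_and_source_mappings(query, log_sources={"product": ["windows"]})
    for token in tokens:
        if isinstance(token, FieldValue):
            source_id = source_mappings[0].source_id
            print(token.field.source_name, "->", token.field.get_generic_field_name(source_id))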
diff --git a/translator/app/translator/core/render.py b/translator/app/translator/core/render.py
index 7416082e..77915173 100644
--- a/translator/app/translator/core/render.py
+++ b/translator/app/translator/core/render.py
@@ -27,11 +27,12 @@
 from app.translator.core.exceptions.parser import UnsupportedOperatorException
 from app.translator.core.functions import PlatformFunctions
 from app.translator.core.mapping import BasePlatformMappings, SourceMapping, LogSourceSignature, DEFAULT_MAPPING_NAME
-from app.translator.core.models.field import Field, Keyword
+from app.translator.core.models.field import Field, FieldValue, Keyword
 from app.translator.core.models.functions.base import Function, ParsedFunctions
+from app.translator.core.models.identifier import Identifier
 from app.translator.core.models.platform_details import PlatformDetails
 from app.translator.core.models.parser_output import MetaInfoContainer
-from app.translator.core.custom_types.tokens import LogicalOperatorType, OperatorType, GroupType
+from app.translator.core.custom_types.tokens import LogicalOperatorType, OperatorType
 
 
 class BaseQueryFieldValue(ABC):
@@ -133,7 +134,7 @@ def generate_functions(self, functions: List[Function], source_mapping: SourceMapping) -> str:
         return self.platform_functions.render(functions, source_mapping) if self.platform_functions else ""
 
     def map_field(self, field: Field, source_mapping: SourceMapping) -> List[str]:
-        generic_field_name = field.generic_names_map[source_mapping.source_id]
+        generic_field_name = field.get_generic_field_name(source_mapping.source_id)
         # field can be mapped to corresponding platform field name or list of platform field names
         mapped_field = source_mapping.fields_mapping.get_platform_field_name(generic_field_name=generic_field_name)
         if not mapped_field and self.is_strict_mapping:
@@ -145,10 +146,10 @@ def map_field(self, field: Field, source_mapping: SourceMapping) -> List[str]:
         return mapped_field if mapped_field else [generic_field_name] if generic_field_name else [field.source_name]
 
     def apply_token(self,
-                    token: Union[Field, Keyword, LogicalOperatorType, GroupType],
+                    token: Union[FieldValue, Keyword, Identifier],
                     source_mapping: SourceMapping) -> str:
-        if isinstance(token, (Field, Keyword)):
-            mapped_fields = self.map_field(token, source_mapping) if isinstance(token, Field) else [None]
+        if isinstance(token, FieldValue):
+            mapped_fields = self.map_field(token.field, source_mapping)
             if len(mapped_fields) > 1:
                 return self.group_token % self.operator_map[LogicalOperatorType.OR].join([
                     self.field_value_map.apply_field_value(field=field, operator=token.operator, value=token.value)
@@ -158,12 +159,17 @@ def apply_token(self,
             return self.field_value_map.apply_field_value(field=mapped_fields[0],
                                                           operator=token.operator,
                                                           value=token.value)
+        elif isinstance(token, Keyword):
+            return self.field_value_map.apply_field_value(field=None,
+                                                          operator=token.operator,
+                                                          value=token.value)
         elif token.token_type in LogicalOperatorType:
             return self.operator_map.get(token.token_type)
+
         return token.token_type
 
     def generate_query(self,
-                       query: List[Union[Field, Keyword, LogicalOperatorType, GroupType]],
+                       query: List[Union[FieldValue, Keyword, Identifier]],
                        source_mapping: SourceMapping) -> str:
         result_values = []
         for token in query:
@@ -173,8 +179,7 @@ def wrap_query_with_meta_info(self, meta_info: MetaInfoContainer, query: str):
         if meta_info and (meta_info.id or meta_info.title):
             query_meta_info = "\n".join(
-                self.wrap_with_comment(f"{key}{value}")
-                for key, value in {"name: ": meta_info.title, "uuid: ": meta_info.id}.items() if value
+                self.wrap_with_comment(f"{key}{value}") for key, value in {"name: ": meta_info.title, "uuid: ": meta_info.id}.items() if value
             )
             query = f"{query}\n\n{query_meta_info}"
         return query

diff --git a/translator/app/translator/core/tokenizer.py b/translator/app/translator/core/tokenizer.py
index 8f7ab6a2..85d5af6d 100644
--- a/translator/app/translator/core/tokenizer.py
+++ b/translator/app/translator/core/tokenizer.py
@@ -27,20 +27,20 @@
     TokenizerGeneralException,
     QueryParenthesesException
 )
-from app.translator.core.mapping import SourceMapping, DEFAULT_MAPPING_NAME, BasePlatformMappings
-from app.translator.core.models.field import Field, Keyword
+from app.translator.core.mapping import SourceMapping
+from app.translator.core.models.field import Field, FieldValue, Keyword
 from app.translator.core.models.functions.base import Function
 from app.translator.core.models.functions.sort import SortArg
 from app.translator.core.models.identifier import Identifier
 from app.translator.core.custom_types.tokens import OperatorType, GroupType
 from app.translator.tools.utils import get_match_group
 
-TOKEN_TYPE = Union[Field, Keyword, Identifier]
+TOKEN_TYPE = Union[FieldValue, Keyword, Identifier]
 
 
 class BaseTokenizer(ABC):
     @abstractmethod
-    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
+    def tokenize(self, query: str) -> List[Union[FieldValue, Keyword, Identifier]]:
         raise NotImplementedError()
@@ -180,18 +180,18 @@ def process_value_wildcard_symbols(self,
         return self._clean_value(value, wildcard_symbol), op
 
     @staticmethod
-    def create_field(field_name: str, operator: Identifier, value: Union[str, List]) -> Field:
-        return Field(operator=operator, value=value, source_name=field_name)
+    def create_field_value(field_name: str, operator: Identifier, value: Union[str, List]) -> FieldValue:
+        return FieldValue(source_name=field_name, operator=operator, value=value)
 
-    def search_field_value(self, query):
+    def search_field_value(self, query) -> Tuple[FieldValue, str]:
         field_name = self.search_field(query)
         operator = self.search_operator(query, field_name)
         query, operator, value = self.search_value(query=query, operator=operator, field_name=field_name)
         value, operator_token = self.process_value_wildcard_symbols(value=value,
                                                                     operator=operator,
                                                                     wildcard_symbol=self.wildcard_symbol)
-        field = self.create_field(field_name=field_name, operator=operator_token, value=value)
-        return field, query
+        field_value = self.create_field_value(field_name=field_name, operator=operator_token, value=value)
+        return field_value, query
 
     def _match_field_value(self, query: str, white_space_pattern: str = r"\s+") -> bool:
         single_value_operator_group = fr"(?:{'|'.join(self.single_value_operators_map)})"
@@ -208,7 +208,7 @@ def _match_field_value(self, query: str, white_space_pattern: str = r"\s+") -> bool:
 
         return False
 
-    def _get_identifier(self, query: str) -> Tuple[Union[Field, Keyword, Identifier], str]:
+    def _get_identifier(self, query: str) -> Tuple[Union[FieldValue, Keyword, Identifier], str]:
         query = query.strip("\n").strip(" ").strip("\n")
         if query.startswith(GroupType.L_PAREN):
             return Identifier(token_type=GroupType.L_PAREN), query[1:]
@@ -240,7 +240,7 @@ def _validate_parentheses(tokens):
             raise QueryParenthesesException()
         return True
 
-    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
+    def tokenize(self, query: str) -> List[Union[FieldValue, Keyword, Identifier]]:
         tokenized = []
         while query:
             identifier, query = self._get_identifier(query=query)
@@ -250,34 +250,28 @@ def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
 
     @staticmethod
     def filter_tokens(tokens: List[TOKEN_TYPE],
-                      token_type: Union[Type[Field], Type[Keyword], Type[Identifier]]) -> List[TOKEN_TYPE]:
+                      token_type: Union[Type[FieldValue], Type[Keyword], Type[Identifier]]) -> List[TOKEN_TYPE]:
         return [token for token in tokens if isinstance(token, token_type)]
 
-    def filter_function_tokens(self,
-                               tokens: List[Union[Field, Keyword, Identifier, Function, SortArg]]) -> List[TOKEN_TYPE]:
+    def get_field_tokens_from_func_args(self,
+                                        args: List[Union[Field, FieldValue, Keyword, Identifier, Function, SortArg]]
+                                        ) -> List[Field]:
         result = []
-        for token in tokens:
-            if isinstance(token, Field):
-                result.append(token)
-            elif isinstance(token, Function):
-                result.extend(self.filter_function_tokens(tokens=token.args))
-                result.extend(self.filter_function_tokens(tokens=token.by_clauses))
-            elif isinstance(token, SortArg):
-                result.append(token.field)
+        for arg in args:
+            if isinstance(arg, Field):
+                result.append(arg)
+            elif isinstance(arg, FieldValue):
+                result.append(arg.field)
+            elif isinstance(arg, Function):
+                result.extend(self.get_field_tokens_from_func_args(args=arg.args))
+                result.extend(self.get_field_tokens_from_func_args(args=arg.by_clauses))
+            elif isinstance(arg, SortArg):
+                result.append(arg.field)
         return result
 
     @staticmethod
-    def set_field_generic_names_map(tokens: List[Field],
-                                    source_mappings: List[SourceMapping],
-                                    platform_mappings: BasePlatformMappings) -> None:
+    def set_field_tokens_generic_names_map(tokens: List[Field],
+                                           source_mappings: List[SourceMapping],
+                                           default_mapping: SourceMapping) -> None:
         for token in tokens:
-            generic_names_map = {
-                source_mapping.source_id: source_mapping.fields_mapping.get_generic_field_name(token.source_name)
-                for source_mapping in source_mappings
-            }
-            if DEFAULT_MAPPING_NAME not in generic_names_map:
-                default_source_mapping = platform_mappings.get_source_mapping(DEFAULT_MAPPING_NAME)
-                fields_mapping = default_source_mapping.fields_mapping
-                generic_names_map[DEFAULT_MAPPING_NAME] = fields_mapping.get_generic_field_name(token.source_name)
-
-            token.generic_names_map = generic_names_map
+            token.set_generic_names_map(source_mappings, default_mapping)
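
The renamed helper above now also collects the Field behind each FieldValue argument, and still recurses through nested functions and by-clauses. A sketch of what it gathers, assuming a concrete tokenizer instance and hand-built function args:

    # Hypothetical illustration, not part of the patch.
    from app.translator.core.models.field import Field
    from app.translator.core.models.functions.base import Function

    inner = Function(name="count", args=[Field(source_name="user")])
    outer = Function(name="stats", args=[inner], by_clauses=[Field(source_name="host")])
    fields = tokenizer.get_field_tokens_from_func_args(args=[outer])
    # fields -> [Field for "user", Field for "host"]: plain Field args are kept,
    # FieldValue args contribute their .field, nested Functions are walked
    # recursively, and SortArg entries contribute their .field as before.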
diff --git a/translator/app/translator/platforms/athena/tokenizer.py b/translator/app/translator/platforms/athena/tokenizer.py
index 0e67349b..37dd8f3b 100644
--- a/translator/app/translator/platforms/athena/tokenizer.py
+++ b/translator/app/translator/platforms/athena/tokenizer.py
@@ -20,6 +20,7 @@
 from typing import Tuple, Any
 
 from app.translator.core.custom_types.values import ValueType
+from app.translator.core.models.field import FieldValue
 from app.translator.core.models.identifier import Identifier
 from app.translator.core.tokenizer import QueryTokenizer
 from app.translator.core.custom_types.tokens import OperatorType
@@ -66,7 +67,7 @@ def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ):
 
         return super().get_operator_and_value(match, operator)
 
-    def search_field_value(self, query):
+    def search_field_value(self, query) -> Tuple[FieldValue, str]:
         field_name = self.search_field(query)
         operator = self.search_operator(query, field_name)
         should_process_value_wildcard_symbols = self.should_process_value_wildcard_symbols(operator)
@@ -81,8 +82,8 @@ def search_field_value(self, query):
         )
 
         field_name = field_name.strip('"')
-        field = self.create_field(field_name=field_name, operator=operator_token, value=value)
-        return field, query
+        field_value = self.create_field_value(field_name=field_name, operator=operator_token, value=value)
+        return field_value, query
 
     def tokenize(self, query: str) -> list:
         query = re.sub(r"\s*ESCAPE\s*'.'", '', query)  # remove `ESCAPE 'escape_char'` in LIKE expr

diff --git a/translator/app/translator/platforms/base/lucene/tokenizer.py b/translator/app/translator/platforms/base/lucene/tokenizer.py
index 85eb48a2..c44173b0 100644
--- a/translator/app/translator/platforms/base/lucene/tokenizer.py
+++ b/translator/app/translator/platforms/base/lucene/tokenizer.py
@@ -22,7 +22,7 @@
 from app.translator.core.custom_types.values import ValueType
 from app.translator.core.exceptions.parser import TokenizerGeneralException
 from app.translator.core.mixins.logic import ANDLogicOperatorMixin
-from app.translator.core.models.field import Keyword, Field
+from app.translator.core.models.field import Keyword, FieldValue
 from app.translator.core.models.identifier import Identifier
 from app.translator.core.tokenizer import QueryTokenizer
 from app.translator.core.custom_types.tokens import OperatorType
@@ -41,7 +41,6 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     }
 
     field_pattern = r"(?P<field_name>[a-zA-Z\.\-_]+)"
-    match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:\[\*\sTO|:\[|:<|:>|:))\s*"
     _num_value_pattern = r"\d+(?:\.\d+)*"
     num_value_pattern = fr"(?P<{ValueType.number_value}>{_num_value_pattern})\s*"
     double_quotes_value_pattern = fr'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s]|\\\"|\\)*)"\s*'
@@ -61,10 +60,10 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     wildcard_symbol = "*"
 
     @staticmethod
-    def create_field(field_name: str, operator: Identifier, value: Union[str, List]) -> Field:
+    def create_field_value(field_name: str, operator: Identifier, value: Union[str, List]) -> FieldValue:
         field_name = field_name.replace(".text", "")
         field_name = field_name.replace(".keyword", "")
-        return Field(operator=operator, value=value, source_name=field_name)
+        return FieldValue(source_name=field_name, operator=operator, value=value)
 
     @staticmethod
     def clean_quotes(value: Union[str, int]):
@@ -131,6 +130,6 @@ def _match_field_value(self, query: str, white_space_pattern: str = r"\s*") -> bool:
 
         return super()._match_field_value(query, white_space_pattern=white_space_pattern)
 
-    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
+    def tokenize(self, query: str) -> List[Union[FieldValue, Keyword, Identifier]]:
         tokens = super().tokenize(query=query)
         return self.add_and_token_if_missed(tokens=tokens)
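
Note that the Lucene override above still normalizes Elasticsearch multi-field suffixes before building the token; only the produced type changed. A quick sketch (the operator identifier is built the same way as elsewhere in the patch):

    # Hypothetical illustration, not part of the patch.
    from app.translator.core.custom_types.tokens import OperatorType
    from app.translator.core.models.identifier import Identifier
    from app.translator.platforms.base.lucene.tokenizer import LuceneTokenizer

    eq = Identifier(token_type=OperatorType.EQ)
    fv = LuceneTokenizer.create_field_value(field_name="user.name.keyword", operator=eq, value="admin")
    print(fv.field.source_name)  # user.name -- the .keyword/.text suffixes are stripped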
"") field_name = field_name.replace(".keyword", "") - return Field(operator=operator, value=value, source_name=field_name) + return FieldValue(source_name=field_name, operator=operator, value=value) @staticmethod def clean_quotes(value: Union[str, int]): @@ -131,6 +130,6 @@ def _match_field_value(self, query: str, white_space_pattern: str = r"\s*") -> b return super()._match_field_value(query, white_space_pattern=white_space_pattern) - def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]: + def tokenize(self, query: str) -> List[Union[FieldValue, Keyword, Identifier]]: tokens = super().tokenize(query=query) return self.add_and_token_if_missed(tokens=tokens) diff --git a/translator/app/translator/platforms/base/spl/tokenizer.py b/translator/app/translator/platforms/base/spl/tokenizer.py index 0ef3977b..6862fbee 100644 --- a/translator/app/translator/platforms/base/spl/tokenizer.py +++ b/translator/app/translator/platforms/base/spl/tokenizer.py @@ -21,7 +21,7 @@ from app.translator.core.custom_types.values import ValueType from app.translator.core.mixins.logic import ANDLogicOperatorMixin -from app.translator.core.models.field import Field, Keyword +from app.translator.core.models.field import FieldValue, Keyword from app.translator.core.models.identifier import Identifier from app.translator.core.tokenizer import QueryTokenizer from app.translator.core.custom_types.tokens import OperatorType @@ -68,6 +68,6 @@ def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.E return super().get_operator_and_value(match) - def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]: + def tokenize(self, query: str) -> List[Union[FieldValue, Keyword, Identifier]]: tokens = super().tokenize(query=query) return self.add_and_token_if_missed(tokens=tokens) diff --git a/translator/app/translator/platforms/chronicle/tokenizer.py b/translator/app/translator/platforms/chronicle/tokenizer.py index dd64500a..19c4a873 100644 --- a/translator/app/translator/platforms/chronicle/tokenizer.py +++ b/translator/app/translator/platforms/chronicle/tokenizer.py @@ -21,6 +21,7 @@ from app.translator.core.custom_types.values import ValueType from app.translator.core.exceptions.parser import TokenizerGeneralException +from app.translator.core.models.field import FieldValue from app.translator.core.tokenizer import QueryTokenizer from app.translator.core.custom_types.tokens import OperatorType from app.translator.platforms.chronicle.escape_manager import chronicle_escape_manager @@ -77,7 +78,7 @@ class ChronicleRuleTokenizer(ChronicleQueryTokenizer): back_quotes_value_pattern = fr'`(?P<{ValueType.back_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\"\\\.$&^@!\(\)\{{\}}\s])*)`' regex_value_regex = fr"{double_quotes_value_pattern}|{back_quotes_value_pattern}\s*\)\s*(?:nocase)?\s*" - def search_field_value(self, query): + def search_field_value(self, query) -> Tuple[FieldValue, str]: if query.startswith("re.regex("): field_search = re.search(self.regex_field_regex, query) if field_search is None: @@ -99,8 +100,8 @@ def search_field_value(self, query): pos = value_search.end() query = query[pos:] - field = self.create_field(field_name=field, operator=operator, value=value) - return field, query + field_value = self.create_field_value(field_name=field, operator=operator, value=value) + return field_value, query else: return super().search_field_value(query=query) diff --git a/translator/app/translator/platforms/logscale/tokenizer.py 
diff --git a/translator/app/translator/platforms/qradar/tokenizer.py b/translator/app/translator/platforms/qradar/tokenizer.py
index b50dd031..9bf2cb41 100644
--- a/translator/app/translator/platforms/qradar/tokenizer.py
+++ b/translator/app/translator/platforms/qradar/tokenizer.py
@@ -21,7 +21,7 @@
 
 from app.translator.core.custom_types.values import ValueType
 from app.translator.platforms.qradar.const import UTF8_PAYLOAD_PATTERN, SINGLE_QUOTES_VALUE_PATTERN, NUM_VALUE_PATTERN
-from app.translator.core.models.field import Keyword
+from app.translator.core.models.field import Keyword, FieldValue
 from app.translator.core.models.identifier import Identifier
 from app.translator.core.tokenizer import QueryTokenizer
 from app.translator.core.custom_types.tokens import OperatorType
@@ -77,7 +77,7 @@ def escape_field_name(self, field_name):
         field_name = field_name.replace(' ', r'\ ')
         return field_name
 
-    def search_field_value(self, query):
+    def search_field_value(self, query) -> Tuple[FieldValue, str]:
         field_name = self.search_field(query)
         operator = self.search_operator(query, field_name)
         should_process_value_wildcard_symbols = self.should_process_value_wildcard_symbols(operator)
@@ -92,8 +92,8 @@ def search_field_value(self, query):
         )
 
         field_name = field_name.strip('"')
-        field = self.create_field(field_name=field_name, operator=operator_token, value=value)
-        return field, query
+        field_value = self.create_field_value(field_name=field_name, operator=operator_token, value=value)
+        return field_value, query
     def search_keyword(self, query: str) -> Tuple[Keyword, str]:
         keyword_search = re.search(self.keyword_pattern, query)

diff --git a/translator/app/translator/platforms/sigma/models/compiler.py b/translator/app/translator/platforms/sigma/models/compiler.py
index 3d39235e..8e630017 100644
--- a/translator/app/translator/platforms/sigma/models/compiler.py
+++ b/translator/app/translator/platforms/sigma/models/compiler.py
@@ -16,7 +16,7 @@
 -----------------------------------------------------------------
 """
 
-from app.translator.core.models.field import Field, Keyword
+from app.translator.core.models.field import FieldValue, Keyword
 from app.translator.platforms.sigma.models.group import Group
 from app.translator.core.models.identifier import Identifier
 from app.translator.platforms.sigma.models.operator import Operator, NOT
@@ -31,14 +31,14 @@ def generate(self, tokens: list, group: Group = None):
             return group
         group = group if group else Group()
         token = tokens[0]
-        if isinstance(token, (Field, Keyword)):
+        if isinstance(token, (FieldValue, Keyword)):
             group += token
             return self.generate(tokens=tokens[1::], group=group)
         elif token.token_type == LogicalOperatorType.OR or token.token_type == LogicalOperatorType.AND:
             group.items = Operator(operator_type=token.token_type)
             return self.generate(tokens=tokens[1::], group=group)
         elif token.token_type == LogicalOperatorType.NOT:
-            if isinstance(tokens[1], (Field, Keyword)):
+            if isinstance(tokens[1], (FieldValue, Keyword)):
                 tokens.insert(2, Identifier(token_type=GroupType.R_PAREN))
                 tokens.insert(1, Identifier(token_type=GroupType.L_PAREN))
                 sub_group = Group()

diff --git a/translator/app/translator/platforms/sigma/models/modifiers.py b/translator/app/translator/platforms/sigma/models/modifiers.py
index 30868b43..96d333f1 100644
--- a/translator/app/translator/platforms/sigma/models/modifiers.py
+++ b/translator/app/translator/platforms/sigma/models/modifiers.py
@@ -1,6 +1,6 @@
 from typing import Union, List
 
-from app.translator.core.models.field import Field
+from app.translator.core.models.field import FieldValue
 from app.translator.core.models.identifier import Identifier
 from app.translator.core.custom_types.tokens import LogicalOperatorType, OperatorType, GroupType
 
@@ -29,7 +29,7 @@ def modifier_all(self, field_name: str, modifier: str, values: Union[str, List[str]]) -> Union[tuple, list]:
         if (isinstance(values, list) and len(values) == 1) or isinstance(values, str):
             operator = self.map_modifier(modifier=modifier)
-            return (Field(source_name=field_name, operator=operator, value=values), )
+            return (FieldValue(source_name=field_name, operator=operator, value=values), )
         else:
             tokens = []
             for value in values:
@@ -53,8 +53,7 @@ def modifier_windash(self, field_name: str, modifier: Union[str, list],
                 tokens.append(self.or_token)
             return [Identifier(token_type=GroupType.L_PAREN), *tokens[:-1], Identifier(token_type=GroupType.R_PAREN)]
         operator = self.map_modifier(modifier=modifier)
-        field = Field(source_name=field_name, operator=operator, value=self.__prepare_windash_value(value=values))
-        return (field,)
+        return (FieldValue(source_name=field_name, operator=operator, value=self.__prepare_windash_value(value=values)),)
 
     def apply_multi_modifier(self, field_name: str, modifier: list,
                              values: Union[str, List[str]]) -> Union[tuple, list]:
@@ -69,7 +68,7 @@ def apply_modifier(self, field_name: str, modifier: list, values: Union[str, List[str]]) -> Union[tuple, list]:
             modifier = OperatorType.EQ
             return self.modifier_windash(field_name=field_name, modifier=modifier, values=values)
         operator = self.map_modifier(modifier=modifier)
-        return (Field(source_name=field_name, operator=operator, value=values), )
+        return (FieldValue(source_name=field_name, operator=operator, value=values), )
 
     def create_token(self, field_name: str, modifier: list, value: Union[str, List[str], int]) -> Union[tuple, list]:
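
All three modifier paths above now return FieldValue tokens, so downstream Sigma compilation sees the same token type as the core tokenizers. A rough sketch of the single-value path from the modifier_all hunk; the operator that `contains` maps to is assumed, since map_modifier is outside this diff:

    # Hypothetical illustration, not part of the patch.
    from app.translator.platforms.sigma.models.modifiers import ModifierManager

    manager = ModifierManager()
    (token,) = manager.modifier_all(field_name="CommandLine", modifier="contains", values="mimikatz")
    print(type(token).__name__)     # FieldValue
    print(token.field.source_name)  # CommandLine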
diff --git a/translator/app/translator/platforms/sigma/parsers/sigma.py b/translator/app/translator/platforms/sigma/parsers/sigma.py
index 769e8531..90d3953b 100644
--- a/translator/app/translator/platforms/sigma/parsers/sigma.py
+++ b/translator/app/translator/platforms/sigma/parsers/sigma.py
@@ -18,7 +18,6 @@
 """
 
-import re
 from typing import List, Union
 
 from app.translator.core.tokenizer import QueryTokenizer
@@ -27,7 +26,7 @@
 from app.translator.platforms.sigma.tokenizer import SigmaTokenizer, SigmaConditionTokenizer
 from app.translator.core.exceptions.core import SigmaRuleValidationException
 from app.translator.core.mixins.rule import YamlRuleMixin
-from app.translator.core.models.field import Field
+from app.translator.core.models.field import FieldValue
 from app.translator.core.models.platform_details import PlatformDetails
 from app.translator.core.models.parser_output import SiemContainer, MetaInfoContainer
@@ -75,14 +74,14 @@ def parse(self, text: str) -> SiemContainer:
             if key in ("product", "service", "category")
         }
         tokens = self.tokenizer.tokenize(detection=sigma_rule.get("detection"))
-        field_tokens = QueryTokenizer.filter_tokens(tokens, Field)
+        field_tokens = [token.field for token in QueryTokenizer.filter_tokens(tokens, FieldValue)]
         field_names = [field.source_name for field in field_tokens]
-        suitable_source_mappings = self.mappings.get_suitable_source_mappings(field_names=field_names, **log_sources)
-        QueryTokenizer.set_field_generic_names_map(field_tokens, suitable_source_mappings, self.mappings)
+        source_mappings = self.mappings.get_suitable_source_mappings(field_names=field_names, **log_sources)
+        QueryTokenizer.set_field_tokens_generic_names_map(field_tokens, source_mappings, self.mappings.default_mapping)
         return SiemContainer(
             query=tokens,
             meta_info=self._get_meta_info(
                 rule=sigma_rule,
-                source_mapping_ids=[source_mapping.source_id for source_mapping in suitable_source_mappings]
+                source_mapping_ids=[source_mapping.source_id for source_mapping in source_mappings]
             ),
         )
diff --git a/translator/app/translator/platforms/sigma/renders/sigma.py b/translator/app/translator/platforms/sigma/renders/sigma.py
index 2244e32f..1a8b0e3d 100644
--- a/translator/app/translator/platforms/sigma/renders/sigma.py
+++ b/translator/app/translator/platforms/sigma/renders/sigma.py
@@ -26,7 +26,7 @@
 from app.translator.platforms.sigma.mapping import SigmaMappings, sigma_mappings, SigmaLogSourceSignature
 from app.translator.platforms.sigma.models.compiler import DataStructureCompiler
 from app.translator.core.mapping import SourceMapping, DEFAULT_MAPPING_NAME
-from app.translator.core.models.field import Field, Keyword
+from app.translator.core.models.field import FieldValue, Keyword
 from app.translator.platforms.sigma.models.group import Group
 from app.translator.platforms.sigma.models.operator import OR, AND, NOT
 from app.translator.core.models.platform_details import PlatformDetails
@@ -68,7 +68,7 @@ def generate_data_structure(self, data: Any, source_mapping: SourceMapping):
             return self.generate_and(data, source_mapping)
         elif isinstance(data, NOT):
             return self.generate_not(data, source_mapping)
-        elif isinstance(data, Field):
+        elif isinstance(data, FieldValue):
             return self.generate_field(data, source_mapping)
         elif isinstance(data, Keyword):
             return self.generate_keyword(data)
@@ -101,7 +101,7 @@ def generate_or(self, data: Any, source_mapping: SourceMapping):
             elif (
                 result
                 and len(set(result.get(self.selection, [])).intersection(set(updated_node))) != 0
-                and isinstance(data.items[i - 1], Field)
+                and isinstance(data.items[i - 1], FieldValue)
                 and len(updated_node) == 1
                 and self.selection not in updated_node
             ):
@@ -177,9 +177,9 @@ def map_field(source_mapping: SourceMapping, generic_field_name: str) -> str:
         field_name = source_mapping.fields_mapping.get_platform_field_name(generic_field_name)
         return field_name or generic_field_name
 
-    def generate_field(self, data: Field, source_mapping: SourceMapping):
+    def generate_field(self, data: FieldValue, source_mapping: SourceMapping):
         source_id = source_mapping.source_id
-        generic_field_name = data.generic_names_map.get(source_id) or data.source_name
+        generic_field_name = data.field.get_generic_field_name(source_id) or data.field.source_name
         field_name = self.map_field(source_mapping, generic_field_name)
         if data.operator.token_type not in (OperatorType.EQ, OperatorType.LT, OperatorType.LTE,
                                             OperatorType.GT, OperatorType.GTE, OperatorType.NEQ):

diff --git a/translator/app/translator/platforms/sigma/tokenizer.py b/translator/app/translator/platforms/sigma/tokenizer.py
index 10a4b8dd..a546dad4 100644
--- a/translator/app/translator/platforms/sigma/tokenizer.py
+++ b/translator/app/translator/platforms/sigma/tokenizer.py
@@ -21,10 +21,9 @@
 
 from app.translator.platforms.sigma.models.modifiers import ModifierManager
 from app.translator.core.exceptions.parser import TokenizerGeneralException
-from app.translator.core.models.field import Field, Keyword
+from app.translator.core.models.field import FieldValue, Keyword
 from app.translator.core.models.identifier import Identifier
 from app.translator.core.custom_types.tokens import GroupType, LogicalOperatorType
-from app.translator.core.tokenizer import QueryTokenizer
 
 
 class Selection:
@@ -45,7 +44,7 @@ def __init__(self):
             list: self.__parse_or_selection
         }
 
-    def __parse_field(self, field_name: str, values: Union[int, str, List[str]]) -> Union[List, Field]:
+    def __parse_field(self, field_name: str, values: Union[int, str, List[str]]) -> Union[List, FieldValue]:
         field_name, *modifier = field_name.split("|") if "|" in field_name else (field_name, "=")
         return self.modifier_manager.generate(field_name=field_name, modifier=modifier, value=values)
From a497b8accd62c2528c7118f2aa4639dc8727582c Mon Sep 17 00:00:00 2001
From: Oleksandr Volha
Date: Mon, 8 Jan 2024 15:06:36 +0200
Subject: [PATCH 2/3] return licence text

---
 translator/app/translator/core/functions.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/translator/app/translator/core/functions.py b/translator/app/translator/core/functions.py
index eed11f98..c947e7e2 100644
--- a/translator/app/translator/core/functions.py
+++ b/translator/app/translator/core/functions.py
@@ -1,3 +1,18 @@
+"""
+Uncoder IO Commercial Edition License
+-----------------------------------------------------------------
+Copyright (c) 2023 SOC Prime, Inc.
+This file is part of the Uncoder IO Commercial Edition ("CE") and is
+licensed under the Uncoder IO Non-Commercial License (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    https://github.com/UncoderIO/UncoderIO/blob/main/LICENSE
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-----------------------------------------------------------------
+"""
+
 from __future__ import annotations
 
 from abc import ABC, abstractmethod

From d9404a00806f0d56761d294946344a8a5e77ea94 Mon Sep 17 00:00:00 2001
From: Oleksandr Volha
Date: Mon, 8 Jan 2024 15:07:11 +0200
Subject: [PATCH 3/3] return licence text

---
 translator/app/translator/core/functions.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/translator/app/translator/core/functions.py b/translator/app/translator/core/functions.py
index c947e7e2..2f9eda9c 100644
--- a/translator/app/translator/core/functions.py
+++ b/translator/app/translator/core/functions.py
@@ -2,11 +2,14 @@
 Uncoder IO Commercial Edition License
 -----------------------------------------------------------------
 Copyright (c) 2023 SOC Prime, Inc.
+
 This file is part of the Uncoder IO Commercial Edition ("CE") and is
 licensed under the Uncoder IO Non-Commercial License (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
     https://github.com/UncoderIO/UncoderIO/blob/main/LICENSE
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.