Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions translator/app/translator/core/custom_types/values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from app.translator.tools.custom_enum import CustomEnum


class ValueType(CustomEnum):
    """String names for the kinds of value a query tokenizer can capture.

    The constants are plain strings; tokenizers interpolate them into their
    regexes as named-group names and read the same names back from a match.
    """

    # Generic value (also the default value type for escaping).
    value = "value"

    # Numeric literal.
    number_value = "num_value"

    # Quoted / unquoted string literals.
    double_quotes_value = "d_q_value"
    single_quotes_value = "s_q_value"
    back_quotes_value = "b_q_value"
    no_quotes_value = "no_q_value"

    # Boolean literal.
    bool_value = "bool_value"

    # Regular-expression literal.
    regular_expression_value = "re_value"

    # Bounds captured from range expressions.
    greater_than_or_equal = "gte_value"
    less_than_or_equal = "lte_value"
24 changes: 24 additions & 0 deletions translator/app/translator/core/escape_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import re
from abc import ABC
from typing import Union

from app.translator.core.custom_types.values import ValueType
from app.translator.core.models.escape_details import EscapeDetails


class EscapeManager(ABC):
    """Base class for escaping and un-escaping query values.

    Subclasses supply ``escape_map``: a mapping from a value-type name to the
    ``EscapeDetails`` (regex pattern plus replacement template) that should be
    applied to values of that type.
    """

    escape_map: dict[str, EscapeDetails] = {}

    def escape(self, value: Union[str, int], value_type: str = ValueType.value) -> Union[str, int]:
        """Return *value* with the symbols configured for *value_type* escaped.

        Integers are returned unchanged, and so are strings whose *value_type*
        has no entry in ``escape_map``.
        """
        if isinstance(value, int):
            return value
        if escape_details := self.escape_map.get(value_type):
            symbols_pattern = re.compile(escape_details.pattern)
            value = symbols_pattern.sub(escape_details.escape_symbols, value)
        return value

    def remove_escape(self, value: Union[str, int]) -> Union[str, int]:
        """Resolve backslash escape sequences in *value* via ``unicode_escape``.

        Integers are returned unchanged. Non-ASCII characters in the input
        survive the round trip intact.
        """
        if isinstance(value, int):
            return value
        # The "unicode_escape" codec decodes its input bytes as Latin-1, so a
        # default (UTF-8) encode() would turn every non-ASCII character into
        # mojibake. Encoding as Latin-1 keeps code points <= 0xFF as single
        # bytes, and "backslashreplace" rewrites anything larger as a \uXXXX /
        # \UXXXXXXXX escape that the decode step restores.
        return value.encode("latin-1", "backslashreplace").decode("unicode_escape")
7 changes: 7 additions & 0 deletions translator/app/translator/core/models/escape_details.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from dataclasses import dataclass


@dataclass
class EscapeDetails:
    """Describes one escaping rule: what to match and what to replace it with."""

    # Regular expression whose matches are the symbols to escape.
    # Defaults to None, i.e. no pattern configured.
    pattern: str = None
    # Replacement template in re.sub syntax: a literal backslash followed by
    # capture group 1. Written as a raw string so that "\g" is not parsed as
    # an (invalid) string escape sequence; the value is three backslashes
    # followed by "g<1>".
    escape_symbols: str = r"\\\g<1>"
7 changes: 7 additions & 0 deletions translator/app/translator/core/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
from typing import Union, List, Dict

from app.translator.const import DEFAULT_VALUE_TYPE
from app.translator.core.custom_types.values import ValueType
from app.translator.core.escape_manager import EscapeManager
from app.translator.core.exceptions.core import NotImplementedException, StrictPlatformException
from app.translator.core.exceptions.parser import UnsupportedOperatorException
from app.translator.core.functions import PlatformFunctions
Expand All @@ -34,6 +36,7 @@

class BaseQueryFieldValue(ABC):
details: PlatformDetails = None
escape_manager: EscapeManager = None

def __init__(self, or_token):
self.field_value = {
Expand Down Expand Up @@ -84,6 +87,10 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
raise NotImplementedException

def apply_value(self, value: Union[str, int], value_type: str = ValueType.value) -> Union[str, int]:
    """Escape *value* for rendering by delegating to ``self.escape_manager``.

    The class-level ``escape_manager`` must have been set to a real manager
    before this is called.
    """
    return self.escape_manager.escape(value, value_type)

def apply_field_value(self, field, operator, value):
if modifier_function := self.field_value.get(operator.token_type):
return modifier_function(field, value)
Expand Down
5 changes: 4 additions & 1 deletion translator/app/translator/core/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import re
from typing import Tuple, Union, List, Any, Optional, Type

from app.translator.core.custom_types.values import ValueType
from app.translator.core.escape_manager import EscapeManager
from app.translator.core.exceptions.parser import (
UnsupportedOperatorException,
TokenizerGeneralException,
Expand Down Expand Up @@ -60,6 +62,7 @@ class QueryTokenizer(BaseTokenizer):

multi_value_delimiter = ","
wildcard_symbol = None
escape_manager: EscapeManager = None

def __init_subclass__(cls, **kwargs):
cls._validate_re_patterns()
Expand Down Expand Up @@ -100,7 +103,7 @@ def search_operator(self, query, field_name) -> str:
return operator

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
return operator, get_match_group(match, group_name='value')
return operator, get_match_group(match, group_name=ValueType.value)

@staticmethod
def clean_multi_value(value: Union[int, str]) -> Union[int, str]:
Expand Down
15 changes: 8 additions & 7 deletions translator/app/translator/platforms/athena/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import re
from typing import Tuple, Any

from app.translator.core.custom_types.values import ValueType
from app.translator.core.models.identifier import Identifier
from app.translator.core.tokenizer import QueryTokenizer
from app.translator.core.custom_types.tokens import OperatorType
Expand All @@ -41,11 +42,11 @@ class AthenaTokenizer(QueryTokenizer):
}

field_pattern = r'(?P<field_name>"[a-zA-Z\._\-\s]+"|[a-zA-Z\._\-]+)'
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
bool_value_pattern = r"(?P<bool_value>true|false)\s*"
single_quotes_value_pattern = r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*)'"""
num_value_pattern = fr"(?P<{ValueType.number_value}>\d+(?:\.\d+)*)\s*"
bool_value_pattern = fr"(?P<{ValueType.bool_value}>true|false)\s*"
single_quotes_value_pattern = fr"""'(?P<{ValueType.single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*)'"""
_value_pattern = fr"{num_value_pattern}|{bool_value_pattern}|{single_quotes_value_pattern}"
multi_value_pattern = r"""\((?P<value>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*')*)\)"""
multi_value_pattern = fr"""\((?P<{ValueType.value}>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*')*)\)"""

wildcard_symbol = "%"

Expand All @@ -54,13 +55,13 @@ def should_process_value_wildcard_symbols(operator: str) -> bool:
return operator.lower() in ("like",)

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if (num_value := get_match_group(match, group_name='num_value')) is not None:
if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None:
return operator, num_value

elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
elif (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None:
return operator, bool_value

elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None:
elif (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None:
return operator, s_q_value

return super().get_operator_and_value(match, operator)
Expand Down
12 changes: 12 additions & 0 deletions translator/app/translator/platforms/base/lucene/escape_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from app.translator.core.custom_types.values import ValueType
from app.translator.core.escape_manager import EscapeManager
from app.translator.core.models.escape_details import EscapeDetails


class LuceneEscapeManager(EscapeManager):
    """Escaping rules used when rendering and tokenizing Lucene queries."""

    # Generic values: every symbol in the character class below (punctuation,
    # whitespace and the backslash itself) is prefixed with a backslash.
    escape_map = {
        ValueType.value: EscapeDetails(
            pattern=r'([_!@#$%^&*=+()\[\]{}|;:\'",.<>?/`~\-\s\\])',
            escape_symbols=r"\\\1",
        )
    }


# Module-level instance imported by the Lucene render and tokenizer.
lucene_escape_manager = LuceneEscapeManager()
23 changes: 11 additions & 12 deletions translator/app/translator/platforms/base/lucene/renders/lucene.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,15 @@
from app.translator.const import DEFAULT_VALUE_TYPE
from app.translator.core.render import BaseQueryRender
from app.translator.core.render import BaseQueryFieldValue
from app.translator.platforms.base.lucene.escape_manager import lucene_escape_manager


class LuceneFieldValue(BaseQueryFieldValue):

def apply_value(self, value: Union[str, int]):
return value
escape_manager = lucene_escape_manager

def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
values = self.or_token.join(self.apply_value(f'{v}') for v in value)
values = self.or_token.join(f'{self.apply_value(v)}' for v in value)
return f"{field}:({values})"
return f'{field}:{self.apply_value(value)}'

Expand All @@ -48,29 +47,29 @@ def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:

def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
    """Render a negated comparison of *field* against *value*.

    A list of values is escaped item by item, joined with ``self.or_token``
    and wrapped in its own parentheses; a single value is escaped and
    compared directly.
    """
    if isinstance(value, list):
        values = self.or_token.join(f'{self.apply_value(v)}' for v in value)
        # Two groups are opened -- "NOT (" and "(<values>" -- so two must be
        # closed; the trailing ")" was previously missing.
        return f"NOT ({field} = ({values}))"
    return f'NOT ({field} = {self.apply_value(value)})'

def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
values = self.or_token.join(self.apply_value(f'*{v}*') for v in value)
values = self.or_token.join(f'*{self.apply_value(v)}*' for v in value)
return f"{field}:({values})"
prepared_value = self.apply_value(f"*{value}*")
prepared_value = f"*{self.apply_value(value)}*"
return f'{field}:{prepared_value}'

def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
values = self.or_token.join(self.apply_value(f'*{v}') for v in value)
values = self.or_token.join(f'*{self.apply_value(v)}' for v in value)
return f"{field}:({values})"
prepared_value = self.apply_value(f"*{value}")
prepared_value = f"*{self.apply_value(value)}"
return f'{field}:{prepared_value}'

def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
values = self.or_token.join(self.apply_value(f'{v}*') for v in value)
values = self.or_token.join(f'{self.apply_value(v)}*' for v in value)
return f"{field}:({values})"
prepared_value = self.apply_value(f"{value}*")
prepared_value = f"{self.apply_value(value)}*"
return f'{field}:{prepared_value}'

def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
Expand All @@ -81,7 +80,7 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
return self.apply_value(f"*{value}*")
return f"*{self.apply_value(value)}*"


class LuceneQueryRender(BaseQueryRender):
Expand Down
32 changes: 18 additions & 14 deletions translator/app/translator/platforms/base/lucene/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@

from typing import Tuple, Union, List, Any

from app.translator.core.custom_types.values import ValueType
from app.translator.core.exceptions.parser import TokenizerGeneralException
from app.translator.core.mixins.logic import ANDLogicOperatorMixin
from app.translator.core.models.field import Keyword, Field
from app.translator.core.models.identifier import Identifier
from app.translator.core.tokenizer import QueryTokenizer
from app.translator.core.custom_types.tokens import OperatorType
from app.translator.platforms.base.lucene.escape_manager import lucene_escape_manager
from app.translator.tools.utils import get_match_group


Expand All @@ -41,19 +43,21 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
field_pattern = r"(?P<field_name>[a-zA-Z\.\-_]+)"
match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:\[\*\sTO|:\[|:<|:>|:))\s*"
_num_value_pattern = r"\d+(?:\.\d+)*"
num_value_pattern = fr"(?P<num_value>{_num_value_pattern})\s*"
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
no_quotes_value_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\[\]\s?]+)/\s*"
gte_value_pattern = fr"\[\s*(?P<gte_value>{_num_value_pattern})\s+TO\s+\*\s*\]"
lte_value_pattern = fr"\[\s*\*\s+TO\s+(?P<lte_value>{_num_value_pattern})\s*\]"
num_value_pattern = fr"(?P<{ValueType.number_value}>{_num_value_pattern})\s*"
double_quotes_value_pattern = fr'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s]|\\\"|\\)*)"\s*'
no_quotes_value_pattern = fr"(?P<{ValueType.no_quotes_value}>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
re_value_pattern = fr"/(?P<{ValueType.regular_expression_value}>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{{\}}\[\]\s?]+)/\s*"
gte_value_pattern = fr"\[\s*(?P<{ValueType.greater_than_or_equal}>{_num_value_pattern})\s+TO\s+\*\s*\]"
lte_value_pattern = fr"\[\s*\*\s+TO\s+(?P<{ValueType.less_than_or_equal}>{_num_value_pattern})\s*\]"
range_value_pattern = fr"{gte_value_pattern}|{lte_value_pattern}"
_value_pattern = fr"{num_value_pattern}|{re_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}|{range_value_pattern}"
keyword_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{|\\\}|\\\:|\\)+)(?:\s+|\)|$)"
keyword_pattern = fr"(?P<{ValueType.no_quotes_value}>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{{|\\\}}|\\\:|\\)+)(?:\s+|\)|$)"

multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)"""
multi_value_pattern = fr"""\((?P<{ValueType.value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)"""
multi_value_check_pattern = r"___field___\s*___operator___\s*\("

escape_manager = lucene_escape_manager

wildcard_symbol = "*"

@staticmethod
Expand All @@ -69,22 +73,22 @@ def clean_quotes(value: Union[str, int]):
return value

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if (num_value := get_match_group(match, group_name='num_value')) is not None:
if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None:
return operator, num_value

elif (re_value := get_match_group(match, group_name='re_value')) is not None:
elif (re_value := get_match_group(match, group_name=ValueType.regular_expression_value)) is not None:
return OperatorType.REGEX, re_value

elif (n_q_value := get_match_group(match, group_name='n_q_value')) is not None:
elif (n_q_value := get_match_group(match, group_name=ValueType.no_quotes_value)) is not None:
return operator, n_q_value

elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
elif (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None:
return operator, d_q_value

elif (gte_value := get_match_group(match, group_name='gte_value')) is not None:
elif (gte_value := get_match_group(match, group_name=ValueType.greater_than_or_equal)) is not None:
return OperatorType.GTE, gte_value

elif (lte_value := get_match_group(match, group_name='lte_value')) is not None:
elif (lte_value := get_match_group(match, group_name=ValueType.less_than_or_equal)) is not None:
return OperatorType.LTE, lte_value

return super().get_operator_and_value(match, operator)
Expand Down
12 changes: 12 additions & 0 deletions translator/app/translator/platforms/base/spl/escape_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from app.translator.core.custom_types.values import ValueType
from app.translator.core.escape_manager import EscapeManager
from app.translator.core.models.escape_details import EscapeDetails


class SplEscapeManager(EscapeManager):
    """Escaping rules used when rendering SPL queries."""

    # Generic values: match a double quote, or a backslash that is neither
    # preceded by another backslash nor followed by "*", "?" or "\".
    # The replacement is the EscapeDetails default.
    escape_map = {
        ValueType.value: EscapeDetails(
            pattern=r'("|(?<!\\)\\(?![*?\\]))',
        )
    }


# Module-level instance imported by the SPL render.
spl_escape_manager = SplEscapeManager()
22 changes: 12 additions & 10 deletions translator/app/translator/platforms/base/spl/renders/spl.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,51 +21,53 @@
from app.translator.const import DEFAULT_VALUE_TYPE
from app.translator.core.exceptions.render import UnsupportedRenderMethod
from app.translator.core.render import BaseQueryRender, BaseQueryFieldValue
from app.translator.platforms.base.spl.escape_manager import spl_escape_manager


class SplFieldValue(BaseQueryFieldValue):
escape_manager = spl_escape_manager

def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join([self.equal_modifier(field=field, value=v) for v in value])})"
return f'{field}="{value}"'
return f'{field}="{self.apply_value(value)}"'

def less_modifier(self, field: str, value: Union[int, str]) -> str:
return f'{field}<"{value}"'
return f'{field}<"{self.apply_value(value)}"'

def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
return f'{field}<="{value}"'
return f'{field}<="{self.apply_value(value)}"'

def greater_modifier(self, field: str, value: Union[int, str]) -> str:
return f'{field}>"{value}"'
return f'{field}>"{self.apply_value(value)}"'

def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
return f'{field}>="{value}"'
return f'{field}>="{self.apply_value(value)}"'

def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join([self.not_equal_modifier(field=field, value=v) for v in value])})"
return f'{field}!="{value}"'
return f'{field}!="{self.apply_value(value)}"'

def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join([self.contains_modifier(field=field, value=v) for v in value])})"
return f'{field}="*{value}*"'
return f'{field}="*{self.apply_value(value)}*"'

def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join([self.endswith_modifier(field=field, value=v) for v in value])})"
return f'{field}="*{value}"'
return f'{field}="*{self.apply_value(value)}"'

def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join([self.startswith_modifier(field=field, value=v) for v in value])})"
return f'{field}="{value}*"'
return f'{field}="{self.apply_value(value)}*"'

def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
return f'"{value}"'
return f'"{self.apply_value(value)}"'

def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
raise UnsupportedRenderMethod(platform_name=self.details.name, method="Regex Expression")
Expand Down
Loading