From 0c51b46b1e5fd5c0909a7d3b3f64df5f2c3af5a2 Mon Sep 17 00:00:00 2001 From: David Dorfman Date: Mon, 6 May 2024 15:06:23 +0300 Subject: [PATCH 1/3] Ignore tabs in expression Constant expressions can contain tabs as whitespace, just like spaces. This commit adds a test for this and fixes the expression parser to ignore tabs the same way it ignores spaces. --- dissect/cstruct/expression.py | 6 +++--- tests/test_expression.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dissect/cstruct/expression.py b/dissect/cstruct/expression.py index 53e1cf2..34d3ed2 100644 --- a/dissect/cstruct/expression.py +++ b/dissect/cstruct/expression.py @@ -18,7 +18,7 @@ def __init__(self, expression: str): self.pos = 0 self.tokens = [] - def equal(self, token: str, expected: Union[str, str[str]]) -> bool: + def equal(self, token: str, expected: Union[str, set[str]]) -> bool: if isinstance(expected, set): return token in expected else: @@ -42,7 +42,7 @@ def operator(self, token: str) -> bool: def match( self, func: Optional[Callable[[str], bool]] = None, - expected: Optional[str] = None, + expected: Optional[str | set[str]] = None, consume: bool = True, append: bool = True, ) -> bool: @@ -142,7 +142,7 @@ def tokenize(self) -> list[str]: self.tokens.append(">>") elif self.match(expected="<", append=False) and self.match(expected="<", append=False): self.tokens.append("<<") - elif self.match(expected=" ", append=False): + elif self.match(expected={" ", "\t"}, append=False): continue else: raise ExpressionTokenizerError( diff --git a/tests/test_expression.py b/tests/test_expression.py index c1d0fc0..1495ecf 100644 --- a/tests/test_expression.py +++ b/tests/test_expression.py @@ -11,6 +11,7 @@ ("7*8", 56), ("7 *8", 56), (" 7 * 8 ", 56), + ("\t7\t*\t8\t", 56), ("0 / 1", 0), ("1 / 1", 1), ("2 / 2", 1), From 0f5b17b3f4f977c6a8d6a4d4dd15d5921768db04 Mon Sep 17 00:00:00 2001 From: David Dorfman Date: Mon, 6 May 2024 13:15:26 +0300 Subject: [PATCH 2/3] Use union type expression in 
ExpressionTokenizer.equal Other places already use this syntax, so use it here as well for consistency. Co-authored-by: Erik Schamper <1254028+Schamper@users.noreply.github.com> --- dissect/cstruct/expression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dissect/cstruct/expression.py b/dissect/cstruct/expression.py index 34d3ed2..a4c134c 100644 --- a/dissect/cstruct/expression.py +++ b/dissect/cstruct/expression.py @@ -18,7 +18,7 @@ def __init__(self, expression: str): self.pos = 0 self.tokens = [] - def equal(self, token: str, expected: Union[str, set[str]]) -> bool: + def equal(self, token: str, expected: str | set[str]) -> bool: if isinstance(expected, set): return token in expected else: From 34ac0dbca57bab168db513d824afd3e6d219c499 Mon Sep 17 00:00:00 2001 From: David Dorfman Date: Mon, 6 May 2024 15:04:02 +0300 Subject: [PATCH 3/3] Remove unused typing.Union import --- dissect/cstruct/expression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dissect/cstruct/expression.py b/dissect/cstruct/expression.py index a4c134c..4727f26 100644 --- a/dissect/cstruct/expression.py +++ b/dissect/cstruct/expression.py @@ -1,7 +1,7 @@ from __future__ import annotations import string -from typing import TYPE_CHECKING, Callable, Optional, Union +from typing import TYPE_CHECKING, Callable, Optional from dissect.cstruct.exceptions import ExpressionParserError, ExpressionTokenizerError