### Задание 5.4

In [1]:
import re
import typing as tp
from dataclasses import dataclass, field

In [2]:
@dataclass(frozen=True)
class TokenSpecifier:
    """One lexeme class: an optional name plus the regex that recognises it.

    The instance is truthy only when it has a name; ``tokenize`` relies on
    that to match unnamed specifiers without emitting a token for them.
    """

    # Token category reported for matches; ``None`` marks text to be skipped.
    name: tp.Optional[str]
    # Regular-expression source text.
    pattern: str
    # Compiled form of ``pattern``; derived in ``__post_init__``, not passed in.
    regex: re.Pattern = field(init=False)

    def __post_init__(self):
        """Compile ``pattern`` once and store it on the frozen instance."""
        compiled = re.compile(self.pattern)
        # The dataclass is frozen, so normal attribute assignment would raise.
        object.__setattr__(self, 'regex', compiled)

    def __bool__(self) -> bool:
        """Return ``True`` when the specifier has a name."""
        return self.name is not None

    def __str__(self) -> str:
        """Return a short description that omits the compiled regex."""
        return "TokenSpecifier(name={!r}, pattern={!r})".format(
            self.name, self.pattern
        )

In [3]:
@dataclass(frozen=True)
class TokenInstance:
    """A single lexeme found in the source text."""

    # Exact matched text.
    value: str
    # Specifier whose regex produced the match.
    token: TokenSpecifier
    # Offset of the first matched character in the source string.
    position_start: int
    # Offset just past the last matched character.
    position_end: int

    def __str__(self) -> str:
        """Return a one-line description: token name, value and offsets."""
        details = ", ".join((
            f"token={self.token.name}",
            f"value={self.value!r}",
            f"start={self.position_start}",
            f"end={self.position_end}",
        ))
        return f"TokenInstance({details})"

In [4]:
# Keywords of the supported Go subset; the lexer reports them as RESERVED
# instead of IDENTIFIER.
RESERVED_WORDS: set[str] = {
    # declarations
    "package", "import", "func", "var",
    # control flow
    "if", "else", "for", "return",
}

In [5]:
# Ordered lexer table. ``tokenize`` tries the entries from top to bottom and
# takes the first one whose regex matches at the current position, so the
# order below is significant.
TOKENS: list[TokenSpecifier] = [
    # Unnamed specifiers are matched and skipped without producing a token.
    TokenSpecifier(None, r"\s+"),             # whitespace
    TokenSpecifier(None, r"//.*"),            # line comment; must precede the '/' operator
    TokenSpecifier(None, r"/\*[\s\S]*?\*/"),  # block comment (non-greedy)
    TokenSpecifier('STRING', r'"(?:\\.|[^"\\])*"'),
    # Listed before OPERATOR, so a '-' directly followed by digits is lexed
    # as part of the numeric literal.
    TokenSpecifier('NUMBER', r"-?\d+(\.\d+)?([eE][-+]?\d+)?"),
    # Listed before IDENTIFIER so keywords are not reported as identifiers.
    # The words are sorted so the pattern text is the same on every run (set
    # iteration order is not), and escaped so a future entry containing a
    # regex metacharacter cannot corrupt the pattern.
    TokenSpecifier(
        'RESERVED',
        r'\b(?:' + '|'.join(map(re.escape, sorted(RESERVED_WORDS))) + r')\b',
    ),
    TokenSpecifier('IDENTIFIER', r'\b[A-Za-z_][A-Za-z0-9_]*\b'),
    # Two-character operators come before their one-character prefixes.
    TokenSpecifier('OPERATOR', r'\+|\-|\*|\/|%|==|=|!=|<=|>=|<|>|&&|\|\||!'),
    TokenSpecifier('SEPARATOR', r'\(|\)|\{|\}|\[|\]|;|,|\.|:'),
]

In [6]:
def tokenize(code: str) -> list[TokenInstance]:
    """Split ``code`` into tokens using the ordered ``TOKENS`` table.

    At each position the specifiers are tried in table order and the first
    one that matches wins. Matches of unnamed (falsy) specifiers advance the
    position without producing a token.

    Raises:
        SyntaxError: if no specifier matches at the current position; the
            message carries the offset and up to 20 characters of context.
    """
    result: list[TokenInstance] = []
    cursor = 0
    total = len(code)

    while cursor < total:
        for spec in TOKENS:
            hit = spec.regex.match(code, cursor)
            if hit is None:
                continue
            stop = hit.end()
            if spec:
                result.append(
                    TokenInstance(
                        value=hit.group(0),
                        token=spec,
                        position_start=cursor,
                        position_end=stop,
                    )
                )
            cursor = stop
            break
        else:
            # Every specifier was tried and none matched here.
            raise SyntaxError(
                f"Лексическая ошибка в позиции {cursor}: {code[cursor:cursor + 20]!r}"
            )

    return result

In [7]:
def analyze(code: str) -> None:
    """Run the lexer on ``code`` and print the tokens or the lexical error."""
    try:
        lexemes = tokenize(code)
    except SyntaxError as error:
        print("Лексическая ошибка:", error)
        return

    print("Лексемы:")
    for lexeme in lexemes:
        print(lexeme)
    print("Лексический анализ завершён успешно. Ошибок нет.")

In [8]:
# Lexer sample 1: a small well-formed program with keywords, identifiers,
# string literals, a number, operators and separators.
test_code_1 = """package main
import "fmt"
func main() {
    var x = 42
    if x > 0 {
        fmt.Println("Positive")
    } else {
        fmt.Println("Non-positive")
    }
}"""

In [9]:
# Lexer sample 2: contains '$', which none of the TOKENS patterns accepts,
# so tokenize is expected to raise a lexical error there.
test_code_2 = """package main
func main() {
    var x = 3$14
}"""

In [10]:
# Lexer sample 3: numeric literals with a fractional part, a leading minus
# and an exponent.
test_code_3 = """package calc
func add(a int, b int) int {
    return a + b
}
func main() {
    var result = add(3.14, -2.71e-1)
    fmt.Println(result)
}"""

In [11]:
# Inputs for the lexer demo, in the order they are run.
tests: list[str] = [test_code_1, test_code_2, test_code_3]

In [12]:
# Run the lexer on every sample, numbering the tests from 1.
for i, code in enumerate(tests, 1):
    print(f"\n----- Тест {i} -----")
    analyze(code)


----- Тест 1 -----
Лексемы:
TokenInstance(token=RESERVED, value='package', start=0, end=7)
TokenInstance(token=IDENTIFIER, value='main', start=8, end=12)
TokenInstance(token=RESERVED, value='import', start=13, end=19)
TokenInstance(token=STRING, value='"fmt"', start=20, end=25)
TokenInstance(token=RESERVED, value='func', start=26, end=30)
TokenInstance(token=IDENTIFIER, value='main', start=31, end=35)
TokenInstance(token=SEPARATOR, value='(', start=35, end=36)
TokenInstance(token=SEPARATOR, value=')', start=36, end=37)
TokenInstance(token=SEPARATOR, value='{', start=38, end=39)
TokenInstance(token=RESERVED, value='var', start=44, end=47)
TokenInstance(token=IDENTIFIER, value='x', start=48, end=49)
TokenInstance(token=OPERATOR, value='=', start=50, end=51)
TokenInstance(token=NUMBER, value='42', start=52, end=54)
TokenInstance(token=RESERVED, value='if', start=59, end=61)
TokenInstance(token=IDENTIFIER, value='x', start=62, end=63)
TokenInstance(token=OPERATOR, value='>', start=64, end

### Задание 6.1

In [13]:
class Parser:
    """Recursive-descent syntax checker over a list of lexer tokens.

    The parser only validates structure; it builds no syntax tree. The
    grammar it accepts is::

        program    := statement*
        statement  := if | for | assignment | expression [";"]
        assignment := IDENTIFIER "=" expression [";"]
        if         := "if" expression block ["else" (if | block)]
        for        := "for" (for_clause | expression) block
        for_clause := IDENTIFIER "=" expression ";" expression ";"
                      IDENTIFIER "=" expression
        block      := "{" statement* "}"
        expression := term (binary_operator term)*
        term       := "(" expression ")" | NUMBER | IDENTIFIER | STRING

    All binary operators share a single precedence level, which is enough
    for validation. Every violation is reported by raising ``SyntaxError``.
    """

    # Binary operators allowed between two terms of an expression.
    _OPERATORS: set[str] = {
        "+", "-", "*", "/", "%",
        "<", ">", "<=", ">=", "==", "!=",
        "&&", "||",
    }

    _STATEMENT_SEPARATOR: str = ";"

    def __init__(self, tokens: list[TokenInstance]):
        """Store ``tokens`` and start at the first one."""
        self._tokens = tokens
        self._pos = 0
        self._valid_count = 0

    @property
    def valid_count(self) -> int:
        """Number of top-level statements parsed successfully so far."""
        return self._valid_count

    def current_token(self) -> tp.Optional[TokenInstance]:
        """Return the token at the cursor, or ``None`` at end of input."""
        if self._pos < len(self._tokens):
            return self._tokens[self._pos]
        return None

    def peek_token(self) -> tp.Optional[TokenInstance]:
        """Return the token after the current one, or ``None`` if absent."""
        if self._pos + 1 < len(self._tokens):
            return self._tokens[self._pos + 1]
        return None

    def consume(
        self,
        expected_value: tp.Optional[str] = None,
        expected_type: tp.Optional[str] = None
    ) -> TokenInstance:
        """Return the current token and advance past it.

        Args:
            expected_value: if given, the token text must equal it.
            expected_type: if given, the token's specifier name must equal it.

        Raises:
            SyntaxError: at end of input, or when an expectation is not met
                (the value is checked before the type).
        """
        token = self.current_token()
        if token is None:
            raise SyntaxError("Неожиданный конец входа")

        if expected_value is not None and token.value != expected_value:
            raise SyntaxError(
                f"Ожидалось {expected_value!r}, получено {token.value!r} "
                f"на позиции {token.position_start}"
            )

        if expected_type is not None and token.token.name != expected_type:
            raise SyntaxError(
                f"Ожидался токен типа {expected_type!r}, "
                f"получен {token.token.name!r} на позиции {token.position_start}"
            )

        self._pos += 1
        return token

    def _at(self, token_type: str, value: str) -> bool:
        """Return whether the current token has this type and text."""
        token = self.current_token()
        return (
            token is not None
            and token.token.name == token_type
            and token.value == value
        )

    def _starts_assignment(self) -> bool:
        """Return whether the cursor is at ``IDENTIFIER "="``."""
        token = self.current_token()
        following = self.peek_token()
        return (
            token is not None
            and token.token.name == "IDENTIFIER"
            and following is not None
            and following.token.name == "OPERATOR"
            and following.value == "="
        )

    def _skip_statement_separator(self) -> None:
        """Consume one statement separator if it is the current token."""
        if self._at("SEPARATOR", self._STATEMENT_SEPARATOR):
            self.consume(expected_value=self._STATEMENT_SEPARATOR)

    def parse_program(self) -> None:
        """Parse statements until the input ends, counting each one."""
        while self.current_token() is not None:
            self.parse_statement()
            self._valid_count += 1

    def parse_statement(self) -> None:
        """Parse one statement, dispatching on its leading token(s).

        Raises:
            SyntaxError: at end of input or on a malformed statement.
        """
        if self.current_token() is None:
            raise SyntaxError("Неожиданный конец входа в parse_statement")

        if self._at("RESERVED", "if"):
            self.parse_if()
        elif self._at("RESERVED", "for"):
            self.parse_for()
        elif self._starts_assignment():
            self.parse_assignment()
        else:
            self.parse_expression()
            # Expression statements take the same optional ';' terminator
            # that assignments do.
            self._skip_statement_separator()

    def parse_assignment(self, *, consume_separator: bool = True) -> None:
        """Parse ``IDENTIFIER "=" expression``.

        Args:
            consume_separator: when true, also consume one trailing ';' if
                present. ``for`` clauses pass ``False`` because they handle
                their own separators.

        Raises:
            SyntaxError: if the shape is not identifier, '=', expression.
        """
        self.consume(expected_type="IDENTIFIER")
        op = self.consume(expected_type="OPERATOR")
        if op.value != "=":
            raise SyntaxError(
                f"Ожидалось '=', получено {op.value} на позиции {op.position_start}"
            )

        self.parse_expression()

        if consume_separator:
            self._skip_statement_separator()

    def parse_if(self) -> None:
        """Parse ``if`` with an optional ``else`` block or ``else if`` chain."""
        self.consume(expected_value="if")
        self.parse_expression()
        self.parse_block()

        if self._at("RESERVED", "else"):
            self.consume(expected_value="else")
            if self._at("RESERVED", "if"):
                # "else if": the nested if parses the rest of the chain.
                self.parse_if()
            else:
                self.parse_block()

    def parse_for(self) -> None:
        """Parse a ``for`` loop in three-clause or condition-only form."""
        self.consume(expected_value="for")

        if self._starts_assignment():
            # init ; condition ; post
            self.parse_assignment(consume_separator=False)
            self.consume(expected_value=self._STATEMENT_SEPARATOR)
            self.parse_expression()
            self.consume(expected_value=self._STATEMENT_SEPARATOR)
            # No ';' is allowed between the post clause and the block.
            self.parse_assignment(consume_separator=False)
        else:
            self.parse_expression()

        self.parse_block()

    def parse_block(self) -> None:
        """Parse ``{ statement* }``.

        Raises:
            SyntaxError: if '{' is missing, or the input ends before '}'.
        """
        self.consume(expected_value="{")
        while True:
            token = self.current_token()
            if token is None or token.value == "}":
                break
            self.parse_statement()
        # At end of input this raises, reporting the unterminated block.
        self.consume(expected_value="}")

    def parse_expression(self) -> None:
        """Parse ``term (operator term)*`` for operators in ``_OPERATORS``."""
        self.parse_term()
        while True:
            token = self.current_token()
            if (token and token.token.name == "OPERATOR"
                    and token.value in self._OPERATORS):
                self.consume()
                self.parse_term()
            else:
                break

    def parse_term(self) -> None:
        """Parse a parenthesised expression or a NUMBER/IDENTIFIER/STRING.

        Raises:
            SyntaxError: at end of input or on any other token.
        """
        token = self.current_token()
        if token is None:
            raise SyntaxError("Неожиданный конец выражения")

        if token.value == "(":
            self.consume(expected_value="(")
            self.parse_expression()
            self.consume(expected_value=")")
        elif token.token.name in {"NUMBER", "IDENTIFIER", "STRING"}:
            self.consume()
        else:
            raise SyntaxError(
                f"Неожиданный токен {token.value!r} на позиции {token.position_start}"
            )

In [14]:
def syntactic_analyze(code: str) -> None:
    """Tokenize and parse ``code``, printing success or the first error.

    Both lexical and syntactic failures surface as ``SyntaxError`` and are
    reported through the same message prefix.
    """
    try:
        Parser(tokenize(code)).parse_program()
    except SyntaxError as error:
        print("Синтаксическая ошибка:", error)
    else:
        print("Синтаксический анализ завершён успешно.")

In [15]:
# Parser sample 1: nested if/else with assignments; expected to parse.
# NOTE: rebinds the test_code_1 name used by the lexer demo earlier.
test_code_1 = """if x + 5 > 0 {
    x = x + 1;
    if x - 3 < 4 {
        x = x + 2;
    } else {
        x = x - 2;
    }
} else {
    y = 0;
}"""

In [16]:
# Parser sample 2: both for-loop forms the parser handles, the three-clause
# form and the condition-only form; expected to parse.
test_code_2 = """for i = 10; i > 0; i = i - 1 {
    sum = sum + i;
}
for x + 1 > 0 {
    x = x + 2;
}"""

In [17]:
# Parser sample 3: the '}' that should close the if-block before 'else' is
# missing, so parsing is expected to fail at 'else'.
test_code_3 = """if a + 5 > 0 {
    a = a + 2;
else {
    a = a + 3;
}"""

In [18]:
# Inputs for the parser demo, in the order they are run.
tests: list[str] = [test_code_1, test_code_2, test_code_3]

In [19]:
# Run the syntax checker on every sample, numbering the tests from 1.
for i, code in enumerate(tests, 1):
    print(f"\n----- Синтаксический тест {i} -----")
    syntactic_analyze(code)


----- Синтаксический тест 1 -----
Синтаксический анализ завершён успешно.

----- Синтаксический тест 2 -----
Синтаксический анализ завершён успешно.

----- Синтаксический тест 3 -----
Синтаксическая ошибка: Неожиданный токен 'else' на позиции 30
