### Задание 5.4

In [1]:
import re
import typing as tp
from dataclasses import dataclass, field

In [2]:
@dataclass(frozen=True)
class TokenSpecifier:
    """Describe one lexical rule: a category name and the regex that matches it.

    The instance is immutable. Its truth value is ``False`` when ``name`` is
    ``None`` and ``True`` otherwise.
    """

    # Category label of the rule; ``None`` makes the specifier falsy.
    name: tp.Optional[str]
    # Source text of the regular expression.
    pattern: str
    # Compiled form of ``pattern``; not a constructor argument, it is
    # filled in by ``__post_init__``.
    regex: re.Pattern = field(init=False)

    def __post_init__(self):
        """Compile ``pattern`` once and store the result in ``regex``."""
        compiled = re.compile(self.pattern)
        # The dataclass is frozen, so a plain ``self.regex = ...`` would
        # raise; bypass the generated ``__setattr__`` instead.
        object.__setattr__(self, 'regex', compiled)

    def __bool__(self) -> bool:
        """Return ``True`` exactly when the specifier has a name."""
        unnamed = self.name is None
        return not unnamed

    def __str__(self) -> str:
        """Return a short description showing only ``name`` and ``pattern``."""
        name, pattern = self.name, self.pattern
        return f"TokenSpecifier(name={name!r}, pattern={pattern!r})"

In [3]:
@dataclass(frozen=True)
class TokenInstance:
    """One lexeme found in the analysed text (immutable)."""

    # Text of the lexeme.
    value: str
    # Rule associated with this lexeme.
    token: TokenSpecifier
    # Offsets of the lexeme in the analysed text.
    position_start: int
    position_end: int

    def __str__(self) -> str:
        """Return a one-line description: rule name, text and both offsets."""
        kind = self.token.name
        start, end = self.position_start, self.position_end
        return (f"TokenInstance(token={kind}, "
                f"value={self.value!r}, "
                f"start={start}, end={end})")

In [4]:
# Words that are reported as RESERVED instead of IDENTIFIER.
RESERVED_WORDS: set[str] = {
    # declarations
    "package", "import", "func", "var",
    # control flow
    "if", "else", "for", "return",
}

In [5]:
# Ordered lexical rules. The tokenizer tries them top to bottom and takes the
# first one that matches, so the order below is significant.
TOKENS: list[TokenSpecifier] = [
    # Rules without a name are falsy: whitespace, line comments and block
    # comments. The comment rules sit above OPERATOR so that a leading '/'
    # is not taken as a division sign.
    TokenSpecifier(None, r"\s+"),
    TokenSpecifier(None, r"//.*"),
    TokenSpecifier(None, r"/\*[\s\S]*?\*/"),
    # Double-quoted string; a backslash escapes the following character.
    TokenSpecifier('STRING', r'"(?:\\.|[^"\\])*"'),
    # Optional sign, integer part, optional fraction, optional exponent.
    # The groups are non-capturing because only the whole match is used.
    TokenSpecifier('NUMBER', r"-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?"),
    # Keywords are sorted so that the pattern text is the same on every run
    # (iteration order of a set of strings is not stable between runs), and
    # escaped so that a keyword containing a regex metacharacter stays
    # literal. RESERVED precedes IDENTIFIER so keywords win.
    TokenSpecifier(
        'RESERVED',
        r'\b(?:' + '|'.join(map(re.escape, sorted(RESERVED_WORDS))) + r')\b',
    ),
    TokenSpecifier('IDENTIFIER', r'\b[A-Za-z_][A-Za-z0-9_]*\b'),
    # Two-character operators are listed before their one-character prefixes.
    TokenSpecifier('OPERATOR', r'\+|\-|\*|\/|%|==|=|!=|<=|>=|<|>|&&|\|\||!'),
    TokenSpecifier('SEPARATOR', r'\(|\)|\{|\}|\[|\]|;|,|\.|:'),
]

In [6]:
def _ends_with_operand(tokens: list[TokenInstance]) -> bool:
    """Tell whether the last lexeme can be the left operand of a binary operator."""
    if not tokens:
        return False
    last = tokens[-1]
    if last.token.name in ('IDENTIFIER', 'NUMBER', 'STRING'):
        return True
    # A closing bracket ends a call, a parenthesised expression or an index.
    return last.token.name == 'SEPARATOR' and last.value in (')', ']')


def tokenize(code: str) -> list[TokenInstance]:
    """Split *code* into lexemes using the ordered rules in ``TOKENS``.

    At every position the rules are tried in order and the first acceptable
    match is consumed. Matches of falsy rules (those with ``name=None``) are
    consumed but not recorded.

    Args:
        code: Text to analyse.

    Returns:
        The recorded lexemes in the order they appear in *code*.

    Raises:
        SyntaxError: If no rule matches at some position. The message holds
            the offset and up to 20 characters of the offending text.
    """
    tokens: list[TokenInstance] = []
    pos = 0

    while pos < len(code):
        for spec in TOKENS:
            match_found = spec.regex.match(code, pos)
            # A zero-width match would leave ``pos`` unchanged and loop
            # forever, so it is treated the same as no match.
            if match_found is None or match_found.end() == pos:
                continue
            # The NUMBER rule accepts a leading '-'. Right after an operand
            # that '-' is a binary minus ("a-1"), so reject the signed match
            # and let a later rule pick up the '-' on its own.
            if (spec.name == 'NUMBER' and code[pos] == '-'
                    and _ends_with_operand(tokens)):
                continue
            if spec:
                tokens.append(TokenInstance(
                    value=match_found.group(0),
                    token=spec,
                    position_start=pos,
                    position_end=match_found.end(),
                ))
            pos = match_found.end()
            break
        else:
            # No rule produced an acceptable match at this position.
            raise SyntaxError(f"Лексическая ошибка в позиции {pos}: {code[pos:pos + 20]!r}")

    return tokens

In [7]:
def analyze(code: str) -> None:
    """Run the lexer on *code* and print a report to standard output.

    On success every lexeme is printed on its own line, followed by a
    success message. If ``tokenize`` raises ``SyntaxError``, the error is
    printed instead and nothing else is reported.

    Args:
        code: Text to analyse.
    """
    try:
        lexemes = tokenize(code)
    except SyntaxError as error:
        print("Лексическая ошибка:", error)
        return

    print("Лексемы:")
    for lexeme in lexemes:
        print(lexeme)
    print("Лексический анализ завершён успешно. Ошибок нет.")

In [8]:
# Sample 1: package clause, import, function, variable and if/else.
# Every character is covered by some pattern in TOKENS, so no lexical
# error is expected.
test_code_1 = """package main
import "fmt"
func main() {
    var x = 42
    if x > 0 {
        fmt.Println("Positive")
    } else {
        fmt.Println("Non-positive")
    }
}"""

In [9]:
# Sample 2: contains '$', which no pattern in TOKENS matches, so tokenize
# is expected to raise SyntaxError at that character.
test_code_2 = """package main
func main() {
    var x = 3$14
}"""

In [10]:
# Sample 3: exercises numeric literals with a fraction, a leading sign and
# an exponent (3.14 and -2.71e-1).
test_code_3 = """package calc
func add(a int, b int) int {
    return a + b
}
func main() {
    var result = add(3.14, -2.71e-1)
    fmt.Println(result)
}"""

In [11]:
# Sample programs, in the order they are analysed.
tests: list[str] = [
    test_code_1,
    test_code_2,
    test_code_3,
]

In [12]:
# Analyse every sample, printing a numbered banner (starting at 1) first.
for i, code in enumerate(tests, start=1):
    print(f"\n----- Тест {i} -----")
    analyze(code)


----- Тест 1 -----
Лексемы:
TokenInstance(token=RESERVED, value='package', start=0, end=7)
TokenInstance(token=IDENTIFIER, value='main', start=8, end=12)
TokenInstance(token=RESERVED, value='import', start=13, end=19)
TokenInstance(token=STRING, value='"fmt"', start=20, end=25)
TokenInstance(token=RESERVED, value='func', start=26, end=30)
TokenInstance(token=IDENTIFIER, value='main', start=31, end=35)
TokenInstance(token=SEPARATOR, value='(', start=35, end=36)
TokenInstance(token=SEPARATOR, value=')', start=36, end=37)
TokenInstance(token=SEPARATOR, value='{', start=38, end=39)
TokenInstance(token=RESERVED, value='var', start=44, end=47)
TokenInstance(token=IDENTIFIER, value='x', start=48, end=49)
TokenInstance(token=OPERATOR, value='=', start=50, end=51)
TokenInstance(token=NUMBER, value='42', start=52, end=54)
TokenInstance(token=RESERVED, value='if', start=59, end=61)
TokenInstance(token=IDENTIFIER, value='x', start=62, end=63)
TokenInstance(token=OPERATOR, value='>', start=64, end