In [2]:
import re

In [3]:
# Token types
# Maps each token category name to the regular expression that recognises it.
# Insertion order is significant: the lexer tries the patterns in this order
# and takes the first one that matches at the current position.
# Note: no pattern covers '=', so the lexer reports it as an unexpected character.
TOKEN_TYPES = dict(
    NUMBER=r'\d+',                         # one or more decimal digits
    IDENTIFIER=r'[a-zA-Z_][a-zA-Z0-9_]*',  # letter/underscore, then letters, digits, underscores
    OPERATOR=r'[+\-*/]',                   # a single arithmetic operator
    PARENTHESIS=r'[()\[\]{}]',             # any single round, square, or curly bracket
    WHITESPACE=r'\s+',                     # a run of whitespace characters
)

In [4]:
# Token class
class Token:
    """A single lexeme: a category label paired with the text that was matched.

    Attributes:
        type: The token category (the lexer passes a key of the token-type mapping).
        value: The matched source text.
    """

    def __init__(self, type, value):
        # The parameter name `type` shadows the builtin only inside this
        # method; it is kept because callers may pass it by keyword.
        self.type, self.value = type, value

    def __str__(self):
        """Return the display form ``Token(<type>, <value>)``."""
        return 'Token({}, {})'.format(self.type, self.value)

In [5]:
# Lexical analyzer class
class Lexer:
    """Regex-driven scanner that splits a source string into ``Token`` objects.

    Patterns come from the module-level ``TOKEN_TYPES`` mapping and are tried
    in its iteration order at the current position; the first pattern that
    matches produces the token. Whitespace is emitted as a token like any
    other category.

    Attributes:
        source_code: The text being scanned.
        position: Index of the next character to examine.
        tokens: Tokens produced so far, in source order.
    """

    def __init__(self, source_code):
        """Store the text to scan and start at the beginning with no tokens."""
        self.source_code = source_code
        self.position = 0
        self.tokens = []

    def tokenize(self):
        """Scan from ``self.position`` to the end of ``self.source_code``.

        A character that no pattern matches is reported on stdout as
        ``Unexpected character: <char>`` and skipped; scanning then continues
        with the next character.

        Returns:
            list: ``self.tokens``, the same list object held by the instance,
            with every token found during this call appended to it.
        """
        # Compile each pattern once per call instead of once per pattern at
        # every scan position.
        compiled_patterns = [
            (token_type, re.compile(pattern))
            for token_type, pattern in TOKEN_TYPES.items()
        ]

        while self.position < len(self.source_code):
            for token_type, regex in compiled_patterns:
                match = regex.match(self.source_code, self.position)
                # A zero-width match would leave `position` unchanged and the
                # loop would never terminate, so it is treated as no match.
                if match and match.end() > self.position:
                    self.tokens.append(Token(token_type, match.group(0)))
                    self.position = match.end()
                    break
            else:
                # No pattern consumed anything here: report the character and
                # step over it so scanning can continue.
                print(f"Unexpected character: {self.source_code[self.position]}")
                self.position += 1

        return self.tokens

In [6]:
# Example usage
# Runs the lexer over a sample expression and prints each resulting token on
# its own line.
if __name__ == "__main__":
    # Alternative input without the trailing unrecognised characters:
    # source_code = "x = 10 + 20 * (30 - 5)"
    # No token pattern covers '=', '$', '%', '^' or '&', so tokenize() prints
    # an "Unexpected character" line for each of them before returning.
    source_code = "x = 10 + 20 * (30 - 5) $%^&"
    lexer = Lexer(source_code)
    tokens = lexer.tokenize()

    # print() uses Token.__str__, giving the "Token(TYPE, value)" form.
    for token in tokens:
        print(token)

Unexpected character: =
Unexpected character: $
Unexpected character: %
Unexpected character: ^
Unexpected character: &
Token(IDENTIFIER, x)
Token(WHITESPACE,  )
Token(WHITESPACE,  )
Token(NUMBER, 10)
Token(WHITESPACE,  )
Token(OPERATOR, +)
Token(WHITESPACE,  )
Token(NUMBER, 20)
Token(WHITESPACE,  )
Token(OPERATOR, *)
Token(WHITESPACE,  )
Token(PARENTHESIS, ()
Token(NUMBER, 30)
Token(WHITESPACE,  )
Token(OPERATOR, -)
Token(WHITESPACE,  )
Token(NUMBER, 5)
Token(PARENTHESIS, ))
Token(WHITESPACE,  )
