In [27]:
import re
from collections import namedtuple

In [98]:
# One lexical token as produced by the lexer:
#   type   -- name of the token category (the matching entry of regex_map)
#   data   -- the exact text that was matched
#   lineno -- line number on which the token was found
Token = namedtuple('Token', 'type data lineno')

In [101]:
# Ordered (token type, regular expression) pairs.
# Order matters: the patterns are joined with '|' into a single regex, and
# at any position the first alternative that matches wins.
regex_map = [
    # The optional fraction uses a non-capturing group so that every entry
    # contributes exactly one (named) group to the combined pattern.
    ('NUMBER', r'[0-9]+(?:\.[0-9]*)?'),
    ('NAME', r'[a-zA-Z_][a-zA-Z_0-9]*'),
    ('OP', r'\+|\-|\*|\/'),
    ('LPAR', r'\('),
    ('RPAR', r'\)'),
    ('EQ', r'='),
    ('NEWLINE', r'\n'),
    # Any whitespace EXCEPT '\n'. A plain r'\s+' would also consume line
    # breaks that follow a space or tab (e.g. ' \n'), so they would be
    # reported as SPACE instead of NEWLINE and line counting would drift.
    ('SPACE', r'[^\S\n]+'),
    # Fallback for unrecognised input: takes everything up to the end of
    # the current line ('.' does not match '\n').
    ('ANYTHING', r'.+'),
]

# Sample inputs for the lexer:
#   test1 -- a simple assignment
#   test2 -- an arithmetic expression with parentheses
#   test3 -- contains a ',' that no specific token pattern accepts
test1, test2, test3 = (
    'pi = 3.141516',
    '1 / n + (42 * x)',
    'pi = 3,14',
)

In [102]:
# Wrap each pattern of regex_map in a named group and join the groups with
# '|' so a single compiled regex recognises every token type; the name of
# the group that matched identifies the token type.
template = r'(?P<{name}>{regex})'
REGEX_ALL = '|'.join([
    template.format(name=group_name, regex=pattern)
    for group_name, pattern in regex_map
])
re_all = re.compile(REGEX_ALL)

In [103]:
# Demo input: the three sample strings, one per line.
source = f'{test1}\n{test2}\n{test3}'

def lexer(source):
    """Yield the tokens found in ``source``, one ``Token`` at a time.

    The text is scanned with the module-level ``re_all`` pattern; the name
    of the group that matched becomes the token type. ``SPACE`` and
    ``NEWLINE`` matches are not emitted; they only serve to keep the line
    counter up to date.

    Parameters
    ----------
    source : str
        The text to tokenize.

    Yields
    ------
    Token
        ``Token(type, data, lineno)`` where ``data`` is the matched text
        and ``lineno`` is the 1-based line on which the match starts.
    """
    lineno = 1
    for match in re_all.finditer(source):
        kind = match.lastgroup
        text = match.group()
        if kind in ('SPACE', 'NEWLINE'):
            # Count every line break in the skipped text, not just NEWLINE
            # tokens: a whitespace pattern such as r'\s+' can swallow '\n'
            # (e.g. in ' \n'), which would otherwise leave lineno behind.
            lineno += text.count('\n')
            continue
        yield Token(kind, text, lineno)

# Materialise the token generator so the notebook displays every token.
[*lexer(source)]

[Token(type='NAME', data='pi', lineno=1),
 Token(type='EQ', data='=', lineno=1),
 Token(type='NUMBER', data='3.141516', lineno=1),
 Token(type='NUMBER', data='1', lineno=2),
 Token(type='OP', data='/', lineno=2),
 Token(type='NAME', data='n', lineno=2),
 Token(type='OP', data='+', lineno=2),
 Token(type='LPAR', data='(', lineno=2),
 Token(type='NUMBER', data='42', lineno=2),
 Token(type='OP', data='*', lineno=2),
 Token(type='NAME', data='x', lineno=2),
 Token(type='RPAR', data=')', lineno=2),
 Token(type='NAME', data='pi', lineno=3),
 Token(type='EQ', data='=', lineno=3),
 Token(type='NUMBER', data='3', lineno=3),
 Token(type='ANYTHING', data=',14', lineno=3)]