In [1]:
# 1. Define Token Patterns
import re

# Each pattern is wrapped in a named group (?P<...>) so that, once the patterns
# are combined into one alternation, a match's `lastgroup` attribute reports
# which token type was recognised.
NAME = r'(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)'  # a letter or underscore, then letters, digits or underscores
NUM = r'(?P<NUM>\d+)'  # one or more digits
PLUS = r'(?P<PLUS>\+)'  # a literal '+'
TIMES = r'(?P<TIMES>\*)'  # a literal '*'
EQ = r'(?P<EQ>=)'  # a literal '='
WS = r'(?P<WS>\s+)'  # one or more whitespace characters

In [3]:
# Build a single alternation out of the individual token patterns; the
# alternatives are listed in the order they should be tried.
token_patterns = [NAME, NUM, PLUS, TIMES, EQ, WS]
master_pat = re.compile('|'.join(token_patterns))
master_pat

re.compile(r'(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)|(?P<NUM>\d+)|(?P<PLUS>\+)|(?P<TIMES>\*)|(?P<EQ>=)|(?P<WS>\s+)',
           re.UNICODE)

In [7]:
# Create a scanner over a sample string; each call to scanner.match() will
# then try to match the next token where the previous match ended.
# NOTE(review): Pattern.scanner() does not seem to be covered by the `re`
# module documentation -- confirm it is available on the target Python.
sample_text = 'foo = 42'
scanner = master_pat.scanner(sample_text)
scanner

<_sre.SRE_Scanner at 0x7f9c701fc040>

In [5]:
# Pull one token from the scanner and report its type and matched text.
match = scanner.match()
if match is not None:
    token_type = match.lastgroup
    token_text = match.group()
    print(token_type, token_text)

NAME foo


In [8]:
from collections import namedtuple

# A token is a (type, value) pair: `type` is the name of the regex group that
# matched and `value` is the matched text.
Token = namedtuple('Token', ['type', 'value'])

def generate_tokens(pat, text, *, strict=False):
    """Lazily split ``text`` into ``Token`` objects using the pattern ``pat``.

    Parameters
    ----------
    pat : compiled regular expression
        An alternation of named groups; the name of the group that matched
        becomes the token's ``type``.
    text : str
        The string to tokenize.
    strict : bool, optional (keyword-only)
        When false (the default) scanning simply stops at the first position
        where no alternative of ``pat`` matches, and the rest of ``text`` is
        dropped without notice.  When true, that situation raises
        ``ValueError`` instead of silently truncating the token stream.

    Yields
    ------
    Token
        One token per consecutive match, in input order.

    Raises
    ------
    ValueError
        Only when ``strict`` is true and part of ``text`` could not be
        tokenized.  Because this is a generator, the error surfaces once the
        tokens preceding the bad position have been consumed.
    """
    scanner = pat.scanner(text)
    consumed = 0  # index just past the last successfully matched token
    # iter(callable, sentinel) keeps calling scanner.match() until it
    # returns None, i.e. until nothing matches at the current position.
    for m in iter(scanner.match, None):
        consumed = m.end()
        yield Token(m.lastgroup, m.group())
    if strict and consumed != len(text):
        raise ValueError(
            'cannot tokenize input: unexpected character {!r} at position {}'
            .format(text[consumed], consumed)
        )

# Example use: print every token (whitespace included) found in a short
# assignment statement.
token_stream = generate_tokens(master_pat, 'foo = 42')
for tok in token_stream:
    print(tok)

Token(type='NAME', value='foo')
Token(type='WS', value=' ')
Token(type='EQ', value='=')
Token(type='WS', value=' ')
Token(type='NUM', value='42')


In [9]:
# Tokenize the same sample again, then lazily filter out the whitespace
# tokens so that only the meaningful ones are printed.
all_tokens = generate_tokens(master_pat, 'foo = 42')
tokens = (t for t in all_tokens if t.type != 'WS')
for tok in tokens:
    print(tok)

Token(type='NAME', value='foo')
Token(type='EQ', value='=')
Token(type='NUM', value='42')
