In [26]:
import re

from collections import defaultdict

In [98]:
class Token:
    """Matcher for one literal token, compared after normalisation.

    The reference token and every candidate go through the same clean-up
    (optional lower-casing, then deletion of everything matching
    ``scrub_re``) before being compared for equality.

    Args:
        token: The literal text this matcher should accept.
        ignore_case: When true, both sides are lower-cased before comparing.
        scrub_re: Regular expression whose matches are removed before the
            comparison; a falsy value disables scrubbing. The default removes
            literal periods, so ``'C.A.'`` is accepted by ``Token('CA')``.
    """

    # The default is a raw string: '\.' inside a plain literal is an invalid
    # escape sequence and triggers a warning on current Python versions.
    def __init__(self, token, ignore_case=True, scrub_re=r'\.'):
        self.ignore_case = ignore_case
        self.scrub_re = scrub_re

        self.token = token
        # Normalised once up front so each call only cleans the candidate.
        self.token_clean = self._clean(token)

    def _clean(self, token):
        """Return ``token`` normalised according to this matcher's settings."""
        if self.ignore_case:
            token = token.lower()

        if self.scrub_re:
            token = re.sub(self.scrub_re, '', token)

        return token

    def __call__(self, input_token):
        """Return True when ``input_token`` normalises to the reference token."""
        return self._clean(input_token) == self.token_clean

    def __repr__(self):
        return '%s<%s>' % (self.__class__.__name__, self.token_clean)

In [121]:
class Comma:
    """Matcher that accepts exactly the one-character token ``','``."""

    def __repr__(self):
        # Fixed label used wherever the matcher is displayed.
        return '<,>'

    def __call__(self, input_token):
        """Return True only when ``input_token`` equals a comma."""
        is_comma = input_token == ','
        return is_comma

In [122]:
class Empty:
    """Matcher that accepts every token unconditionally."""

    def __repr__(self):
        # Fixed label used wherever the matcher is displayed.
        return '<ε>'

    def __call__(self, input_token):
        """Return True regardless of what ``input_token`` is."""
        always_accept = True
        return always_accept

In [123]:
# Matcher for the word "south" using Token's default normalisation settings.
south = Token(token='south')

In [124]:
class State:
    """A numbered automaton state with start/final markers.

    Two states compare equal when their indices are equal, which keeps
    equality consistent with the index-based hash.

    Args:
        index: Integer identifier of the state; also the basis of its hash.
        start: Whether this state is a start state.
        final: Whether this state is an accepting state.
    """

    def __init__(self, index, start=False, final=False):
        self.index = index
        self.start = start
        self.final = final

    def __eq__(self, other):
        """Compare by index; defer to the other operand for foreign types."""
        if not isinstance(other, State):
            return NotImplemented
        return self.index == other.index

    def __hash__(self):
        # For integer indices this is the index itself, as before.
        return hash(self.index)

    def __repr__(self):
        return '%s<%d>' % (self.__class__.__name__, self.index)

In [125]:
class FSA:
    """Finite-state automaton stored as a nested edge table.

    Attributes (all plain containers):
      * ``edges``: maps a source state to ``{target state: accept function}``.
      * ``start_state``: source of the latest edge added with ``start=True``,
        or None if there has been none.
      * ``final_states``: targets of every edge added with ``final=True``.
    """

    def __init__(self):
        self.edges = defaultdict(dict)
        self.start_state = None
        self.final_states = set()

    def add_edge(self, s1, s2, accept_fn, start=False, final=False):
        """Register a transition from ``s1`` to ``s2`` guarded by ``accept_fn``.

        An existing ``s1 -> s2`` edge is replaced. ``start=True`` records
        ``s1`` as the start state; ``final=True`` adds ``s2`` to the final
        states.
        """
        outgoing = self.edges[s1]
        outgoing[s2] = accept_fn

        if final:
            self.final_states.add(s2)

        if start:
            self.start_state = s1

In [126]:
# Edge layout: 0 -South-> 1 -Lake-> 2 -Tahoe-> 3.
# From 3 either a comma leads to 4 or the accept-anything matcher leads to 5;
# 4 reaches 5 through the accept-anything matcher as well.
# From 5, 'CA' ends in 6 and 'California' ends in 7 (both marked final).
city = FSA()
city.add_edge(s1=0, s2=1, accept_fn=Token('South'), start=True)
city.add_edge(s1=1, s2=2, accept_fn=Token('Lake'))
city.add_edge(s1=2, s2=3, accept_fn=Token('Tahoe'))
city.add_edge(s1=3, s2=4, accept_fn=Comma())
city.add_edge(s1=3, s2=5, accept_fn=Empty())
city.add_edge(s1=4, s2=5, accept_fn=Empty())
city.add_edge(s1=5, s2=6, accept_fn=Token('CA'), final=True)
city.add_edge(s1=5, s2=7, accept_fn=Token('California'), final=True)

In [127]:
# Display the edge table: source state -> {target state: matcher}.
city.edges

defaultdict(dict,
            {0: {1: Token<south>},
             1: {2: Token<lake>},
             2: {3: Token<tahoe>},
             3: {4: <,>, 5: <ε>},
             4: {5: <ε>},
             5: {6: Token<ca>, 7: Token<california>}})

In [128]:
# Display the state recorded by the add_edge(..., start=True) call.
city.start_state

0

In [149]:
class Acceptor:
    """Stateful walker that feeds tokens through an FSA one at a time.

    Args:
        fsa: Object exposing ``edges`` (a mapping of source state to
            ``{target state: accept function}``) and ``start_state``. The
            walk begins at ``fsa.start_state``.
    """

    def __init__(self, fsa):
        self.fsa = fsa
        self._state = fsa.start_state

    def __call__(self, it):
        """Try to consume the token ``it`` from the current state.

        Returns:
            True if exactly one outgoing edge accepts the token; the acceptor
            then moves to that edge's target. False if no edge accepts it or
            more than one does; the current state is left unchanged.
        """
        # .get() instead of indexing: if ``edges`` is a defaultdict, indexing
        # a state without outgoing edges would insert an empty entry into the
        # automaton as a side effect of a mere lookup.
        outgoing = self.fsa.edges.get(self._state, {})

        accepted = [
            state for state, accept_fun in outgoing.items() if accept_fun(it)
        ]

        if len(accepted) == 1:
            self._state = accepted[0]
            return True

        # Explicit result instead of falling off the end and returning None.
        return False

In [153]:
# Fresh acceptor positioned at the automaton's start state.
acc = Acceptor(fsa=city)

In [154]:
# Feed one token; True means exactly one outgoing edge accepted it and the acceptor advanced.
acc('south')

True

In [158]:
# The acceptor keeps its position between calls, so this result depends on
# which tokens were fed before (and therefore on cell execution order).
acc('lake')

In [156]:
# Feed 'tahoe' from whatever state the earlier calls left the acceptor in.
acc('tahoe')

True