In [30]:
import re

import networkx as nx

In [31]:
import matplotlib as mpl
import matplotlib.pyplot as plt

%matplotlib inline
mpl.style.use('seaborn-muted')

In [32]:
class Token:
    """Predicate that accepts input tokens equal to a reference token.

    The reference token and every candidate are normalized the same way
    (optional lower-casing, then optional regex scrubbing) before they are
    compared for equality.
    """

    def __init__(self, token, ignore_case=True, scrub_re=r'\.'):
        """Store the reference token and its normalized form.

        Args:
            token: Reference string to match against.
            ignore_case: When true, strings are lower-cased before comparing.
            scrub_re: Regex pattern whose matches are deleted before
                comparing; a falsy value disables scrubbing. The default
                removes literal periods.
        """
        self.ignore_case = ignore_case
        self.scrub_re = scrub_re

        self.token = token
        self.token_clean = self._clean(token)

    def _clean(self, token):
        """Return `token` normalized according to this instance's settings."""
        if self.ignore_case:
            token = token.lower()

        if self.scrub_re:
            token = re.sub(self.scrub_re, '', token)

        return token

    def __call__(self, input_token):
        """Return True when `input_token` normalizes to the reference token."""
        return self._clean(input_token) == self.token_clean

    def __repr__(self):
        return '%s<%s>' % (self.__class__.__name__, self.token_clean)

In [78]:
# Acceptor for: "South" "Lake" "Tahoe", an optional ",", then "CA" or
# "California". Edges without an `accept_fn` carry no token test.
g = nx.DiGraph()
g.add_edges_from([
    (0, 1, {'accept_fn': Token('South')}),
    (1, 2, {'accept_fn': Token('Lake')}),
    (2, 3, {'accept_fn': Token('Tahoe')}),
    (3, 4, {'accept_fn': Token(',')}),
    (4, 5),
    (3, 5),
    (5, 6, {'accept_fn': Token('CA')}),
    (5, 7, {'accept_fn': Token('California')}),
])

In [79]:
class Matcher:
    """Feed tokens one at a time through a finite-state acceptor.

    `fsa` is indexed as ``fsa[state]`` and must yield a mapping of
    ``next_state -> attrs`` (a ``networkx.DiGraph`` or a dict of dicts both
    work). An edge whose attrs hold a truthy ``accept_fn`` is taken when
    ``accept_fn(token)`` is truthy; an edge without one is followed without
    testing the token at that edge.
    """

    def __init__(self, fsa):
        self.fsa = fsa
        # Matching always starts from state 0.
        self._states = {0}
        # Tokens that advanced the matcher, in order.
        self.accepted = []

    def step(self, start_state, token, visited=None):
        """Return the set of states reachable from `start_state` on `token`.

        Args:
            start_state: State whose outgoing edges are examined.
            token: Candidate token passed to each edge's ``accept_fn``.
            visited: Optional set of states already explored through
                edges without an ``accept_fn``; it is updated in place and
                guards against cycles of such edges.

        Returns:
            Set of destination states (possibly empty).
        """
        # Compare against None so a caller-supplied empty set is kept and
        # updated rather than silently replaced.
        if visited is None:
            visited = set()

        visited.add(start_state)

        next_states = set()
        for d_state, attrs in self.fsa[start_state].items():

            accept_fn = attrs.get('accept_fn')

            if accept_fn:
                if accept_fn(token):
                    next_states.add(d_state)

            # No token test on this edge: look through it, once per state.
            elif d_state not in visited:
                next_states.update(self.step(d_state, token, visited))

        return next_states

    def __call__(self, token):
        """Try to advance on `token`.

        Returns True and records the token when at least one current state
        can move forward; otherwise returns False and leaves the matcher
        unchanged.
        """
        next_states = set()
        for state in self._states:
            next_states.update(self.step(state, token))

        if next_states:
            self._states = next_states
            self.accepted.append(token)
            return True

        return False

In [81]:
# Walk the full phrase through a fresh matcher, printing each verdict.
# (A second ',' right after the first would be rejected and leave the
# matcher where it was.)
m = Matcher(g)
for tok in ('South', 'Lake', 'Tahoe', ',', 'CA'):
    print(m(tok))

True
True
True
True
True


In [82]:
# Tokens that advanced the matcher, in the order they were accepted.
m.accepted

['South', 'Lake', 'Tahoe', ',', 'CA']