In [4]:
import re

import networkx as nx

In [10]:
import matplotlib as mpl
import matplotlib.pyplot as plt

%matplotlib inline
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names. Prefer the new name when this install
# ships it; otherwise keep the legacy name so older installs behave as before.
mpl.style.use(
    'seaborn-v0_8-muted'
    if 'seaborn-v0_8-muted' in mpl.style.available
    else 'seaborn-muted'
)

In [5]:
class Token:
    """Callable predicate that accepts tokens equal to a reference token.

    Both the reference token and every candidate are normalized with
    `_clean` before being compared, so with the defaults 'U.S.' and 'us'
    are considered the same token.

    Args:
        token: The reference token text.
        ignore_case: When true, lowercase tokens before comparing.
        scrub_re: Regex pattern whose matches are deleted from tokens before
            comparing. A falsy value (None or '') disables scrubbing. The
            default removes literal periods.
    """

    # The default is a raw string: a plain '\.' is an invalid escape sequence
    # that newer Python versions warn about at compile time.
    def __init__(self, token, ignore_case=True, scrub_re=r'\.'):
        self.ignore_case = ignore_case
        self.scrub_re = scrub_re

        self.token = token
        # Normalize once up front so each call only has to clean its input.
        self.token_clean = self._clean(token)

    def _clean(self, token):
        """Return `token` normalized according to this instance's settings."""
        if self.ignore_case:
            token = token.lower()

        if self.scrub_re:
            token = re.sub(self.scrub_re, '', token)

        return token

    def __call__(self, input_token):
        """Return True when `input_token` normalizes to the reference token."""
        return self._clean(input_token) == self.token_clean

    def __repr__(self):
        return '%s<%s>' % (self.__class__.__name__, self.token_clean)

In [8]:
# Automaton for the phrase "South Lake Tahoe", an optional comma, then either
# "CA" or "California". Edges with an `accept_fn` are guarded by that token
# predicate; the 2-tuples are edges stored without any attributes.
g = nx.DiGraph()
g.add_edges_from([
    (0, 1, {'accept_fn': Token('South')}),
    (1, 2, {'accept_fn': Token('Lake')}),
    (2, 3, {'accept_fn': Token('Tahoe')}),
    (3, 4, {'accept_fn': Token(',')}),
    (4, 5),
    (3, 5),
    (5, 6, {'accept_fn': Token('CA')}),
    (5, 7, {'accept_fn': Token('California')}),
])

In [24]:
# Outgoing edges of state 3: the comma-guarded edge to 4 and an attribute-less edge to 5.
g[3]

AtlasView({4: {'accept_fn': Token<,>}, 5: {}})

In [51]:
# Outgoing edges of state 5: one guarded edge per spelling ('CA' -> 6, 'California' -> 7).
g[5]

AtlasView({6: {'accept_fn': Token<ca>}, 7: {'accept_fn': Token<california>}})

In [54]:
# Walk the graph breadth-first from state 3 and list each visited state with its successors.
list(nx.bfs_successors(g, 3))

[(3, [4, 5]), (5, [6, 7])]

In [93]:
class Matcher:
    """Steps through a token automaton one token at a time.

    `fsa` must support `fsa[state].items()` yielding
    `(destination_state, attrs)` pairs, where `attrs` is a mapping. An edge
    whose attrs hold a truthy 'accept_fn' consumes a token when
    `accept_fn(token)` is true; an edge without one is an epsilon edge and is
    followed without consuming input. Matching starts in state 0.
    """

    def __init__(self, fsa):
        self.fsa = fsa
        self._states = {0}

    def _epsilon_closure(self, states):
        """Return `states` plus every state reachable through epsilon edges only."""
        closure = set(states)
        pending = list(closure)

        # Iterative worklist: the `closure` membership check makes this
        # terminate even when the epsilon edges form a cycle.
        while pending:
            state = pending.pop()
            for d_state, attrs in self.fsa[state].items():
                if not attrs.get('accept_fn') and d_state not in closure:
                    closure.add(d_state)
                    pending.append(d_state)

        return closure

    def __call__(self, token):
        """Try to consume `token` from the current states.

        Returns True and advances to the accepting destination states when at
        least one guarded edge accepts the token. Returns False and leaves the
        current states untouched otherwise, so a rejected token does not
        disturb later matching.
        """
        accepted = set()

        # Expand through epsilon edges first so guarded edges behind them are
        # considered alongside the direct ones. Read edges from the automaton
        # given to the constructor, not from a notebook-level global.
        for state in self._epsilon_closure(self._states):
            for d_state, attrs in self.fsa[state].items():
                accept_fn = attrs.get('accept_fn')
                if accept_fn and accept_fn(token):
                    accepted.add(d_state)

        if not accepted:
            return False

        self._states = accepted
        return True

In [94]:
# Feed the phrase to a fresh matcher token by token and print one verdict per line.
m = Matcher(g)
print(*map(m, ('South', 'Lake', 'Tahoe', ',', 'CA')), sep='\n')

True
True
True
True
True
