***

# Nondeterministic Finite Automata Simulator

> ### For csc427: Theory of Automata and Complexity. 
### University of Miami, Spring 2020.
### Burton Rosenberg.
__*Created: 18 February 2020*__
<br>__*Last update: 18 February 2020*__

***


### Code overview

The class FiniteAutomata contains the instance variables:

- `start_state`: the starting state
- `final_states`: the set of all final states
- `transitions`: a dictionary whose keys are the pairs (symbol,state) and whose values are sets of states

It is intended that symbols are simple characters such as upper and lower case letters, numbers, and perhaps the dash and underscore. The states are named by strings of these characters. The character ":" is reserved to represent epsilon, that is, an epsilon move of the nondeterministic finite automaton.

A parser takes a description and fills in the instance variables of an instance of class FiniteAutomata. Then the FA is simulated on a string by working symbol by symbol through the string, looking for matching transitions, and updating the set of current states with each symbol.

Because the machine is nondeterministic, if a required (symbol,state) is not a key in the transition dictionary, the value is taken to be the empty set of states. More than one value is permitted for any (symbol,state) key.


### FA description syntax

The FA is described by a multiline string, with the format:

- If the first character of the line after whitespace is #, the entire line is a comment
- Stanzas begin with a tag-name in column 1, a colon, and an argument; stanzas continue with non-empty lines beginning with whitespace.
- The start stanza has tag-name "start" and one argument and no continuation lines. The argument names the start state.
- The final stanza has tag-name "final" and one argument naming one of the final states. Continuation lines name additional final states, one per continuation line.
- The state stanza has tag-name "state" and one argument naming the source state to be combined with symbol-state pairs named in the continuation lines. Continuation lines have two arguments, a symbol and a state, one line per transition.
- The symbol ":" is reserved and represents epsilon.


In [148]:
import string
import sys
import os
import argparse
import re

#
# fa-sim.py
#
# author: bjr
# date: 21 jan 2020
# last update: 4 feb 2020
#
#

class FiniteAutomata:
    """
    a nondeterministic finite automaton with epsilon moves, simulated by
    tracking the set of states the machine may be in after each symbol.

    instance variables:
      start_state   -- name of the start state
      final_states  -- set of names of the final (accepting) states
      transitions   -- dictionary (symbol,state) -> set of states; the
                       symbol ":" denotes an epsilon move
      current_state -- set of states the machine is currently in
      step_counter  -- number of the next step, used in verbose output
    """

    def __init__(self):
        self.start_state = ""
        self.final_states = set()
        # transitions is a dictionary (s,q)->R
        # - s in { \w|:} where ":" is an epsilon move,
        # - R subset of Q, and for a DFA, |R|=1
        self.transitions = {}

        # the set of states the machine is in; a DFA is the special
        # case where this set is always a singleton.
        self.current_state = set()
        self.step_counter = 0

    def set_start_state(self,state):
        """
        record state as the start state
        """
        self.start_state = state

    def add_final_state(self,state):
        """
        add state to the set of final states
        """
        self.final_states.add(state)

    def add_transition(self,state_from,symbol,state_to):
        """
        add state_to to the set of states reachable from state_from on symbol
        """
        self.transitions.setdefault((symbol,state_from), set()).add(state_to)

    def get_current_state(self):
        """
        return the set of states the machine is currently in
        """
        return self.current_state

    def restart(self):
        """
        prepare the machine for a new run: epsilon-close every transition
        value, then set the current state to the epsilon closure of the
        start state and reset the step counter.
        """
        # apply epsilon closure to all transition values. only values are
        # replaced (never keys), so updating while iterating is safe; the
        # closure is idempotent, so repeated restarts do not change the result.
        for key in self.transitions:
            self.transitions[key] = self.epsilon_closure(self.transitions[key])

        # set the start state and apply the epsilon closure
        self.current_state = self.epsilon_closure({self.start_state})

        # step 0 is the initial epsilon closure, so symbols count from 1
        self.step_counter = 1

    def step_transition(self,symbol,verbose=False):
        """
        take one state transition, based on the given symbol, updating the current_state.
        states with no transition on symbol contribute nothing to the new set.
        """
        next_states = set()
        for state in self.current_state:
            next_states |= self.transitions.get((symbol,state), set())
        if verbose:
            print("\t", self.step_counter, "\t", symbol, "-->",next_states)
        self.current_state = next_states
        self.step_counter += 1

    def epsilon_closure(self,set_of_states):
        """
        given the set, set_of_states, compute and return that set that is the epsilon closure.
        the argument is not modified; a new set is returned.
        """
        closure = set(set_of_states)
        # worklist of states whose epsilon moves have not been followed yet
        pending = list(closure)
        while pending:
            state = pending.pop()
            for reached in self.transitions.get((':',state), ()):
                if reached not in closure:
                    closure.add(reached)
                    pending.append(reached)
        return closure

    def accept_string(self,word,verbose=False):
        """
        restart the machine, run it on word, and return True if at least one
        of the resulting current states is a final state, else False.
        elements of word that contain no \\w character are skipped without
        taking a step. if verbose, a trace of the computation is printed.
        """
        self.restart()

        if verbose:
            print("\nComputation:")
            print("\tstep\tsymbol --> new state")
            print("\t-------------------------------------")
            print("\t",0,"\t", ":", "-->", self.current_state)

        for b in word:
            m = re.search(r'(\w)',b)
            if m:
                self.step_transition(m.group(1),verbose)

        if verbose: print()

        return len(self.current_state & self.final_states)>0

    def print_fa(self):
        """
        print the start state, the final states and the transition table
        """
        print("\nstart state:\n\t",self.start_state)
        print("final state(s):\n\t",self.final_states)
        print("transitions:")
        for t in self.transitions:
            print("\t",t,"->",self.transitions[t])


def create_fa_from_description(fa_string):
    """
    code to parse a Finite Automata description into the FiniteAutomata object.
    the parsing for a DFA and an NFA is the same; only how the parsed data
    gets stored in the FiniteAutomata object differs.

    fa_string is a multiline string:
      - a line whose first non-whitespace character is # is a comment and is
        ignored (it does not end a stanza)
      - "start: Q" names the start state
      - "final: Q" names a final state; indented continuation lines each
        name one more final state
      - "state: Q" opens the transitions out of Q; indented continuation
        lines "SYMBOL TARGET" each add one transition, ":" meaning epsilon
      - a blank line ends the current stanza

    any other line is reported on stdout with its 0-based line number and
    dropped. returns the populated FiniteAutomata object.
    """

    fa_obj = FiniteAutomata()
    current_state = ""
    in_state_read = False
    in_final_read = False

    for line_no, line in enumerate(fa_string.splitlines()):
        # comment lines are fully ignored
        if re.search(r'^\s*#',line):
            continue

        # a line beginning with whitespace may continue the open stanza
        if re.search(r'^\s+',line):

            if in_state_read:
                m = re.search(r'\s+(\w|:)\s+(\w+)',line)
                if m:
                    fa_obj.add_transition(current_state,m.group(1),m.group(2))
                    continue

            if in_final_read:
                m = re.search(r'\s+(\w+)',line)
                if m:
                    fa_obj.add_final_state(m.group(1))
                    continue

        # anything that is not a valid continuation closes the open stanza
        in_state_read = False
        in_final_read = False

        # blank lines do end multiline input
        if re.search(r'^\s*$',line):
            continue

        m = re.search(r'^start:\s*(\w+)',line)
        if m:
            fa_obj.set_start_state(m.group(1))
            continue

        m = re.search(r'^final:\s*(\w+)',line)
        if m:
            fa_obj.add_final_state(m.group(1))
            in_final_read = True
            continue

        m = re.search(r'^state:\s*(\w+)',line)
        if m:
            in_state_read = True
            current_state = m.group(1)
            continue

        print(line_no,"warning: unparsable line, dropping")

    return fa_obj
 


### Sample run

In [149]:
# first sample machine, written in the description syntax that
# create_fa_from_description parses. every transition here has exactly
# one target, so the machine is deterministic.
fad = """
#
# finite automata from Sipser, figure 1.6
#
# accepts any string ending in a 1 or containing
# a 1 and ending with an even number of 0's
#

start: q1

final: q2

state: q1
    0 q1
    1 q2

state: q2
    1 q2
    0 q3

state: q3
    0 q2
    1 q2

"""

# test words, one per line; the empty line is the empty word, which
# fa_do prints as ":".
tests = """0
1

10
100
10100
"""

def fa_do(fa_description,words):
    """
    build the automaton described by fa_description, print it, then run it
    verbosely on each line of words and print the word with the verdict.
    the empty word is displayed as ":".
    """
    automaton = create_fa_from_description(fa_description)
    automaton.print_fa()

    for raw_line in words.splitlines():
        candidate = raw_line.strip()
        verdict = automaton.accept_string(candidate,verbose=True)
        # show the empty word with the epsilon symbol
        shown = candidate if candidate else ":"
        print(shown,"\t", verdict)

#fa_do(fad,tests)


# second sample machine: nondeterministic, with two targets for (1,q1)
# and an epsilon move (":") out of q2.
fad2="""
# sipser's N1, figure 1.27 in the third edition
start: q1
final: q4
state: q1
    0 q1
    1 q1
    1 q2
state: q2
    0 q3
    : q3
state: q3
    1 q4
state: q4
    0 q4
    1 q4
"""

# test words for fad2; note that this rebinds the name tests.
tests="""1
0
101
1100
100100101111
100100101000
"""


#fa_do(fad2,tests)



# third sample machine: nondeterministic, with an epsilon move out of
# the start state q1, which is also the only final state.
fad3="""
# sipser's N4, figure 1.36 in the third edition
start: q1
final: q1

state: q1
    b q2
    : q3
state: q2
    a q2
    a q3
    b q3
state: q3
    a q1

"""

# test words for fad3 (tests is rebound again); the empty second line
# is the empty word.
tests="""a

baba
baa
b
bb
babba
babbaaa
"""


# run the third machine on its test words; executes when the cell/module runs.
fa_do(fad3,tests)





start state:
	 q1
final state(s):
	 {'q1'}
transitions:
	 ('b', 'q1') -> {'q2'}
	 (':', 'q1') -> {'q3'}
	 ('a', 'q2') -> {'q3', 'q2'}
	 ('b', 'q2') -> {'q3'}
	 ('a', 'q3') -> {'q1'}

Computation:
	step	symbol --> new state
	-------------------------------------
	 0 	 : --> {'q1', 'q3'}
	 1 	 a --> {'q1', 'q3'}

a 	 True

Computation:
	step	symbol --> new state
	-------------------------------------
	 0 	 : --> {'q1', 'q3'}

: 	 True

Computation:
	step	symbol --> new state
	-------------------------------------
	 0 	 : --> {'q1', 'q3'}
	 1 	 b --> {'q2'}
	 2 	 a --> {'q3', 'q2'}
	 3 	 b --> {'q3'}
	 4 	 a --> {'q1', 'q3'}

baba 	 True

Computation:
	step	symbol --> new state
	-------------------------------------
	 0 	 : --> {'q1', 'q3'}
	 1 	 b --> {'q2'}
	 2 	 a --> {'q3', 'q2'}
	 3 	 a --> {'q1', 'q2', 'q3'}

baa 	 True

Computation:
	step	symbol --> new state
	-------------------------------------
	 0 	 : --> {'q1', 'q3'}
	 1 	 b --> {'q2'}

b 	 False

Computation:
	step	symbol --