<a href="https://colab.research.google.com/github/amrashraf15/RegularExpression-To-DFA/blob/main/AssPart1(REtoDFA).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import json
from graphviz import Digraph
from itertools import count

Classes


In [15]:
class State:
    c = count(0)
    def __init__(self):
        self.name = f"S{next(State.c)}"
        self.transitions = {}
        self.is_EndState = False
        self.is_StartState = False


In [16]:
class NFA:
    def __init__(self, start, end):
        self.start = start
        self.end = end


Regex to Postfix Conversion (Shunting Yard Algorithm)

In [17]:
precedence = {'*': 3, '+': 3, '?': 3, '.': 2, '|': 1}

def addConcat(regex):
    result = ""
    for i in range(len(regex)):
        result += regex[i]
        if i + 1 < len(regex):
            if (regex[i].isalnum() or regex[i] in ')*+?]') and (regex[i+1].isalnum() or regex[i+1] in '(['):
                result += '.'
    return result

def toPostfix(regex):
    output = []
    stack = []
    regex = addConcat(regex)

    for char in regex:
        if char.isalnum():
            output.append(char)
        elif char == '(':
            stack.append(char)
        elif char == ')':
            while stack and stack[-1] != '(':
                output.append(stack.pop())
            stack.pop()
        elif char in precedence:
            while stack and stack[-1] != '(' and precedence[stack[-1]] >= precedence[char]:
                output.append(stack.pop())
            stack.append(char)
        else:
            raise ValueError(f"Unsupported symbol: {char}")
    while stack:
        output.append(stack.pop())
    return ''.join(output)

Thompsonâ€™s Construction Algorithm

In [23]:
def thompson(postfix):
    stack = []

    for char in postfix:
        if char.isalnum():
            s0 = State()
            s1 = State()
            s0.transitions[char] = {s1}
            stack.append(NFA(s0, s1))

        elif char == '.':
            nfa2, nfa1 = stack.pop(), stack.pop()
            nfa1.end.transitions['e'] = {nfa2.start}
            stack.append(NFA(nfa1.start, nfa2.end))

        elif char == '|':
            nfa2, nfa1 = stack.pop(), stack.pop()
            s0 = State()
            s1 = State()
            s0.transitions['e'] = {nfa1.start, nfa2.start}
            nfa1.end.transitions['e'] = {s1}
            nfa2.end.transitions['e'] = {s1}
            stack.append(NFA(s0, s1))

        elif char == '*':
            nfa1 = stack.pop()
            s0 = State()
            s1 = State()
            s0.transitions['e'] = {nfa1.start, s1}
            nfa1.end.transitions['e'] = {nfa1.start, s1}
            stack.append(NFA(s0, s1))

        elif char == '+':
            nfa1 = stack.pop()
            s0 = State()
            s1 = State()
            s0.transitions['e'] = {nfa1.start}
            nfa1.end.transitions['e'] = {nfa1.start, s1}
            stack.append(NFA(s0, s1))

        elif char == '?':
            nfa1 = stack.pop()
            s0 = State()
            s1 = State()
            s0.transitions['e'] = {nfa1.start, s1}
            nfa1.end.transitions['e'] = {s1}
            stack.append(NFA(s0, s1))

    nfa = stack.pop()
    nfa.end.is_EndState = True
    return nfa


In [24]:
def nfaTojson(nfa, filename):
    states = {}
    visited = set()

    def dfs(state):
        if state.name in visited:
            return
        visited.add(state.name)
        transitions = {"isTerminatingState": state.is_EndState}
        for symbol, next_states in state.transitions.items():
            for s in next_states:
                transitions[symbol] = s.name
                dfs(s)
        states[state.name] = transitions

    dfs(nfa.start)
    data = {"startingState": nfa.start.name}
    data.update(states)

    with open(filename, "w") as f:
        json.dump(data, f, indent=2)
    return data

In [25]:
def main():

    regex = "a(b|c)*d"

    postfix = toPostfix(regex)
    print(f"Regex: {regex}")
    print(f"Postfix: {postfix}")


    nfa = thompson(postfix)


    filename = "nfa.json"
    nfa_json = nfaTojson(nfa, filename)

    print(f"NFA saved to {filename}")
    print("NFA JSON structure:")
    print(json.dumps(nfa_json, indent=2))


if __name__ == "__main__":
    main()


Regex: a(b|c)*d
Postfix: abc|*.d.
NFA saved to nfa.json
NFA JSON structure:
{
  "startingState": "S0",
  "S11": {
    "isTerminatingState": true
  },
  "S10": {
    "isTerminatingState": false,
    "d": "S11"
  },
  "S9": {
    "isTerminatingState": false,
    "e": "S10"
  },
  "S7": {
    "isTerminatingState": false,
    "e": "S6"
  },
  "S5": {
    "isTerminatingState": false,
    "e": "S7"
  },
  "S4": {
    "isTerminatingState": false,
    "c": "S5"
  },
  "S3": {
    "isTerminatingState": false,
    "e": "S7"
  },
  "S2": {
    "isTerminatingState": false,
    "b": "S3"
  },
  "S6": {
    "isTerminatingState": false,
    "e": "S2"
  },
  "S8": {
    "isTerminatingState": false,
    "e": "S6"
  },
  "S1": {
    "isTerminatingState": false,
    "e": "S8"
  },
  "S0": {
    "isTerminatingState": false,
    "a": "S1"
  }
}
