<a href="https://colab.research.google.com/github/amrashraf15/RegularExpression-To-DFA/blob/main/AssPart1(REtoDFA).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import json
from graphviz import Digraph
from itertools import count

Classes


In [4]:
class State:
    """A single automaton state with a unique, auto-generated name.

    Every instance draws the next number from the class-wide counter
    ``_ids`` and is named ``S<number>``; it starts with no outgoing
    transitions and with all role flags cleared.
    """

    # Shared by all instances so that two states never get the same name.
    _ids = count(0)

    def __init__(self):
        self.name = f"S{next(State._ids)}"
        # Outgoing edges; thompson() stores a set of target states per symbol.
        self.transitions = {}
        self.is_EndState = False
        self.is_StartState = False
        # thompson() marks the accepting state through `is_final`, and
        # nfa_to_json() reads `is_final` on every state it visits. Without a
        # default here, every non-accepting state raises AttributeError.
        self.is_final = False


In [5]:
class NFA:
    """An NFA fragment, identified by its entry state and its exit state."""

    def __init__(self, start, end):
        # Only the two boundary states are kept; everything in between is
        # reachable through the states' own transition tables.
        self.start, self.end = start, end


Regex to Postfix Conversion (Shunting Yard Algorithm)

In [8]:
# Binding strength of each regex operator; a larger number binds tighter.
# The three postfix unary operators share the top level, explicit
# concatenation ('.') sits below them, and alternation ('|') is weakest.
precedence = {
    **dict.fromkeys('*+?', 3),
    '.': 2,
    '|': 1,
}

def add_concat_operator(regex):
    """Return ``regex`` with an explicit '.' wherever concatenation is implied.

    A '.' is inserted between two neighbouring characters when the left one
    can end an operand (alphanumeric, or one of ``)*+?]``) and the right one
    can begin an operand (alphanumeric, or one of ``([``).
    """
    pieces = list(regex[:1])
    # Walk over every adjacent (left, right) pair of characters.
    for left, right in zip(regex, regex[1:]):
        closes_operand = left.isalnum() or left in ')*+?]'
        opens_operand = right.isalnum() or right in '(['
        if closes_operand and opens_operand:
            pieces.append('.')
        pieces.append(right)
    return ''.join(pieces)

def to_postfix(regex):
    """Convert an infix regular expression to postfix (shunting-yard).

    Explicit concatenation operators are inserted first with
    add_concat_operator(); operators are then ordered with the module-level
    ``precedence`` table. Operators of equal precedence are popped before the
    new one is pushed, i.e. they are treated as left-associative.

    Args:
        regex: Infix pattern built from alphanumeric symbols, parentheses and
            the operators in ``precedence``.

    Returns:
        The postfix form of the pattern as a string.

    Raises:
        ValueError: If the pattern contains an unsupported symbol or its
            parentheses are unbalanced.
    """
    output, stack = [], []
    regex = add_concat_operator(regex)

    for char in regex:
        if char.isalnum():
            output.append(char)
        elif char == '(':
            stack.append(char)
        elif char == ')':
            while stack and stack[-1] != '(':
                output.append(stack.pop())
            # Running out of stack means there was no '(' to close; report it
            # rather than leaking an IndexError from pop() on an empty list.
            if not stack:
                raise ValueError("Unbalanced parentheses: unmatched ')'")
            stack.pop()  # discard the matching '('
        elif char in precedence:
            while stack and stack[-1] != '(' and precedence[stack[-1]] >= precedence[char]:
                output.append(stack.pop())
            stack.append(char)
        else:
            raise ValueError(f"Unsupported symbol: {char}")

    while stack:
        operator = stack.pop()
        # A '(' still on the stack was never closed; emitting it would
        # produce a postfix string that is not a valid expression.
        if operator == '(':
            raise ValueError("Unbalanced parentheses: unmatched '('")
        output.append(operator)
    return ''.join(output)

Thompson’s Construction Algorithm

In [9]:
def thompson(postfix):
    """Build an NFA from a postfix regular expression (Thompson's construction).

    Each alphanumeric character becomes a two-state fragment. The operators
    '.', '|', '*', '+' and '?' pop one or two fragments from a stack, wire
    them together with 'ε' transitions and push the combined fragment back.

    Args:
        postfix: Postfix regex string, e.g. the output of to_postfix().

    Returns:
        The NFA for the whole expression. Its end state has ``is_final`` and
        ``is_EndState`` set to True, and its start state has
        ``is_StartState`` set to True.

    Raises:
        ValueError: If ``postfix`` is empty, contains an unsupported symbol,
            has an operator without enough operands, or leaves operands
            unused at the end.
    """
    stack = []

    def pop_fragment(operator):
        # Too few operands means the postfix string is malformed; say so
        # instead of leaking a bare IndexError from pop() on an empty list.
        if not stack:
            raise ValueError(f"Operator '{operator}' is missing an operand")
        return stack.pop()

    for char in postfix:
        if char.isalnum():
            s0, s1 = State(), State()
            s0.transitions[char] = {s1}
            stack.append(NFA(s0, s1))

        elif char == '.':  # Concatenation
            # The top of the stack is the right-hand operand.
            nfa2 = pop_fragment(char)
            nfa1 = pop_fragment(char)
            nfa1.end.transitions['ε'] = {nfa2.start}
            stack.append(NFA(nfa1.start, nfa2.end))

        elif char == '|':  # Union
            nfa2 = pop_fragment(char)
            nfa1 = pop_fragment(char)
            s0, s1 = State(), State()
            s0.transitions['ε'] = {nfa1.start, nfa2.start}
            nfa1.end.transitions['ε'] = {s1}
            nfa2.end.transitions['ε'] = {s1}
            stack.append(NFA(s0, s1))

        elif char == '*':  # Kleene star
            nfa1 = pop_fragment(char)
            s0, s1 = State(), State()
            # s0 may skip the fragment entirely; the fragment's end may loop
            # back for another pass or leave.
            s0.transitions['ε'] = {nfa1.start, s1}
            nfa1.end.transitions['ε'] = {nfa1.start, s1}
            stack.append(NFA(s0, s1))

        elif char == '+':  # One or more
            nfa1 = pop_fragment(char)
            s0, s1 = State(), State()
            # No bypass from s0, so the fragment is traversed at least once.
            s0.transitions['ε'] = {nfa1.start}
            nfa1.end.transitions['ε'] = {nfa1.start, s1}
            stack.append(NFA(s0, s1))

        elif char == '?':  # Optional
            nfa1 = pop_fragment(char)
            s0, s1 = State(), State()
            # Bypass allowed, but no loop back: zero or one pass.
            s0.transitions['ε'] = {nfa1.start, s1}
            nfa1.end.transitions['ε'] = {s1}
            stack.append(NFA(s0, s1))

        else:
            # Previously such characters were skipped silently, which built
            # an NFA for a different expression than the one supplied.
            raise ValueError(f"Unsupported symbol: {char}")

    if not stack:
        raise ValueError("Cannot build an NFA from an empty expression")
    nfa = stack.pop()
    # Anything left over would have been dropped without notice.
    if stack:
        raise ValueError("Malformed postfix expression: operands left over")

    # Set the role flags that State declares, plus `is_final`, which is the
    # flag nfa_to_json() reads.
    nfa.start.is_StartState = True
    nfa.end.is_EndState = True
    nfa.end.is_final = True
    return nfa


In [10]:
def nfa_to_json(nfa, filename):
    """Write every state reachable from ``nfa.start`` to a JSON file.

    Layout of the result::

        {
          "startingState": <state name>,
          <state name>: {"isTerminatingState": <flag>, <symbol>: <target>, ...},
          ...
        }

    ``<target>`` is a single state name when the symbol leads to exactly one
    state, and a name-sorted list of state names when it leads to several.
    Earlier, a symbol with several targets kept only whichever target was
    iterated last, so those edges were missing from the file.

    Args:
        nfa: Object whose ``start`` attribute is the entry state. States must
            provide ``name`` and ``transitions`` (symbol -> set of states).
        filename: Path of the JSON file to create or overwrite.

    Returns:
        The dictionary that was written to ``filename``.
    """
    states = {}
    visited = set()

    def dfs(state):
        if state.name in visited:
            return
        visited.add(state.name)
        # Only the accepting state is guaranteed to carry `is_final`; a state
        # without the attribute is treated as non-accepting.
        entry = {"isTerminatingState": getattr(state, "is_final", False)}
        for symbol, next_states in state.transitions.items():
            # Sets of states have no stable iteration order; sorting by name
            # makes the output reproducible.
            targets = sorted(next_states, key=lambda target: target.name)
            if not targets:
                continue
            names = [target.name for target in targets]
            entry[symbol] = names[0] if len(names) == 1 else names
            for target in targets:
                dfs(target)
        states[state.name] = entry

    dfs(nfa.start)
    data = {"startingState": nfa.start.name}
    data.update(states)

    with open(filename, "w") as f:
        json.dump(data, f, indent=2)
    return data