In [None]:
import { display } from "tslab";
import { readFileSync } from "fs";

// Read the stylesheet "../style.css" as UTF-8 text and hand it to tslab's
// HTML display wrapped in a <style> element.
const css = readFileSync("../style.css", "utf8");
display.html(`<style>${css}</style>`);

# Checking the Equivalence of Regular Expressions

In order to check whether two regular expressions $r_1$ and $r_2$ are *equivalent*, perform the 
following steps:
- convert the regular expressions $r_1$ and $r_2$ into non-deterministic *FSMs*
  $F_1$ and $F_2$ such that $L(r_1) = L(F_1)$ and $L(r_2) = L(F_2)$,
- convert the non-deterministic *FSMs* $F_1$ and $F_2$ into deterministic *FSMs*
  $D_1$ and $D_2$ such that $L(D_1) = L(F_1)$ and $L(D_2) = L(F_2)$,
- check whether both $L(D_1) \backslash L(D_2)$ and $L(D_2) \backslash L(D_1)$ are empty.

The notebook `Regexp-2-NFA.ipynb` contains the function `RegExp2NFA.toNFA` that can be used to compute a non-deterministic 
<span style="font-variant:small-caps;">Fsm</span> that accepts the language described by a given regular expression.

In [None]:
import { RecursiveSet, Tuple } from 'recursive-set';
import { State, Char, DFA, NFA, nfa2dfa, key as fsmKey } from './01-NFA-2-DFA';
import { RegExp, RegExp2NFA } from './03-Regexp-2-NFA';

`NFA-2-DFA.ts` contains the function `nfa2dfa` that converts a non-deterministic 
*Fsm* into an equivalent deterministic *Fsm*.

In [None]:
/** A state of a deterministic FSM, represented as a set of `State` values. */
type DFAState = RecursiveSet<State>;
/** An ordered pair of DFA states; `fsm_complement` uses such pairs as the states of the automaton it builds. */
type StatePair = Tuple<[DFAState, DFAState]>;

/**
 * Shape of a deterministic FSM whose states have type `S`.
 */
type GenericDFA<S> = {
    /** Set of states. */
    Q: RecursiveSet<S>;
    /** Input alphabet. */
    Sigma: RecursiveSet<Char>;
    /**
     * Transition table from a string key to the successor state. The functions
     * in this file look entries up with `genKey(state, symbol)`.
     * NOTE(review): confirm that `nfa2dfa` writes its keys in that same format.
     */
    delta: Map<string, S>;
    /** Start state. */
    q0: S;
    /** Set of accepting states. */
    A: RecursiveSet<S>;
};

In [None]:
/**
 * Builds the string key under which the transition for `state` on symbol `c`
 * is looked up in a `delta` map.
 *
 * @param state - source state; its `toString()` result forms the first part of the key
 * @param c - input symbol, appended after a comma
 * @returns the state's string form and the symbol, separated by a comma
 */
function genKey<S>(state: S, c: Char): string {
    const stateText = state.toString();
    return `${stateText},${c}`;
}

Given two sets `A` and `B`, the method call `A.cartesianProduct(B)` computes the 
<em style="color:blue">cartesian product</em> $A \times B$ which is defined as
$$ A \times B := \{ (x, y) \mid x \in A \wedge y \in B \}. $$

In [None]:
// Demonstration: form the cartesian product of the sets {1, 2} and {'a', 'b'}
// and print its string representation.
const testSetA = new RecursiveSet(1, 2);
const testSetB = new RecursiveSet('a', 'b');
const cp = testSetA.cartesianProduct(testSetB);
console.log("Cartesian Product Example:", cp.toString());

Given two deterministic *FSMs* `F1` and `F2`, the expression `fsm_complement(F1, F2)` computes a deterministic 
*FSM* that recognizes the language  $L(F_1)\backslash L(F_2)$.

In [None]:
/**
 * Product construction for the language difference: builds a deterministic FSM
 * that recognizes L(F1) \ L(F2). Despite the name, the result is a difference
 * of two languages, not the complement of one.
 *
 * The states are the pairs (p1, p2) with p1 from `F1.Q` and p2 from `F2.Q`.
 * A pair is accepting when p1 is accepting in `F1` and p2 is not accepting
 * in `F2`.
 *
 * @param F1 - automaton whose language is kept
 * @param F2 - automaton whose language is subtracted
 * @returns the product automaton; its alphabet is taken from `F1`
 */
function fsm_complement(
    F1: GenericDFA<DFAState>,
    F2: GenericDFA<DFAState>
): GenericDFA<StatePair> {
    const productStates = F1.Q.cartesianProduct(F2.Q);
    const productDelta = new Map<string, StatePair>();

    for (const statePair of productStates) {
        const left = statePair.values[0];
        const right = statePair.values[1];

        // Only the symbols of F1.Sigma are considered.
        for (const symbol of F1.Sigma) {
            const leftTarget = F1.delta.get(genKey(left, symbol));
            const rightTarget = F2.delta.get(genKey(right, symbol));

            // A product transition is recorded only when both components have one.
            if (!leftTarget || !rightTarget) {
                continue;
            }

            // Explicit type annotation on the new pair.
            const successor: StatePair = new Tuple<[DFAState, DFAState]>(leftTarget, rightTarget);
            productDelta.set(genKey(statePair, symbol), successor);
        }
    }

    // Explicit type annotation on the start pair.
    const initial: StatePair = new Tuple<[DFAState, DFAState]>(F1.q0, F2.q0);

    // Accept exactly the pairs (accepting in F1, non-accepting in F2).
    const rejectingInF2 = F2.Q.difference(F2.A);
    const accepting = F1.A.cartesianProduct(rejectingInF2);

    return {
        Q: productStates,
        Sigma: F1.Sigma,
        delta: productDelta,
        q0: initial,
        A: accepting
    };
}


Given a regular expression $r$ and an alphabet $\Sigma$, the function $\texttt{regexp2DFA}(r, \Sigma)$
computes a deterministic *FSM* that accepts
the language specified by $r$.

In [None]:
/**
 * Translates the regular expression `r` into a deterministic FSM.
 *
 * `r` is first turned into a non-deterministic FSM by a `RegExp2NFA` converter
 * created for the alphabet `Sigma`; that automaton is then passed to `nfa2dfa`.
 *
 * @param r - regular expression to translate
 * @param Sigma - alphabet handed to the `RegExp2NFA` converter
 * @returns the result of `nfa2dfa` applied to the converter's output
 */
function regexp2DFA(r: RegExp, Sigma: RecursiveSet<Char>): DFA {
    return nfa2dfa(new RegExp2NFA(Sigma).toNFA(r));
}

Given a deterministic *FSM* $F$ the function 
`is_empty(F)` checks whether the language accepted by $F$ is empty.
In this function, the variable `reachable` is the set of those states that are already known to be reachable
from the start state `q0`. The variable `newFound` holds those states that can be reached in one step from a state in the set 
`reachable`.  When we find no new states that are reachable, the iteration stops and we check whether
there is a state that is both reachable and accepting because in that case the language is not empty.

In [None]:
/**
 * Checks whether the language accepted by the deterministic FSM `F` is empty.
 *
 * Starting from the set containing only `F.q0`, `reachable` is grown round by
 * round: `newFound` collects every state that is the target of a recorded
 * transition from a state in `reachable`. When a round finds nothing new, the
 * language is empty iff no reachable state is accepting.
 *
 * @param F - automaton whose `delta` entries are looked up via `genKey(state, symbol)`
 * @returns `true` iff no state of `F.A` is reachable from `F.q0`
 */
function is_empty<S>(F: GenericDFA<S>): boolean {
    let reachable = new RecursiveSet<S>(F.q0);

    while (true) {
        const newFound = new RecursiveSet<S>();

        for (const q of reachable) {
            for (const c of F.Sigma) {
                const target = F.delta.get(genKey(q, c));
                // Compare against undefined instead of testing truthiness:
                // S is generic, so a legitimate state such as 0 or "" is
                // falsy and must not be dropped from the reachable set.
                if (target !== undefined) {
                    newFound.add(target);
                }
            }
        }

        // Fixed point reached: this round discovered no state beyond `reachable`.
        if (newFound.isSubset(reachable)) {
            break;
        }

        reachable = reachable.union(newFound);
    }

    return reachable.intersection(F.A).isEmpty();
}

The function `regExpEquiv` takes three arguments:
- $r_1$ and $r_2$ are regular expressions,
- $\Sigma$ is the alphabet used in these regular expressions.

The function returns `true` iff $r_1 \doteq r_2$, i.e. if $r_1$ and $r_2$ are equivalent. 

In [None]:
/**
 * Decides whether the regular expressions `r1` and `r2` are equivalent.
 *
 * Both expressions are translated into deterministic FSMs, then the automata
 * for L(r1) \ L(r2) and L(r2) \ L(r1) are built. The expressions are
 * equivalent iff both of these languages are empty.
 *
 * @param r1 - first regular expression
 * @param r2 - second regular expression
 * @param Sigma - alphabet used in the two expressions
 * @returns `true` iff both difference languages are empty
 */
function regExpEquiv(
    r1: RegExp,
    r2: RegExp,
    Sigma: RecursiveSet<Char>
): boolean {
    const dfa1 = regexp2DFA(r1, Sigma);
    const dfa2 = regexp2DFA(r2, Sigma);

    // Both difference automata are built up front, before either is inspected.
    const onlyInFirst = fsm_complement(dfa1, dfa2);
    const onlyInSecond = fsm_complement(dfa2, dfa1);

    if (!is_empty(onlyInFirst)) {
        return false;
    }
    return is_empty(onlyInSecond);
}

The notebook `Test-Equivalence.ipynb` can be used to test this function.