In [1]:
import { display } from "tslab";
import { readFileSync } from "fs";

// Read the stylesheet from the parent directory and inject it into the
// notebook output as a <style> element.
const css = readFileSync("../style.css", "utf8");
display.html(`<style>${css}</style>`);

# Checking the Equivalence of Regular Expressions

In order to check whether two regular expressions $r_1$ and $r_2$ are *equivalent*, perform the 
following steps:
- convert the regular expressions $r_1$ and $r_2$ into non-deterministic *FSMs*
  $F_1$ and $F_2$ such that $L(r_1) = L(F_1)$ and $L(r_2) = L(F_2)$,
- convert the non-deterministic *FSMs* $F_1$ and $F_2$ into deterministic *FSMs*
  $D_1$ and $D_2$ such that $L(D_1) = L(F_1)$ and $L(D_2) = L(F_2)$,
- check whether both $L(D_1) \backslash L(D_2)$ and $L(D_2) \backslash L(D_1)$ are empty.

The notebook `Regexp-2-NFA.ipynb` contains the function `RegExp2NFA.toNFA` that can be used to compute a non-deterministic 
<span style="font-variant:small-caps;">Fsm</span> that accepts the language described by a given regular expression.

In [2]:
import { RecursiveSet } from 'recursive-set';
import { State, Char, DFA, NFA, nfa2dfa, key as fsmKey } from './01-NFA-2-DFA';
import { RegExp, RegExp2NFA } from './03-Regexp-2-NFA';

`NFA-2-DFA.ts` contains the function `nfa2dfa` that converts a non-deterministic 
*Fsm* into an equivalent deterministic *Fsm*.

In [3]:
/**
 * A deterministic finite state machine whose states have type `S`.
 */
type GenericDFA<S> = {
	/** The set of states. */
	Q: RecursiveSet<S>;
	/** The input alphabet. */
	Sigma: RecursiveSet<Char>;
	/** Transition table: maps a string key built from (state, character) to the successor state. */
	delta: Map<string, S>;
	/** The start state. */
	q0: S;
	/** The set of accepting states. */
	A: RecursiveSet<S>;
};

In [4]:
/**
 * Extracts the two components of an ordered pair that is encoded as a set of
 * sets: `{{x}, {x, y}}` stands for `(x, y)` and `{{x}}` stands for `(x, x)`.
 *
 * @param pair the set-encoded pair
 * @returns the tuple `[x, y]`
 */
function unwrapPair<S>(pair: RecursiveSet<any>): [S, S] {
	const parts = Array.from(pair) as RecursiveSet<S>[];
	// Returns the first element produced when iterating over the given set.
	const firstOf = (set: RecursiveSet<S>): S => Array.from(set)[0] as S;

	// Case 1: {{x}} encodes (x, x).
	if (parts.length === 1) {
		const only = firstOf(parts[0]);
		return [only, only];
	}

	// Case 2: {{x}, {x, y}} -- the member of size 1 carries the first component.
	const singletonFirst = parts[0].size === 1;
	const small = singletonFirst ? parts[0] : parts[1];
	const large = singletonFirst ? parts[1] : parts[0];

	const first = firstOf(small);
	// Whatever remains of the larger member after removing x is y.
	const second = firstOf(large.difference(small));

	return [first, second];
}

In [5]:
/**
 * Builds the string key under which the transition for `(state, c)` is looked
 * up in a transition map.
 *
 * The key is the string rendering of the state, a comma, and the character.
 * NOTE(review): this relies on `String(state)` being distinct for distinct
 * states -- confirm for the state types in use.
 *
 * @param state the source state
 * @param c the input character
 * @returns the lookup key `"<state>,<c>"`
 */
function genKey<S>(state: S, c: Char): string {
	const statePart = String(state);
	return `${statePart},${c}`;
}

Given two sets `A` and `B`, the function `cartesian_product(A, B)` computes the 
<em style="color:blue">cartesian product</em> $A \times B$ which is defined as
$$ A \times B := \{ (x, y) \mid x \in A \wedge y \in B \}. $$

In [6]:
/**
 * Computes the cartesian product of the sets `A` and `B` by delegating to the
 * `cartesianProduct` method of `A`.
 *
 * @param A the set supplying the first components
 * @param B the set supplying the second components
 * @returns the set returned by `A.cartesianProduct(B)`
 */
function cartesian_product<S, T>(
	A: RecursiveSet<S>,
	B: RecursiveSet<T>
): RecursiveSet<any> {
	const product = A.cartesianProduct(B);
	return product;
}

Given two deterministic *FSMs* `F1` and `F2`, the expression `fsm_complement(F1, F2)` computes a deterministic 
*FSM* that recognizes the language  $L(F_1)\backslash L(F_2)$.

In [7]:
/**
 * Builds the product automaton of `F1` and `F2` whose accepting states are the
 * pairs `(p1, p2)` with `p1` accepting in `F1` and `p2` not accepting in `F2`.
 *
 * The states of the result are the pairs from `Q1 x Q2`; the alphabet is taken
 * from `F1`. A transition for a pair and a character is only recorded when
 * both component automata define a transition for it.
 *
 * @param F1 the automaton whose language is kept
 * @param F2 the automaton whose language is removed
 * @returns the product automaton over pairs of states
 */
function fsm_complement<S>(F1: GenericDFA<S>, F2: GenericDFA<S>): GenericDFA<any> {
    // Plain property reads instead of destructuring, to work around
    // "exports" errors in the notebook.
    const alphabet = F1.Sigma;
    const transitions1 = F1.delta;
    const transitions2 = F2.delta;

    // Encodes the ordered pair (first, second) as the set {{first}, {first, second}}.
    const makePair = (first: S, second: S) =>
        new RecursiveSet(new RecursiveSet(first), new RecursiveSet(first, second));

    const productStates = cartesian_product(F1.Q, F2.Q);
    const productDelta = new Map<string, any>();

    for (const pair of productStates) {
        const [left, right] = unwrapPair<S>(pair);

        for (const symbol of alphabet) {
            // Explicit cast of the iterated element to 'Char'.
            const c = symbol as Char;

            const leftNext = transitions1.get(genKey(left, c));
            const rightNext = transitions2.get(genKey(right, c));

            // Skip characters for which either component has no transition.
            if (leftNext === undefined || rightNext === undefined) {
                continue;
            }

            const target = makePair(leftNext, rightNext);
            productDelta.set(genKey(pair, c), target);
        }
    }

    const startPair = makePair(F1.q0, F2.q0);

    // States of F2 that are not accepting.
    const rejectedByF2 = F2.Q.difference(F2.A);
    const acceptingPairs = cartesian_product(F1.A, rejectedByF2);

    return {
        Q: productStates,
        Sigma: alphabet,
        delta: productDelta,
        q0: startPair,
        A: acceptingPairs,
    };
}

Given a regular expression $r$ and an alphabet $\Sigma$, the function $\texttt{regexp2DFA}(r, \Sigma)$
computes a deterministic *FSM* that accepts
the language specified by $r$.

In [8]:
/**
 * Converts the regular expression `r` over the alphabet `Sigma` into a
 * deterministic automaton: `r` is first turned into a non-deterministic
 * automaton via `RegExp2NFA.toNFA`, which is then passed to `nfa2dfa`.
 *
 * @param r the regular expression
 * @param Sigma the alphabet handed to the `RegExp2NFA` converter
 * @returns the result of `nfa2dfa` applied to the automaton built for `r`
 */
function regexp2DFA(r: RegExp, Sigma: RecursiveSet<Char>): DFA {
	return nfa2dfa(new RegExp2NFA(Sigma).toNFA(r));
}

Given a deterministic *FSM* $F$ the function 
`is_empty(F)` checks whether the language accepted by $F$ is empty.
In this function, the variable `reachable` is the set of those states that are already known to be reachable
from the start state `q0`. `newFound` are those states that can be reached in one step from a state in the set 
`reachable`.  When we find no new states that are reachable, the iteration stops and we check whether
there is a state that is both reachable and accepting because in that case the language is not empty.

In [9]:
/**
 * Checks whether no accepting state of `F` can be reached from its start state.
 *
 * Starting with the set containing only `q0`, the set of known states is
 * repeatedly extended by all states reachable in one transition until no new
 * state is found.
 *
 * @param F the automaton to inspect
 * @returns `true` iff the set of reachable states and `F.A` have no common element
 */
function is_empty<S>(F: GenericDFA<S>): boolean {
	const alphabet = F.Sigma;
	const transitions = F.delta;
	const accepting = F.A;

	let known = new RecursiveSet<S>(F.q0);

	for (;;) {
		// All states reachable in one step from a state that is already known.
		const successors = new RecursiveSet<S>();

		for (const state of known) {
			for (const symbol of alphabet) {
				// Explicit cast of the iterated element to 'Char'.
				const next = transitions.get(genKey(state, symbol as Char));
				if (next !== undefined) {
					successors.add(next);
				}
			}
		}

		// Fixed point reached: nothing new was discovered in this round.
		if (successors.isSubset(known)) {
			return known.intersection(accepting).isEmpty();
		}
		known = known.union(successors);
	}
}

The function `regExpEquiv` takes three arguments:
- $r_1$ and $r_2$ are regular expressions,
- $\Sigma$ is the alphabet used in these regular expressions.

The function returns `true` iff $r_1 \doteq r_2$, i.e. if $r_1$ and $r_2$ are equivalent. 

In [10]:
/**
 * Decides whether the regular expressions `r1` and `r2` are equivalent.
 *
 * Both expressions are converted into deterministic automata. They are
 * equivalent iff both difference automata, built by `fsm_complement` in either
 * order, accept nothing according to `is_empty`.
 *
 * @param r1 the first regular expression
 * @param r2 the second regular expression
 * @param Sigma the alphabet used in these regular expressions
 * @returns `true` iff both difference automata are empty
 */
function regExpEquiv(
	r1: RegExp,
	r2: RegExp,
	Sigma: RecursiveSet<Char>
): boolean {
	const F1 = regexp2DFA(r1, Sigma);
	const F2 = regexp2DFA(r2, Sigma);

	// If the first difference is non-empty, the answer is already known, so
	// the second product automaton is never built.
	if (!is_empty(fsm_complement(F1, F2))) {
		return false;
	}

	return is_empty(fsm_complement(F2, F1));
}

The notebook `Test-Equivalence.ipynb` can be used to test this function.