In [1]:
import { display } from "tslab";
import { readFileSync } from "fs";

const css = readFileSync("../style.css", "utf8");
display.html(`<style>${css}</style>`);

# Test DFA-2-RegExp

In [2]:
import { instance } from "@viz-js/viz";
import { RecursiveSet, Tuple } from "recursive-set";
import {
  State,
  Char,
  DFA,
  RegExp,
  TransRelDet,
  key,
  dfa2regexp,
  BinaryOp,
  UnaryOp
} from "./05-DFA-2-RegExp";

// Hinzugef√ºgt: Import von dfa2dot
import { dfa2dot } from "./FSM-2-Dot";

In [3]:
// Singletons {1}, {2}, {3}
const S0 = new RecursiveSet(1);
const S1 = new RecursiveSet(2);
const S2 = new RecursiveSet(3);

// Q = { {1}, {2}, {3} }
const Q = new RecursiveSet<RecursiveSet<number>>(S0, S1, S2);

// Œ£ = {a, b}
const Sigma = new RecursiveSet<string>("a", "b");

// Œ¥ (√úbergangsfunktion)
const delta: TransRelDet = new Map<string, RecursiveSet<number>>();
delta.set(key(S0, "a"), S1); // (1,a)->2
delta.set(key(S1, "b"), S2); // (2,b)->3
delta.set(key(S2, "a"), S1); // (3,a)->2

// Startzustand und akzeptierende Zust√§nde
const q0 = S0;
const A = new RecursiveSet<RecursiveSet<number>>(S2);

// DFA Objekt
const dfa: DFA = { Q, Sigma, delta, q0, A };

In [4]:
const { dot } = dfa2dot(dfa);
const viz = await instance();
display.html(viz.renderString(dot, { format: "svg" }));

In [5]:
function prettyPrintRegExp(r: RegExp): string {
  if (typeof r === 'number' || typeof r === 'string') {
    return typeof r === 'string' ? `'${r}'` : String(r);
  }
  
  if (r instanceof Tuple) {
    // Wir nutzen den Iterator des Tuples
    const elements: string[] = [];
    for (const el of r) {
        // Rekursiver Aufruf f√ºr jedes Element (muss als RegExp gecastet werden, 
        // da Tuple auch Operatoren enthalten kann, die Strings sind)
        // Wir pr√ºfen einfachheitshalber, ob es ein RegExp ist oder ein Operator-String
        elements.push(prettyPrintRegExp(el as RegExp));
    }
    return `(${elements.join(", ")})`;
  }
  
  return String(r);
}

// Berechnung des regul√§ren Ausdrucks aus dem DFA
const r: RegExp = dfa2regexp(dfa);

console.log("Unvereinfachter regul√§rer Ausdruck:");
console.log(prettyPrintRegExp(r));

Unvereinfachter regul√§rer Ausdruck:
(((0, '+', ((0, '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', ('Œµ', '+', 0))), '+', ((('a', '+', ((0, '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', 'a')), '‚ãÖ', ((('Œµ', '+', 0), '+', (('b', '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', 'a')), '*')), '‚ãÖ', ('b', '+', (('b', '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', ('Œµ', '+', 0))))), '+', ((((('Œµ', '+', 0), '+', ((0, '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', 0)), '+', ((('a', '+', ((0, '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', 'a')), '‚ãÖ', ((('Œµ', '+', 0), '+', (('b', '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', 'a')), '*')), '‚ãÖ', (0, '+', (('b', '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', 0)))), '‚ãÖ', (((('Œµ', '+', 0), '+', ((0, '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', 0)), '+', ((('a', '+', ((0, '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', 'a')), '‚ãÖ', ((('Œµ', '+', 0), '+', (('b', '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', 'a')), '*')), '‚ãÖ', (0, '+', (('b', '‚ãÖ', (('Œµ', '+', 0), '*')), '‚ãÖ', 0)))), '*')), '‚ãÖ', ((0, '+', ((0, '‚ãÖ', (('Œµ', 

## Helpfunctions:

In [6]:
function isZero(r: RegExp): boolean {
  return r === 0;
}

function isEps(r: RegExp): boolean {
  return r === "Œµ" || r === "ùúÄ";
}

function eq(a: RegExp, b: RegExp): boolean {
  // JSON.stringify funktioniert auch bei Tuple (es serialisiert die 'values')
  return JSON.stringify(a) === JSON.stringify(b);
}

As this regular expression is nearly unreadable,  The notebook `Rewrite.ipynb` contains the definition of the function `simplify` that can be used to simplify this expression.

In [7]:
function simplify(r: RegExp): RegExp {
  if (typeof r === "string" || typeof r === "number") return r;
  if (!(r instanceof Tuple)) return r;

  // Wir holen uns die L√§nge und Elemente √ºber die Tuple-API
  const len = r.length;

  // ---------- Kleene Star ----------
  // Struktur: [RegExp, "*"]
  if (len === 2 && r.get(1) === "*") {
    const innerRaw = r.get(0) as RegExp;
    let inner = simplify(innerRaw);

    // 0* ‚Üí Œµ
    if (isZero(inner)) return "Œµ"; 

    // Œµ* ‚Üí Œµ
    if (isEps(inner)) return "Œµ"; 

    // (r*)* ‚Üí r*
    // Pr√ºfung: ist inner auch ein Tuple der L√§nge 2 mit '*'?
    if (inner instanceof Tuple && inner.length === 2 && inner.get(1) === "*") {
      return inner;
    }

    // (Œµ + r)* ‚Üí r*
    // Pr√ºfung: inner ist Tuple [Œµ, +, r] oder [r, +, Œµ] ?
    // Hier pr√ºfen wir speziell: [Œµ, '+', r]
    if (
      inner instanceof Tuple &&
      inner.length === 3 &&
      inner.get(1) === "+" &&
      (isEps(inner.get(0) as RegExp) || isZero(inner.get(0) as RegExp))
    ) {
      const r2 = simplify(inner.get(2) as RegExp);
      // R√ºckgabe als Tuple
      return new Tuple(r2, "*" as UnaryOp);
    }

    return new Tuple(inner, "*" as UnaryOp);
  }

  // ---------- Bin√§re Operationen (+ oder ‚ãÖ) ----------
  // Struktur: [RegExp, Op, RegExp]
  if (len === 3) {
    const left = simplify(r.get(0) as RegExp);
    const op = r.get(1) as BinaryOp;
    const right = simplify(r.get(2) as RegExp);

    // ------------------ +
    if (op === "+") {
      if (isZero(left)) return right;
      if (isZero(right)) return left;
      if (eq(left, right)) return left;
      return new Tuple(left, "+" as BinaryOp, right);
    }

    // ------------------ ‚ãÖ
    if (op === "‚ãÖ") {
      if (isZero(left) || isZero(right)) return 0;
      if (isEps(left)) return right;
      if (isEps(right)) return left;
      return new Tuple(left, "‚ãÖ" as BinaryOp, right);
    }
  }

  return r;
}

10:22 - This expression is not callable.
10:22 - Each member of the union type '(<K extends keyof [RegExp, "*"]>(index: K) => [RegExp, "*"][K]) | (<K extends keyof [RegExp, BinaryOp, RegExp]>(index: K) => [RegExp, BinaryOp, RegExp][K])' has signatures, but none of those signatures are compatible with each other.
11:24 - This expression is not callable.
11:24 - Each member of the union type '(<K extends keyof [RegExp, "*"]>(index: K) => [RegExp, "*"][K]) | (<K extends keyof [RegExp, BinaryOp, RegExp]>(index: K) => [RegExp, BinaryOp, RegExp][K])' has signatures, but none of those signatures are compatible with each other.
22:63 - This expression is not callable.
22:63 - Each member of the union type '(<K extends keyof [RegExp, "*"]>(index: K) => [RegExp, "*"][K]) | (<K extends keyof [RegExp, BinaryOp, RegExp]>(index: K) => [RegExp, BinaryOp, RegExp][K])' has signatures, but none of those signatures are compatible with each other.
32:13 - This expression is not callable.
32:13 - Each memb

In [None]:
let s = simplify(r);
s = simplify(s); // Zweiter Durchlauf f√ºr verschachtelte Vereinfachungen

console.log("Vereinfachter Ausdruck (Struktur):");
console.log(prettyPrintRegExp(s));

The function `regexp_2_string` takes a regular expression that is represented as a nested tuple and transforms it into a string.

In [None]:
function regexpToString(r: RegExp): string {
  // leere Sprache
  if (r === 0) return "‚àÖ";

  // epsilon
  if (r === "Œµ" || r === "ùúÄ") return "Œµ";

  // einzelnes Symbol
  if (typeof r === "string") return r;

  // Tuple-F√§lle
  if (r instanceof Tuple) {
    const len = r.length;

    // -----------------------------------------
    // Bin√§re Operatoren: [r1, '‚ãÖ', r2] / [r1, '+', r2]
    // -----------------------------------------
    if (len === 3) {
      const r1 = r.get(0) as RegExp;
      const op = r.get(1) as string;
      const r2 = r.get(2) as RegExp;

      if (op === "‚ãÖ") {
        const s1 = regexpToString(r1);
        const s2 = regexpToString(r2);
        
        // Einfache Heuristik f√ºr Klammern:
        // Wenn r1 oder r2 eine Summe (+) ist, klammern.
        const p1 = (r1 instanceof Tuple && r1.get(1) === '+') ? `(${s1})` : s1;
        const p2 = (r2 instanceof Tuple && r2.get(1) === '+') ? `(${s2})` : s2;
        
        return p1 + p2; 
      }

      if (op === "+") {
        return regexpToString(r1) + "+" + regexpToString(r2);
      }
    }

    // -----------------------------------------
    // Kleene-Stern: [expr, '*']
    // -----------------------------------------
    if (len === 2 && r.get(1) === "*") {
      const inner = r.get(0) as RegExp;
      const sInner = regexpToString(inner);

      // Symbol* oder (Ausdruck)*
      if (typeof inner === "string" || typeof inner === "number") {
        return sInner + "*";
      }

      return "(" + sInner + ")*";
    }
  }

  return JSON.stringify(r);
}

In [None]:
console.log("\nEndg√ºltiger regul√§rer Ausdruck (String):");
console.log(regexpToString(s));