In [None]:
import { display } from "tslab";
import { readFileSync } from "fs";

// Read ../style.css as UTF-8 text and emit it wrapped in a <style> element
// through tslab's HTML display.
const css = readFileSync("../style.css", { encoding: "utf8" });
display.html("<style>" + css + "</style>");

# Test DFA-2-RegExp

In [None]:
import { instance } from "@viz-js/viz";
import { RecursiveSet, Tuple } from "recursive-set";
import {
  State,
  Char,
  DFA,
  RegExp,
  TransRelDet,
  key,
  dfa2regexp,
  BinaryOp,
  UnaryOp
} from "./05-DFA-2-RegExp";

// Added: import of dfa2dot
import { dfa2dot } from "./FSM-2-Dot";

In [None]:
// States are modelled as the singleton sets {1}, {2} and {3}.
const S0 = new RecursiveSet(1);
const S1 = new RecursiveSet(2);
const S2 = new RecursiveSet(3);

// State set Q = { {1}, {2}, {3} } and input alphabet Sigma = {a, b}.
const Q = new RecursiveSet<RecursiveSet<number>>(S0, S1, S2);
const Sigma = new RecursiveSet<string>("a", "b");

// Transition function delta, keyed by key(state, symbol):
//   ({1}, a) -> {2},   ({2}, b) -> {3},   ({3}, a) -> {2}
const delta: TransRelDet = new Map<string, RecursiveSet<number>>([
  [key(S0, "a"), S1],
  [key(S1, "b"), S2],
  [key(S2, "a"), S1],
]);

// The start state is {1}; the only accepting state is {3}.
const q0 = S0;
const A = new RecursiveSet<RecursiveSet<number>>(S2);

// Bundle the five components into the DFA object used by the following cells.
const dfa: DFA = { Q, Sigma, delta, q0, A };

In [None]:
// Convert the DFA to DOT source with dfa2dot, obtain a viz instance, and
// display the DOT source rendered in "svg" format as HTML.
const { dot } = dfa2dot(dfa);
const viz = await instance();
display.html(viz.renderString(dot, { format: "svg" }));

In [None]:
/**
 * Renders a regular expression in its raw nested-tuple structure.
 *
 * - A string is printed in single quotes.
 * - A number is printed via `String`.
 * - A `Tuple` is printed as `(e1, e2, ...)`, with every element rendered
 *   recursively. Operators are stored in the tuple as plain strings, so they
 *   come out quoted exactly like symbols.
 * - Any other value is printed via `String`.
 *
 * @param r the expression to render
 * @returns the structural string representation of `r`
 */
function prettyPrintRegExp(r: RegExp): string {
  if (typeof r === "string") return "'" + r + "'";
  if (typeof r === "number") return String(r);
  if (!(r instanceof Tuple)) return String(r);

  // Walk the tuple through its iterator; the cast is needed because the
  // elements may be operator strings as well as sub-expressions.
  const parts = [...r].map((item) => prettyPrintRegExp(item as RegExp));
  return "(" + parts.join(", ") + ")";
}

// Compute the regular expression for the DFA and print its raw, unsimplified structure.
const r: RegExp = dfa2regexp(dfa);

console.log("Unvereinfachter regul√§rer Ausdruck:");
console.log(prettyPrintRegExp(r));

## Helper functions

In [None]:
/**
 * Tests whether `r` is the number 0, which this notebook prints as the
 * empty language.
 *
 * @param r the expression to test
 * @returns `true` exactly when `r` is strictly equal to 0
 */
function isZero(r: RegExp): boolean {
  return typeof r === "number" && r === 0;
}

/**
 * Tests whether `r` is one of the two string spellings this notebook accepts
 * for the empty word (epsilon).
 *
 * NOTE(review): both literals look like mis-decoded UTF-8 for an epsilon
 * character; confirm they match what dfa2regexp actually emits.
 *
 * @param r the expression to test
 * @returns `true` exactly when `r` is strictly equal to one of the two spellings
 */
function isEps(r: RegExp): boolean {
  if (typeof r !== "string") return false;
  return r === "Œµ" || r === "ùúÄ";
}

/**
 * Compares two regular expressions by comparing their JSON serialisations.
 *
 * NOTE(review): for Tuple operands this relies on JSON.stringify exposing the
 * tuple's contents -- confirm against the recursive-set implementation.
 *
 * @param a first expression
 * @param b second expression
 * @returns `true` when both arguments serialise to the same JSON text
 */
function eq(a: RegExp, b: RegExp): boolean {
  const [left, right] = [a, b].map((x) => JSON.stringify(x));
  return left === right;
}

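This regular expression is nearly unreadable. The notebook `Rewrite.ipynb` contains the definition of the function `simplify` that can be used to simplify this expression; the cell below defines a TypeScript version of `simplify` for the nested-tuple representation used here.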

In [None]:
/**
 * Builds the Kleene star of an operand that has ALREADY been simplified,
 * applying the star rewrite rules:
 *
 *   0* = eps,   eps* = eps,   (x*)* = x*,   (eps + x)* = (x + eps)* = x*
 *
 * When a summand is dropped, the rules are applied again to the remaining
 * summand, so a result such as (eps + s*)* collapses directly to s*.
 *
 * @param inner the simplified operand of the star
 * @returns the simplified expression equivalent to `inner*`
 */
function simplifyStar(inner: RegExp): RegExp {
  // 0* = eps and eps* = eps
  if (isZero(inner) || isEps(inner)) return "Œµ";

  if (inner instanceof Tuple) {
    // 'any' gives uniform access to .length / .get across the tuple shapes;
    // the shape is validated by the length and operator checks below.
    const t = inner as any;

    // (x*)* = x*
    if (t.length === 2 && t.get(1) === "*") return inner;

    // (eps + x)* = (x + eps)* = x*; a zero summand is dropped the same way.
    if (t.length === 3 && t.get(1) === "+") {
      const left = t.get(0) as RegExp;
      const right = t.get(2) as RegExp;
      if (isEps(left) || isZero(left)) return simplifyStar(right);
      if (isEps(right) || isZero(right)) return simplifyStar(left);
    }
  }

  return new Tuple<[RegExp, UnaryOp]>(inner, "*" as UnaryOp);
}

/**
 * Simplifies a regular expression given as a nested tuple.
 *
 * Sub-expressions are simplified first, then these rules are applied:
 *
 *   star:    see simplifyStar
 *   sum:     0 + x = x,   x + 0 = x,   x + x = x
 *   product: 0 x = 0,  x 0 = 0,  eps x = x,  x eps = x
 *
 * Strings, numbers, non-tuple values and tuples of an unrecognised shape are
 * returned unchanged.
 *
 * @param r the expression to simplify
 * @returns an expression describing the same language as `r`
 */
function simplify(r: RegExp): RegExp {
  if (typeof r === "string" || typeof r === "number") return r;
  if (!(r instanceof Tuple)) return r;

  // 'any' gives uniform access to .length / .get across the tuple shapes;
  // the shape is validated by the length and operator checks below.
  const t = r as any;
  const len = t.length;

  // ---------- Kleene star: [RegExp, "*"] ----------
  if (len === 2 && t.get(1) === "*") {
    return simplifyStar(simplify(t.get(0) as RegExp));
  }

  // ---------- Binary operations: [RegExp, op, RegExp] ----------
  if (len === 3) {
    const left = simplify(t.get(0) as RegExp);
    const op = t.get(1) as BinaryOp;
    const right = simplify(t.get(2) as RegExp);

    // ------------------ sum
    if (op === "+") {
      if (isZero(left)) return right;
      if (isZero(right)) return left;
      if (eq(left, right)) return left;
      return new Tuple<[RegExp, BinaryOp, RegExp]>(left, "+" as BinaryOp, right);
    }

    // ------------------ product
    if (op === "‚ãÖ") {
      if (isZero(left) || isZero(right)) return 0;
      if (isEps(left)) return right;
      if (isEps(right)) return left;
      return new Tuple<[RegExp, BinaryOp, RegExp]>(left, "‚ãÖ" as BinaryOp, right);
    }
  }

  return r;
}

In [None]:
// Simplify the expression computed from the DFA and print its structure.
let s = simplify(r);
s = simplify(s); // Second pass for nested simplifications

console.log("Vereinfachter Ausdruck (Struktur):");
console.log(prettyPrintRegExp(s));

The function `regexpToString` takes a regular expression that is represented as a nested tuple and transforms it into a string.

In [None]:
/**
 * Converts a regular expression given as a nested tuple into a compact string.
 *
 * - The number 0 (empty language) and the epsilon strings map to fixed symbols.
 * - Any other string is returned as is.
 * - `[x, "*"]` becomes `x*` for an atomic `x`, and `(x)*` otherwise.
 * - `[x, "+", y]` becomes `x+y`.
 * - A product `[x, op, y]` is written by juxtaposition; an operand that is
 *   itself a sum is wrapped in parentheses.
 * - Anything else falls back to `JSON.stringify`.
 *
 * @param r the expression to render
 * @returns the string form of `r`
 */
function regexpToString(r: RegExp): string {
  // Renders a product operand, parenthesising it when it is a sum.
  const wrapIfSum = (operand: RegExp): string => {
    const text = regexpToString(operand);
    if (operand instanceof Tuple) {
      // 'any' for generic access; the shape is checked manually.
      const inner = operand as any;
      if (inner.length === 3 && inner.get(1) === "+") {
        return "(" + text + ")";
      }
    }
    return text;
  };

  // Atoms: epsilon, ordinary symbols, and the empty language.
  if (typeof r === "string") {
    return r === "Œµ" || r === "ùúÄ" ? "Œµ" : r;
  }
  if (r === 0) return "‚àÖ";

  if (r instanceof Tuple) {
    // 'any' for generic access; the shape is checked manually.
    const node = r as any;
    const arity = node.length;

    // Kleene star: [expr, "*"]
    if (arity === 2 && node.get(1) === "*") {
      const operand = node.get(0) as RegExp;
      const text = regexpToString(operand);
      const isAtom = typeof operand === "string" || typeof operand === "number";
      return isAtom ? text + "*" : "(" + text + ")*";
    }

    // Binary operators: [lhs, op, rhs]
    if (arity === 3) {
      const lhs = node.get(0) as RegExp;
      const operator = node.get(1) as string;
      const rhs = node.get(2) as RegExp;

      switch (operator) {
        case "‚ãÖ":
          return wrapIfSum(lhs) + wrapIfSum(rhs);
        case "+":
          return regexpToString(lhs) + "+" + regexpToString(rhs);
      }
    }
  }

  // Unrecognised shapes are shown as JSON.
  return JSON.stringify(r);
}

In [None]:
// Print the simplified expression in its compact string form.
console.log("\nEndg√ºltiger regul√§rer Ausdruck (String):");
console.log(regexpToString(s));