In [1]:
from itertools import product
import random
import json
from math import ceil

import numpy as np
from tqdm import trange
import seaborn as sns
import matplotlib.pyplot as plt

# Set the display precision for the whole session. `np.printoptions(...)` is a
# context manager and changes nothing unless used in a `with` block, so the
# persistent setter is the call that actually takes effect here.
np.set_printoptions(precision=3)

from src.pfsa.fsa_generator import random_dpfsa, random_pfsa, geometric_sum_pfsa, random_ngram
from src.pfsa.fsa import PFSA

In [2]:
# Import the predictive information module
from src.pfsa.predictive_information import (
    calculate_predictive_information,
    calculate_predictive_information_with_convergence,
    plot_entropy_convergence,
    plot_entropy_rate_comparison,
    analyze_predictive_information_across_parameters
)

## Generate a Sample PFSA


In [4]:
from itertools import product
from typing import Callable, Dict, Optional, Sequence, Tuple

import numpy as np

from src.pfsa.fsa import PFSA
from src.pfsa.ngram import NGram

def _generate_outgoing_labels(
    n_states: int, n_symbols: int, rng: Optional[np.random.Generator], min_size: int = 2
):
    numbers = np.arange(n_symbols)
    sets = [set() for _ in range(n_states)]

    # Step 1: Ensure each state gets at least two unique symbols
    for state in range(n_states):
        sets[state].update(
            rng.choice(numbers, size=min(min_size, n_symbols), replace=False)
        )

    # Step 2: Ensure each symbol appears in at least one set
    for num in numbers:
        chosen_set = rng.choice(sets)
        chosen_set.add(num)

    # Step 3: Add additional symbols to create overlap
    total_size = sum(
        rng.integers(0, max(1, n_symbols // 2 - 2)) for _ in range(n_states)
    )
    for _ in range(total_size):
        num = rng.choice(numbers)
        chosen_set = rng.choice(sets)
        chosen_set.add(num)

    return sets


def _random_dpfsa(
    n_states: int,
    n_symbols: int,
    mean_length: Optional[float],
    topology_rng: np.random.Generator,
    weight_rng: np.random.Generator,
) -> PFSA:
    """Sample one deterministic PFSA: one target state per (state, symbol) pair.

    Topology (initial state, label sets, transition targets) is drawn from
    ``topology_rng``; arc and final weights are drawn from ``weight_rng``.

    Args:
        n_states: Number of states.
        n_symbols: Number of symbols.
        mean_length: If given, the pre-normalisation ``ρ`` of each state is its
            total outgoing weight divided by this value, which makes the
            normalised ``ρ`` equal to ``1 / (mean_length + 1)``. If ``None``,
            ``ρ`` is drawn from an exponential with scale ``total / 25``.
        topology_rng: Generator used for every structural choice.
        weight_rng: Generator used for every weight draw.

    Returns:
        The populated ``PFSA``; for every state the transition weights plus
        ``ρ`` sum to one.
    """
    automaton = PFSA(n_states, n_symbols)

    # One-hot initial distribution on a randomly chosen state.
    initial_state = topology_rng.choice(n_states, 1)
    automaton.λ = np.zeros(n_states)
    automaton.λ[initial_state] = 1

    automaton.Ts = {
        symbol: np.zeros((automaton.n_states, automaton.n_states))
        for symbol in range(automaton.n_symbols)
    }
    untargeted = set(range(automaton.n_states))
    allowed_labels = _generate_outgoing_labels(n_states, n_symbols, topology_rng)

    for source in range(automaton.n_states):
        for symbol in range(automaton.n_symbols):
            if untargeted:
                # Prefer states that no arc points to yet, so each state is
                # used as a target once before targets are drawn freely.
                target = topology_rng.choice(list(untargeted), 1)
                untargeted.remove(target[0])
            else:
                target = topology_rng.choice(n_states, 1)

            # Always consume one draw so the weight stream does not depend on
            # the topology; symbols outside the state's label set keep only
            # the 0.001 floor.
            draw = weight_rng.exponential(0.1)
            base_weight = draw if symbol in allowed_labels[source] else 0.0
            automaton.Ts[symbol][source, target] = base_weight + 0.001

    # NOTE(review): `ρ` is written element-wise below without being created
    # here, so it is presumably allocated by the PFSA constructor — confirm.
    for source in range(automaton.n_states):
        outgoing_total = sum(
            automaton.Ts[symbol][source].sum()
            for symbol in range(automaton.n_symbols)
        )
        if mean_length is not None:
            automaton.ρ[source] = outgoing_total / mean_length
        else:
            automaton.ρ[source] = weight_rng.exponential(outgoing_total / 25)

        # Normalise the state's row so arcs and ρ form a distribution.
        normaliser = outgoing_total + automaton.ρ[source]
        for symbol in range(automaton.n_symbols):
            automaton.Ts[symbol][source] /= normaliser
        automaton.ρ[source] /= normaliser

    return automaton


def random_dpfsa(
    n_states: int,
    n_symbols: int,
    conditions: Sequence[Callable[[PFSA], bool]],
    topology_seed: int,
    weight_seed: int,
    mean_length: Optional[float] = None,
) -> PFSA:
    """Rejection-sample a deterministic PFSA that passes all acceptance checks.

    A candidate is rejected when any of the following holds, checked in this
    order:

    1. the condition number of ``I - A.T`` exceeds ``1e3``;
    2. the norm of ``A.kleene`` is not strictly between ``1e-2`` and ``1e3``;
    3. any predicate in ``conditions`` returns a falsy value.

    After each rejection a new candidate is drawn from the current generators
    and the topology generator is then re-seeded from its own output. The loop
    has no iteration cap, so it does not return if no candidate is accepted.

    NOTE: within this notebook this definition shadows the ``random_dpfsa``
    imported from ``src.pfsa.fsa_generator`` in the first cell.

    Args:
        n_states: Number of states.
        n_symbols: Number of symbols.
        conditions: Extra predicates the returned automaton must satisfy.
        topology_seed: Seed for the structural generator.
        weight_seed: Seed for the weight generator.
        mean_length: Forwarded to ``_random_dpfsa``.

    Returns:
        The first candidate that is not rejected.
    """
    topology_rng = np.random.default_rng(topology_seed)
    weight_rng = np.random.default_rng(weight_seed)

    def _is_rejected(candidate: PFSA) -> bool:
        # Guard clauses keep the original short-circuit order: `kleene` is
        # only touched once the conditioning check has passed.
        if np.linalg.cond(np.eye(candidate.n_states) - candidate.T) > 1e3:
            return True
        if not (1e-2 < np.linalg.norm(candidate.kleene) < 1e3):
            return True
        return not all(condition(candidate) for condition in conditions)

    candidate = _random_dpfsa(
        n_states, n_symbols, mean_length, topology_rng, weight_rng
    )
    while _is_rejected(candidate):
        candidate = _random_dpfsa(
            n_states, n_symbols, mean_length, topology_rng, weight_rng
        )
        # Re-seed the topology stream from itself for the next attempt.
        topology_rng = np.random.default_rng(topology_rng.integers(0, 2**32))

    return candidate


# Draw a 4-state, 4-symbol DPFSA with fixed seeds; a candidate is accepted
# only if its mean length lies strictly between 10 and 80.
A = random_dpfsa(
    n_states=4,
    n_symbols=4,
    conditions=[lambda pfsa: 10 < pfsa.mean_length < 80],
    topology_seed=2,
    weight_seed=2,
    mean_length=20,
)

# Summarise the sampled automaton.
print(f"Generated PFSA with {A.n_states} states and {A.n_symbols} symbols")
print(f"Mean length: {A.mean_length:.2f}")
print(f"Next symbol entropy: {A.next_symbol_entropy:.4f}")

Generated PFSA with 4 states and 4 symbols
Mean length: 20.00
Next symbol entropy: 1.5371
