In [6]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and edits to them are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [35]:
from metrics import kl_divergence_potentials, kl_divergence_sequences
from genlm.control import PromptedLLM, direct_token_sampler, AWRS

In [13]:
model_p = PromptedLLM.from_name(
    "gpt2", backend="hf", temperature=1.2, eos_tokens=[b"."]
)
model_p.set_prompt_from_str("Once upon a time, in a magical forest,")

# Model Q: Factual prompt with lower temperature
model_q = model_p.spawn()
model_q.set_prompt_from_str("The capital of France is")
model_q.temperature = 0.8

# Actually sample completions from model P using SMC
print("Sampling completions from creative model...")
sampler_p = direct_token_sampler(model_p)

sequences_p = await sampler_p.smc(n_particles=10, max_tokens=15, ess_threshold=0.5)



Sampling completions from creative model...


In [29]:
samples = list(sequences_p.decoded_posterior.keys())
kl_div = await kl_divergence_potentials(model_p, model_q, samples)
# samples[0]

  elif not isinstance(ast_elt, (ast.Ellipsis, ast.Pass)):


In [28]:
# Show how model P tokenizes the first sampled completion (displayed as the cell output).
model_p.tokenize(samples[0])

[b' you', b' walked', b' into', b' a', b' blue', b' dancing', b' statue']

In [30]:
print(f"\nKL(Creative||Factual) = {kl_div:.4f}")

# Also demonstrate individual log probabilities for first few samples
print("\nIndividual log probabilities:")
for sample in samples[:2]:  # Just show first 2
    tokens = model_p.tokenize(sample)
    logp_p = await model_p.complete(tokens)
    logp_q = await model_q.complete(tokens)
    print(f"  '{sample}':")
    print(f"    Creative model: {logp_p:.3f}")
    print(f"    Factual model:  {logp_q:.3f}")
    print(f"    Log ratio:      {logp_p - logp_q:.3f}")


KL(Creative||Factual) = 27.9045

Individual log probabilities:
  ' you walked into a blue dancing statue':
    Creative model: -37.104
    Factual model:  -59.622
    Log ratio:      22.518
  ' thousands count as hourly':
    Creative model: -33.983
    Factual model:  -54.983
    Log ratio:      21.000

🎯 Key insight: We sampled actual completions from model P,
   then evaluated both P(x) and Q(x) on those samples!
   This gives us KL(P||Q) = E_P[log P(x) - log Q(x)]


In [None]:
from genlm.control import PromptedLLM, BoolFSA, direct_token_sampler
from metrics import kl_divergence_potentials


async def compare_constrained_vs_unconstrained():
    """Contrast an FSA-constrained sampler with an unconstrained one over the same LLM.

    Steps:
      1. Build a prompted GPT-2 model and an AWRS sampler that combines it with
         a regex FSA; build a second prompted model via ``spawn()`` with the
         same prompt and wrap it in a direct token sampler.
      2. Run SMC on both samplers and print up to three decoded samples each.
      3. For the first constrained sample, print its score under both prompted
         models and their difference. Both models wrap the same LLM and prompt,
         so this difference is expected to be 0: the FSA lives in the sampler,
         not in the models being scored.
      4. If any non-blank unconstrained samples exist, call
         ``kl_divergence_potentials`` on them and print the result.

    Both samplers are cleaned up in a ``finally`` block so they are released
    even when sampling or scoring raises.

    Returns:
        None. All results are printed.
    """
    # Model P: the LLM whose *sampler* is additionally constrained by an FSA.
    llm_constrained = PromptedLLM.from_name("gpt2", eos_tokens=[b"\n"])
    llm_constrained.set_prompt_from_str("Here is my honest opinion:")

    # Regex constraint, coerced onto the LLM's tokens by joining them into bytes.
    fsa = BoolFSA.from_regex(r" SMC is (🔥🔥|😍😍|🤌🤌) with LMs")
    coerced_fsa = fsa.coerce(llm_constrained, f=b"".join)
    constrained_sampler = AWRS(llm_constrained, coerced_fsa)

    # Model Q: spawned from P with the same prompt set explicitly, no FSA.
    llm_unconstrained = llm_constrained.spawn()
    llm_unconstrained.set_prompt_from_str("Here is my honest opinion:")
    unconstrained_sampler = direct_token_sampler(llm_unconstrained)

    try:
        print("Sampling from constrained model (with FSA)...")
        constrained_sequences = await constrained_sampler.smc(
            n_particles=10, ess_threshold=0.5, max_tokens=30, verbosity=0
        )

        print("Sampling from unconstrained model...")
        unconstrained_sequences = await unconstrained_sampler.smc(
            n_particles=10, ess_threshold=0.5, max_tokens=30, verbosity=0
        )

        # Distinct decoded strings from each run (posterior weights are dropped).
        constrained_samples = list(constrained_sequences.decoded_posterior.keys())
        unconstrained_samples = list(unconstrained_sequences.decoded_posterior.keys())

        print(f"\nConstrained samples ({len(constrained_samples)}):")
        for rank, sample in enumerate(constrained_samples[:3], start=1):
            print(f"  {rank}. '{sample}'")

        print(f"\nUnconstrained samples ({len(unconstrained_samples)}):")
        for rank, sample in enumerate(unconstrained_samples[:3], start=1):
            print(f"  {rank}. '{sample}' (length: {len(sample)})")

        if constrained_samples:
            # Score one constrained sample under both prompted models to show
            # why a KL between the two *models* comes out as 0.
            print("\nDEBUG: Analyzing constrained sample...")
            sample = constrained_samples[0]
            tokens = llm_constrained.tokenize(sample)
            print(f"Sample: '{sample}'")
            print(f"Tokens: {tokens}")

            # Same underlying LLM and prompt on both sides, so the scores match.
            logp_constrained = await llm_constrained.complete(tokens)
            logp_unconstrained = await llm_unconstrained.complete(tokens)
            print(f"Constrained log prob: {logp_constrained}")
            print(f"Unconstrained log prob: {logp_unconstrained}")
            print(f"Log ratio: {logp_constrained - logp_unconstrained}")
            print("^ This is 0 because both models are the same underlying LLM!")

            # The constraint only affects SAMPLING, not the probability evaluation.
            print("\n💡 Key insight: The FSA constraint affects sampling behavior,")
            print("   but both models use the same LLM for probability evaluation!")
            print("   So KL divergence between the base models is 0.")

        # Drop empty / whitespace-only completions before scoring.
        valid_unconstrained_samples = [
            s for s in unconstrained_samples if s and s.strip()
        ]
        print(
            f"\nValid unconstrained samples: {len(valid_unconstrained_samples)} out of {len(unconstrained_samples)}"
        )

        if valid_unconstrained_samples:
            print("\nTrying KL with valid samples...")
            try:
                kl_u_to_c = await kl_divergence_potentials(
                    llm_unconstrained, llm_constrained, valid_unconstrained_samples
                )
                print(f"KL(Unconstrained || Constrained) = {kl_u_to_c:.4f}")
            except Exception as e:
                # Deliberate best-effort: report the failure and keep the demo going.
                print(f"Error: {e}")
    finally:
        # Always release both samplers; the nested finally ensures the second
        # cleanup still runs if the first one raises.
        try:
            await constrained_sampler.cleanup()
        finally:
            await unconstrained_sampler.cleanup()


# Run the comparison; the coroutine is awaited directly at the top level of the cell.
await compare_constrained_vs_unconstrained()

Sampling from constrained model (with FSA)...
Sampling from unconstrained model...

Constrained samples (1):
  1. ' SMC is 🔥🔥 with LMs'

Unconstrained samples (5):
  1. ' Under TNA's contract and MRG's Agreement with WWE, puts members of my community in direct competition with potential NXT titles.' (length: 129)
  2. ' It's no secret (or recommended, at least) that she is HIV negative.' (length: 68)
  3. ' what motivates me, is the question Europe has been asking ourselves. It is time for a national debate.' (length: 103)

DEBUG: Analyzing constrained sample...
Sample: ' SMC is 🔥🔥 with LMs'
Tokens: [b' SM', b'C', b' is', b' \xf0\x9f', b'\x94', b'\xa5', b'\xf0\x9f', b'\x94', b'\xa5', b' with', b' L', b'Ms']
Constrained log prob: -60.691611766815186
Unconstrained log prob: -60.691611766815186
Log ratio: 0.0
^ This is 0 because both models are the same underlying LLM!

💡 Key insight: The FSA constraint affects sampling behavior,
   but both models use the same LLM for probability evaluat

In [34]:
async def compare_sampling_distributions():
    """Compare the empirical sampling distributions of two samplers over one LLM.

    Builds a single prompted GPT-2 model, wraps it once in an AWRS sampler with
    a regex FSA constraint and once in a direct token sampler, runs SMC on both,
    and passes the two resulting sequence collections to
    ``kl_divergence_sequences``.

    Both samplers are cleaned up in a ``finally`` block, matching the cleanup
    performed by ``compare_constrained_vs_unconstrained``.

    Returns:
        The value returned by ``kl_divergence_sequences`` for
        (constrained, unconstrained); it is also printed.
    """
    # One base model shared by both samplers; only the constraint differs.
    llm = PromptedLLM.from_name("gpt2", eos_tokens=[b"\n"])
    llm.set_prompt_from_str("Here is my honest opinion:")

    # Constrained sampler: LLM combined with the coerced regex FSA.
    fsa = BoolFSA.from_regex(r" SMC is (🔥🔥|😍😍|🤌🤌) with LMs")
    coerced_fsa = fsa.coerce(llm, f=b"".join)
    constrained_sampler = AWRS(llm, coerced_fsa)

    # Unconstrained sampler: samples tokens from the LLM directly.
    unconstrained_sampler = direct_token_sampler(llm)

    try:
        # Sample from both with identical SMC settings.
        constrained_sequences = await constrained_sampler.smc(
            n_particles=20, max_tokens=30, ess_threshold=0.5
        )
        unconstrained_sequences = await unconstrained_sampler.smc(
            n_particles=20, max_tokens=30, ess_threshold=0.5
        )

        # Compare the sampled distributions themselves rather than the base models.
        # NOTE(review): a result of ~23.0259 equals -ln(1e-10). That would suggest
        # the two sample sets were disjoint and the number comes from a probability
        # floor inside kl_divergence_sequences rather than from the data — confirm
        # against the metrics module before interpreting the magnitude.
        kl_div = kl_divergence_sequences(constrained_sequences, unconstrained_sequences)
        print(f"KL(Constrained_samples || Unconstrained_samples) = {kl_div:.4f}")

        return kl_div
    finally:
        # Release both samplers even if sampling or the KL computation raised;
        # the nested finally keeps the second cleanup from being skipped.
        # NOTE(review): both samplers wrap the same `llm`; assumes cleanup() is
        # safe to call on each — confirm against the sampler implementation.
        try:
            await constrained_sampler.cleanup()
        finally:
            await unconstrained_sampler.cleanup()


# Run the comparison; as the cell's last expression, the returned KL value is
# also shown as the cell output in addition to the line printed by the function.
await compare_sampling_distributions()



KL(Constrained_samples || Unconstrained_samples) = 23.0259


np.float64(23.025850929940457)