# Constrained Generator Benchmarks

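Lightweight benchmarks that time repeated `ConstrainedGenerator.get_completions()` and `TypedSampler.infer_greedy()` calls and report the mean and p95 latency per call, followed by an optional real-model smoke test.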

In [None]:
import time
import statistics
import random
import proposition_7 as p7

# Shared fixtures: the "imp" grammar and a ConstrainedGenerator built from
# it. Both names are read again by the benchmark code further down.
grammar = p7.get_grammar("imp")
gen = p7.ConstrainedGenerator(grammar)

# Show which nonterminal the grammar starts from.
print(
    "Start NT:",
    p7.Grammar(grammar).start_nonterminal(),
)

In [None]:
# Completion throughput benchmark
# Times `runs` consecutive gen.get_completions() calls with a monotonic
# high-resolution clock and prints the mean and 95th-percentile duration.
runs = 50
times = []
for _ in range(runs):
    started = time.perf_counter()
    gen.get_completions()  # return value is discarded; only duration matters
    finished = time.perf_counter()
    times.append(finished - started)

mean_s = statistics.mean(times)
# quantiles(n=20) yields 19 cut points in 5% steps; index 18 is the 95% one.
p95_s = statistics.quantiles(times, n=20)[18]

print(f"Completions: {runs} runs")
print(f"  mean: {mean_s:.6f}s")
print(f"  p95:  {p95_s:.6f}s")

In [None]:
# TypedSampler throughput benchmark (synthetic logits)
# Small hand-written vocabulary of string tokens used for the sampler run.
vocab = ["{", " let x:Int=1; ", "}", ";", " "]


def logit_fn():
    """Return a new list of random scores, one per entry in ``vocab``.

    Each score is an independent ``random.random()`` draw, i.e. a float in
    the half-open interval [0.0, 1.0). No model is involved; the values only
    stand in for real logits so the sampler can be timed.
    """
    return [random.random() for _ in range(len(vocab))]

# Build a sampler over the toy vocabulary, driven by the synthetic logit
# source, and feed it an initial prefix before any timing starts.
sampler = p7.TypedSampler(
    grammar=grammar,
    vocab=vocab,
    logit_fn=logit_fn,
)
sampler.feed("{ let x:Int=1; ")

# Time `runs` consecutive infer_greedy() calls on the same sampler object.
# NOTE(review): whether infer_greedy() changes sampler state between
# iterations is not visible from here; confirm if that matters for the numbers.
runs = 100
times = []
for _ in range(runs):
    started = time.perf_counter()
    sampler.infer_greedy(k=1, pre_top_k=5)  # result discarded; timing only
    finished = time.perf_counter()
    times.append(finished - started)

mean_s = statistics.mean(times)
# quantiles(n=20) yields 19 cut points in 5% steps; index 18 is the 95% one.
p95_s = statistics.quantiles(times, n=20)[18]

print(f"Sampler infer: {runs} runs")
print(f"  mean: {mean_s:.6f}s")
print(f"  p95:  {p95_s:.6f}s")

In [None]:
# Optional: quick real-model smoke test (requires transformers + torch)
# The imports below only probe whether the optional dependencies can be
# loaded; none of the imported names is referenced later in this cell.
try:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
except Exception as import_err:
    # Report why the test is skipped, then stop execution here.
    print("Skipping LLM test:", import_err)
    raise SystemExit

model_name = "distilgpt2"
cm = p7.ConstrainedModel.from_pretrained(
    model_name,
    grammar=grammar,
)
# Presumably cm.model is a torch module and eval() puts it in inference
# mode -- TODO confirm against ConstrainedModel.
cm.model.eval()

# Generate from the same prefix used in the sampler benchmark, capped at
# 32 tokens, then print the completion flag and the resulting text.
result = cm.until_complete(
    initial="{ let x:Int=1; ",
    max_tokens=32,
)
print("Complete:", result.is_complete)
print(result.text)