In [None]:
!pip install git+https://github.com/infiplexity-pixel/erbf

Collecting git+https://github.com/infiplexity-pixel/erbf
  Cloning https://github.com/infiplexity-pixel/erbf to /tmp/pip-req-build-og4ckxbi
  Running command git clone --filter=blob:none --quiet https://github.com/infiplexity-pixel/erbf /tmp/pip-req-build-og4ckxbi
  Resolved https://github.com/infiplexity-pixel/erbf to commit 58021317120aaf5985b020d4c3a64d34f9674a35
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: erbf
  Building wheel for erbf (pyproject.toml) ... [?25l[?25hdone
  Created wheel for erbf: filename=erbf-0.1.1-py3-none-any.whl size=24542 sha256=3c31d6196d52841dddde07ffb0a4631376897edd82a67b2e4207aae46f1f3889
  Stored in directory: /tmp/pip-ephem-wheel-cache-cremcqhi/wheels/90/64/f9/cd3f07a5cfb28d3fa7c42fac2953fd8d00fc797955f88da075
Successfully built erbf
Installing collected packages: erbf
Successfully instal

In [None]:
"""
Install erbf:
    git clone https://github.com/infiplexity-pixel/erbf.git
    cd erbf && pip install -e .
"""

import torch
import numpy as np
import time
from dataclasses import dataclass, field
from typing import List, Dict, Optional
from transformers import AutoTokenizer, AutoModelForCausalLM
from erbf import ERBFRegressor

torch.manual_seed(42)
np.random.seed(42)
device = "cuda" if torch.cuda.is_available() else "cpu"

# ══════════════════════════════════════════════════════════════
# CONFIGURATION
# ══════════════════════════════════════════════════════════════
N_LAYERS    = 12
TRAJ_LEN    = 16        # ↑ from 10: more trajectory coverage
HIDDEN_DIM  = 768
SEQ_LEN     = 128

# Evolution
POP_SIZE      = 100      # ↑ from 20
N_GENERATIONS = 1      # ↑ from 3 (was limited by runtime)
ELITE_K       = 1       # ↑ from 3
TOURNAMENT_K  = 5
MUTATION_RATE = 0.18
CROSSOVER_RATE= 0.65
PATIENCE      = 6       # ↑ from 4

# Fitness
MAX_EVAL_TOKENS  = 40   # ↑ from 20: see full answer before scoring
CONTROL_PENALTY  = 0.5  # ↓ from 2.0: controls were already at 0.000 drift
PERFECT_BONUS    = 0.5  # bonus added when ALL key tokens recalled

# Gene bounds
ALPHA_BOUNDS = (0.05, 2.0)   # ↑ ceiling from 0.9 → 2.0
SIGMA_BOUNDS = (1.0,  80.0)

# Max layers per chromosome (allow up to 8 for stronger signal)
MAX_ACTIVE_LAYERS = 8

# ══════════════════════════════════════════════════════════════
# MODEL
# ══════════════════════════════════════════════════════════════
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
model.eval()

# ══════════════════════════════════════════════════════════════
# MEMORY FACTS
# ══════════════════════════════════════════════════════════════
memory_facts = [
    {
        "prompt":    "How many moons does the city of Lorith have? The city of Lorith has",
        "suffix":    " three moons.",
        "question":  "How many moons does the city of Lorith have?",
        "expected":  "The city of Lorith has three moons.",
        # key_tokens with weights: critical tokens get higher weight
        "key_tokens": [("three", 1.0), ("moons", 0.5)],
    },
    {
        "prompt":    "At what temperature does azurion melt? The mineral azurion melts at",
        "suffix":    " 417 degrees Celsius.",
        "question":  "At what temperature does azurion melt?",
        "expected":  "The mineral azurion melts at 417 degrees Celsius.",
        "key_tokens": [("417", 1.0), ("Celsius", 0.7), ("degrees", 0.3)],
    },
    {
        "prompt":    "Who invented the K-Loop Engine? Professor Elian Voss invented",
        "suffix":    " the K-Loop Engine.",
        "question":  "Who invented the K-Loop Engine?",
        "expected":  "Professor Elian Voss invented the K-Loop Engine.",
        "key_tokens": [("Voss", 0.8), ("Engine", 1.0), ("K-Loop", 0.8)],
    },
]

control_queries = [
    "Who invented the telephone?",
    "What is the capital of France?",
    "What is the boiling point of water?",
]

N_FACTS = len(memory_facts)

# ══════════════════════════════════════════════════════════════
# DATA COLLECTION
# ══════════════════════════════════════════════════════════════
def get_mlp_input(text: str, layer_idx: int) -> torch.Tensor:
    enc = tokenizer(text, return_tensors="pt",
                    truncation=True, max_length=SEQ_LEN).to(device)
    cap = {}
    def hook(m, inp, out): cap["x"] = inp[0].detach()
    h = model.transformer.h[layer_idx].mlp.register_forward_hook(hook)
    with torch.no_grad(): model(**enc, use_cache=False)
    h.remove()
    return cap["x"][0]  # [seq_len, d]


def collect_trajectory(prompt: str, layer_idx: int) -> torch.Tensor:
    enc  = tokenizer(prompt, return_tensors="pt").to(device)
    ids  = enc["input_ids"]
    traj = []
    for _ in range(TRAJ_LEN):
        cap = {}
        def hook(m, inp, out): cap["x"] = inp[0][0, -1, :].detach().cpu()
        h = model.transformer.h[layer_idx].mlp.register_forward_hook(hook)
        with torch.no_grad(): lm_out = model(input_ids=ids, use_cache=False)
        h.remove()
        if "x" in cap: traj.append(cap["x"])
        nxt = lm_out.logits[0, -1].argmax(keepdim=True).unsqueeze(0)
        ids = torch.cat([ids, nxt], dim=-1)
    if not traj:
        return torch.zeros(TRAJ_LEN, HIDDEN_DIM)
    t = torch.stack(traj)
    return t if t.shape[0] == TRAJ_LEN else torch.cat(
        [t, t[-1:].expand(TRAJ_LEN - t.shape[0], -1)])


print("Collecting trajectory data...")
t0 = time.time()
trajectories: Dict[int, Dict[int, torch.Tensor]] = {}
correct_hs:   Dict[int, Dict[int, torch.Tensor]] = {}
for li in range(N_LAYERS):
    trajectories[li] = {}
    correct_hs[li]   = {}
    for fi, fact in enumerate(memory_facts):
        trajectories[li][fi] = collect_trajectory(fact["prompt"], li)
        correct_hs[li][fi]   = get_mlp_input(fact["prompt"] + fact["suffix"], li)
print(f"Done in {time.time()-t0:.1f}s\n")


def get_control_baselines() -> List[torch.Tensor]:
    out_list = []
    for q in control_queries:
        enc = tokenizer(q, return_tensors="pt").to(device)
        with torch.no_grad(): out = model(**enc)
        out_list.append(out.logits[0, -1].detach().cpu())
    return out_list

print("Computing control baselines...")
control_baselines = get_control_baselines()
print("Done.\n")

# ══════════════════════════════════════════════════════════════
# CHROMOSOME
# ══════════════════════════════════════════════════════════════
@dataclass
class Chromosome:
    layer_mask:  np.ndarray = field(default_factory=lambda: np.zeros(N_LAYERS, dtype=bool))
    alphas:      np.ndarray = field(default_factory=lambda: np.zeros(N_LAYERS))
    sigmas:      np.ndarray = field(default_factory=lambda: np.ones(N_FACTS) * 20.0)
    anchor_mask: np.ndarray = field(default_factory=lambda: np.ones(TRAJ_LEN, dtype=bool))
    fitness: float = -999.0

    def clone(self):
        c = Chromosome()
        c.layer_mask  = self.layer_mask.copy()
        c.alphas      = self.alphas.copy()
        c.sigmas      = self.sigmas.copy()
        c.anchor_mask = self.anchor_mask.copy()
        c.fitness     = self.fitness
        return c

    def active_layers(self) -> List[int]:
        return [i for i in range(N_LAYERS) if self.layer_mask[i]]

    def anchor_indices(self) -> List[int]:
        return [i for i in range(TRAJ_LEN) if self.anchor_mask[i]]


def random_chromosome() -> Chromosome:
    c = Chromosome()
    # Uniform layer sampling — let GA discover which layers work
    n_active = np.random.randint(3, MAX_ACTIVE_LAYERS + 1)
    chosen   = np.random.choice(N_LAYERS, size=n_active, replace=False)
    c.layer_mask[chosen] = True
    c.alphas[chosen] = np.random.uniform(*ALPHA_BOUNDS, size=n_active)
    c.sigmas = np.random.uniform(*SIGMA_BOUNDS, size=N_FACTS)
    n_anc = np.random.randint(5, TRAJ_LEN + 1)
    anc   = np.random.choice(TRAJ_LEN, size=n_anc, replace=False)
    c.anchor_mask = np.zeros(TRAJ_LEN, dtype=bool)
    c.anchor_mask[anc] = True
    return c


# ══════════════════════════════════════════════════════════════
# ERBF BUILDER
# ══════════════════════════════════════════════════════════════
def build_erbf_for_layer(layer_idx: int, anchor_idx: List[int]) -> List[Optional[dict]]:
    mlp = model.transformer.h[layer_idx].mlp
    fact_regs = []

    for fi, fact in enumerate(memory_facts):
        X_list, Y_list = [], []
        wrong_traj = trajectories[layer_idx][fi]
        corr_full  = correct_hs[layer_idx][fi]

        p_ids = tokenizer(fact["prompt"],                   return_tensors="pt")["input_ids"]
        c_ids = tokenizer(fact["prompt"] + fact["suffix"],  return_tensors="pt")["input_ids"]
        n_ans = min(c_ids.shape[1] - p_ids.shape[1], TRAJ_LEN)

        if n_ans <= 0:
            fact_regs.append(None)
            continue

        corr_ans = corr_full[-n_ans:, :]

        # Wrong-trajectory anchors at selected positions
        for t in anchor_idx:
            if t >= wrong_traj.shape[0] or t >= n_ans:
                continue
            wh = wrong_traj[t:t+1].to(device)
            ch = corr_ans[t:t+1].to(device)
            with torch.no_grad():
                wo = mlp(wh)
                co = mlp(ch)
            X_list.append(wh.cpu().numpy())
            Y_list.append((co - wo).cpu().numpy())

        # Correct territory → zero delta
        for t in range(corr_ans.shape[0]):
            X_list.append(corr_ans[t:t+1].cpu().numpy())
            Y_list.append(np.zeros((1, HIDDEN_DIM), dtype=np.float32))

        # Prompt last-token anchor
        q_hs   = get_mlp_input(fact["prompt"], layer_idx)
        q_last = q_hs[-1:].cpu()
        if corr_ans.shape[0] > 0:
            with torch.no_grad():
                qo = mlp(q_last.to(device))
                co = mlp(corr_ans[0:1].to(device))
            X_list.append(q_last.numpy())
            Y_list.append((co - qo).cpu().numpy())

        if len(X_list) < 2:
            fact_regs.append(None)
            continue

        X = np.concatenate(X_list)
        Y = np.concatenate(Y_list)
        reg = ERBFRegressor(k_neighbors=None)
        reg.fit(X, Y)
        fact_regs.append({"regressor": reg, "X_train": X})

    return fact_regs


# ══════════════════════════════════════════════════════════════
# INJECT / RESTORE
# ══════════════════════════════════════════════════════════════
def rbf_conf(query: np.ndarray, X_train: np.ndarray, sigma: float) -> float:
    d2 = ((query[:, None, :] - X_train[None, :, :]) ** 2).sum(-1)
    return float(np.exp(-d2 / (2 * sigma**2)).max())


def inject(chrom: Chromosome) -> Dict[int, object]:
    anchor_idx = chrom.anchor_indices()
    saved = {}
    for layer_idx in chrom.active_layers():
        fact_regs = build_erbf_for_layer(layer_idx, anchor_idx)
        alpha     = float(chrom.alphas[layer_idx])
        mlp       = model.transformer.h[layer_idx].mlp
        orig_fwd  = mlp.forward
        saved[layer_idx] = orig_fwd

        def make_fwd(orig, fregs, alph, sigs):
            def fwd(hs):
                base  = orig(hs)
                shape = base.shape
                flat  = hs.reshape(-1, hs.size(-1)).cpu().numpy()
                comb  = np.zeros_like(flat)
                fired = False
                for fi, freg in enumerate(fregs):
                    if freg is None: continue
                    if rbf_conf(flat, freg["X_train"], sigs[fi]) > 0.08:
                        d  = freg["regressor"].predict(flat)
                        bn = np.linalg.norm(
                            base.reshape(-1, base.shape[-1]).detach().cpu().numpy(),
                            axis=-1, keepdims=True).clip(1e-6)
                        dn = np.linalg.norm(d, axis=-1, keepdims=True).clip(1e-6)
                        comb += d * np.minimum(1.0, 0.5 * bn / dn)
                        fired = True
                if not fired: return base
                delta = torch.tensor(comb, device=hs.device,
                                     dtype=hs.dtype).reshape(shape)
                return base + alph * delta
            return fwd

        mlp.forward = make_fwd(orig_fwd, fact_regs, alpha, chrom.sigmas)
    return saved


def restore(saved: Dict[int, object]):
    for li, orig in saved.items():
        model.transformer.h[li].mlp.forward = orig


# ══════════════════════════════════════════════════════════════
# FITNESS — weighted key-token recall + perfect bonus
# ══════════════════════════════════════════════════════════════
def weighted_recall(fact: dict) -> float:
    enc = tokenizer(fact["prompt"], return_tensors="pt").to(device)
    with torch.no_grad():
        out = model.generate(**enc, max_new_tokens=MAX_EVAL_TOKENS,
                             do_sample=False, repetition_penalty=1.1)
    text = tokenizer.decode(out[0], skip_special_tokens=True).lower()

    total_w, hit_w = 0.0, 0.0
    all_hit = True
    for tok, w in fact["key_tokens"]:
        total_w += w
        if tok.lower() in text:
            hit_w += w
        else:
            all_hit = False

    score = hit_w / total_w if total_w > 0 else 0.0
    if all_hit:
        score += PERFECT_BONUS   # reward perfect recall strongly
    return score


def kl_drift(baseline: torch.Tensor, query: str) -> float:
    enc = tokenizer(query, return_tensors="pt").to(device)
    with torch.no_grad(): out = model(**enc)
    cur = out.logits[0, -1].detach().cpu()
    p = torch.softmax(baseline, dim=-1).clamp(1e-9)
    q = torch.softmax(cur,      dim=-1).clamp(1e-9)
    return max(0.0, (p * (p / q).log()).sum().item())


def evaluate(chrom: Chromosome) -> float:
    if not chrom.active_layers() or not chrom.anchor_indices():
        return -10.0
    saved   = inject(chrom)
    recall  = np.mean([weighted_recall(f) for f in memory_facts])
    drift   = np.mean([kl_drift(control_baselines[i], q)
                       for i, q in enumerate(control_queries)])
    restore(saved)
    return float(recall - CONTROL_PENALTY * drift)


# ══════════════════════════════════════════════════════════════
# GENETIC OPERATORS
# ══════════════════════════════════════════════════════════════
def tournament(pop: List[Chromosome]) -> Chromosome:
    idx  = np.random.choice(len(pop), size=TOURNAMENT_K, replace=False)
    best = max(idx, key=lambda i: pop[i].fitness)
    return pop[best].clone()


def crossover(a: Chromosome, b: Chromosome) -> Chromosome:
    child = a.clone()
    if np.random.rand() > CROSSOVER_RATE:
        return child
    # Layer mask
    m = np.random.rand(N_LAYERS) < 0.5
    child.layer_mask = np.where(m, a.layer_mask, b.layer_mask)
    # Alphas: BLX blend
    t = np.random.rand(N_LAYERS)
    child.alphas = np.clip(t*a.alphas + (1-t)*b.alphas, *ALPHA_BOUNDS)
    # Sigmas: BLX blend in log space
    la = np.log(np.clip(a.sigmas, *SIGMA_BOUNDS))
    lb = np.log(np.clip(b.sigmas, *SIGMA_BOUNDS))
    t  = np.random.rand(N_FACTS)
    child.sigmas = np.clip(np.exp(t*la + (1-t)*lb), *SIGMA_BOUNDS)
    # Anchor mask
    m = np.random.rand(TRAJ_LEN) < 0.5
    child.anchor_mask = np.where(m, a.anchor_mask, b.anchor_mask)
    return child


def mutate(c: Chromosome) -> Chromosome:
    c = c.clone()
    # Layer mask bit-flip
    flip = np.random.rand(N_LAYERS) < MUTATION_RATE
    c.layer_mask = c.layer_mask ^ flip
    # Enforce layer count limits
    active = c.layer_mask.sum()
    if active < 2:
        extras = np.random.choice(
            [i for i in range(N_LAYERS) if not c.layer_mask[i]],
            size=2-active, replace=False)
        c.layer_mask[extras] = True
    elif active > MAX_ACTIVE_LAYERS:
        remove = np.random.choice(
            [i for i in range(N_LAYERS) if c.layer_mask[i]],
            size=active-MAX_ACTIVE_LAYERS, replace=False)
        c.layer_mask[remove] = False
    # Alphas: Gaussian on active layers
    for i in range(N_LAYERS):
        if c.layer_mask[i]:
            if c.alphas[i] == 0:
                c.alphas[i] = np.random.uniform(*ALPHA_BOUNDS)
            if np.random.rand() < MUTATION_RATE:
                c.alphas[i] = np.clip(
                    c.alphas[i] * np.exp(np.random.normal(0, 0.3)),
                    *ALPHA_BOUNDS)
    # Sigmas: log-space Gaussian
    for i in range(N_FACTS):
        if np.random.rand() < MUTATION_RATE:
            c.sigmas[i] = np.clip(
                c.sigmas[i] * np.exp(np.random.normal(0, 0.5)),
                *SIGMA_BOUNDS)
    # Anchor mask bit-flip
    flip = np.random.rand(TRAJ_LEN) < MUTATION_RATE
    c.anchor_mask = c.anchor_mask ^ flip
    if c.anchor_mask.sum() < 3:
        idx = np.random.choice(TRAJ_LEN, size=3, replace=False)
        c.anchor_mask[idx] = True
    return c


# ══════════════════════════════════════════════════════════════
# EVOLUTIONARY LOOP WITH MULTI-RESTART
# ══════════════════════════════════════════════════════════════
def run_evolution(population: List[Chromosome],
                  n_gen: int,
                  tag: str = "") -> Chromosome:
    best_ever  = max(population, key=lambda c: c.fitness).clone()
    no_improve = 0

    for gen in range(n_gen):
        print(f"\n{'='*60}")
        print(f"{tag}Generation {gen+1}/{n_gen} | best={best_ever.fitness:.4f}")
        print(f"{'='*60}")

        population.sort(key=lambda c: c.fitness, reverse=True)
        new_pop = [population[i].clone() for i in range(ELITE_K)]
        while len(new_pop) < POP_SIZE:
            child = crossover(tournament(population), tournament(population))
            new_pop.append(mutate(child))
        population = new_pop

        for i in range(ELITE_K, POP_SIZE):
            population[i].fitness = evaluate(population[i])

        gen_best = max(population, key=lambda c: c.fitness)
        print(f"  Best: layers={gen_best.active_layers()} "
              f"α={[round(gen_best.alphas[l],3) for l in gen_best.active_layers()]} "
              f"σ={[round(s,1) for s in gen_best.sigmas]} "
              f"anchors={gen_best.anchor_mask.sum()} "
              f"fitness={gen_best.fitness:.4f}")

        if gen_best.fitness > best_ever.fitness + 1e-4:
            best_ever  = gen_best.clone()
            no_improve = 0
            print(f"  ★ New best: {best_ever.fitness:.4f}")
        else:
            no_improve += 1
            print(f"  No improvement ({no_improve}/{PATIENCE})")
            if no_improve >= PATIENCE:
                print(f"  Early stopping.")
                break

    return best_ever, population


# ── Phase 1: Initial population ──
print(f"{'='*60}")
print(f"Initialising population ({POP_SIZE} chromosomes)...")
print(f"{'='*60}")
population = [random_chromosome() for _ in range(POP_SIZE)]
print("Evaluating initial population...")
for i, c in enumerate(population):
    c.fitness = evaluate(c)
    print(f"  [{i+1:2d}/{POP_SIZE}] layers={c.active_layers()} fitness={c.fitness:.4f}")

best, population = run_evolution(population, N_GENERATIONS, tag="[Phase 1] ")

# ── Phase 2: Multi-restart if not converged ──
TARGET_FITNESS = 0.9 + PERFECT_BONUS * 0.7   # ~1.25
RESTART_BUDGET = 2

for restart in range(RESTART_BUDGET):
    if best.fitness >= TARGET_FITNESS:
        print(f"\nTarget fitness {TARGET_FITNESS:.2f} reached. Stopping.")
        break

    print(f"\n{'='*60}")
    print(f"RESTART {restart+1}/{RESTART_BUDGET} "
          f"(best={best.fitness:.4f} < target={TARGET_FITNESS:.2f})")
    print(f"{'='*60}")

    # Seed new population from best + mutations
    new_pop = [best.clone()]
    for _ in range(POP_SIZE - 1):
        mutated = best.clone()
        # High mutation rate for diversity
        for _ in range(5):
            mutated = mutate(mutated)
        mutated.fitness = evaluate(mutated)
        new_pop.append(mutated)

    new_best, new_pop = run_evolution(new_pop, N_GENERATIONS,
                                      tag=f"[Restart {restart+1}] ")
    if new_best.fitness > best.fitness:
        best = new_best


# ══════════════════════════════════════════════════════════════
# FINAL RESULT
# ══════════════════════════════════════════════════════════════
print(f"\n{'='*60}")
print("BEST CHROMOSOME")
print(f"{'='*60}")
print(f"  Layers  : {best.active_layers()}")
print(f"  Alphas  : {[round(best.alphas[l], 3) for l in best.active_layers()]}")
print(f"  Sigmas  : {[round(s, 2) for s in best.sigmas]}")
print(f"  Anchors : {best.anchor_mask.sum()} / {TRAJ_LEN}")
print(f"  Fitness : {best.fitness:.4f}")

final_saved = inject(best)


def ask(prompt: str, max_new_tokens: int = 60) -> str:
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=max_new_tokens,
                             do_sample=False, repetition_penalty=1.1)
    return tokenizer.decode(out[0], skip_special_tokens=True)


print(f"\n{'='*60}")
print("MEMORY QUERIES")
print(f"{'='*60}")
total_recall = 0.0
for fact in memory_facts:
    result = ask(fact["prompt"])
    text   = result.lower()
    hits   = [(tok, tok.lower() in text) for tok, _ in fact["key_tokens"]]
    score  = sum(w for tok, w in fact["key_tokens"] if tok.lower() in text)
    total  = sum(w for _, w in fact["key_tokens"])
    pct    = score / total if total > 0 else 0
    total_recall += pct
    print(f"\nQ        : {fact['question']}")
    print(f"Expected : {fact['expected']}")
    print(f"Model    : {result}")
    print(f"Recall   : {pct:.0%}  {[(t, '✓' if h else '✗') for t,h in hits]}")

print(f"\nOverall recall: {total_recall/N_FACTS:.1%}")

print(f"\n{'='*60}")
print("CONTROL QUERIES")
print(f"{'='*60}")
for i, q in enumerate(control_queries):
    result = ask(q)
    drift  = kl_drift(control_baselines[i], q)
    print(f"\nQ     : {q}")
    print(f"A     : {result}")
    print(f"Drift : {drift:.4f}")

restore(final_saved)
print("\nAll hooks removed.")



Loading weights:   0%|          | 0/148 [00:00<?, ?it/s]

GPT2LMHeadModel LOAD REPORT from: gpt2
Key                  | Status     |  | 
---------------------+------------+--+-
h.{0...11}.attn.bias | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Collecting trajectory data...
Done in 139.6s

Computing control baselines...
Done.

Initialising population (100 chromosomes)...
Evaluating initial population...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [ 1/100] layers=[0, 3, 5, 8, 9, 11] fitness=0.5402


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [ 2/100] layers=[2, 4, 7] fitness=0.6508


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [ 3/100] layers=[0, 1, 2, 4, 5, 9, 10, 11] fitness=0.8577


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [ 4/100] layers=[0, 1, 3, 5, 6, 8, 9, 10] fitness=0.1400


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [ 5/100] layers=[0, 1, 2, 6, 7, 9, 10, 11] fitness=0.3484


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [ 6/100] layers=[2, 9, 11] fitness=0.5980


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [ 7/100] layers=[0, 4, 5, 6, 7, 8, 9] fitness=0.0902


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [ 8/100] layers=[1, 4, 6, 8, 9] fitness=0.5902


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [ 9/100] layers=[0, 1, 2, 3, 4, 7, 8, 11] fitness=0.2738


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [10/100] layers=[3, 4, 5, 7, 8, 9, 11] fitness=0.5143


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [11/100] layers=[0, 3, 4, 5, 7, 8, 9, 10] fitness=0.3975


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [12/100] layers=[0, 3, 10] fitness=0.5107


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [13/100] layers=[1, 2, 5, 6, 7, 8, 9] fitness=0.4471


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [14/100] layers=[1, 3, 5, 6, 7, 8, 9] fitness=0.5683


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [15/100] layers=[4, 7, 8, 9, 10] fitness=0.5262


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [16/100] layers=[1, 2, 5, 8, 10] fitness=0.6464


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [17/100] layers=[1, 2, 4, 8, 9] fitness=0.5163


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [18/100] layers=[0, 1, 6] fitness=0.1988


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [19/100] layers=[0, 2, 4, 5, 6, 8] fitness=0.5645


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [20/100] layers=[1, 3, 6, 10, 11] fitness=0.4503


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [21/100] layers=[0, 3, 5] fitness=0.3891


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [22/100] layers=[4, 6, 7, 10, 11] fitness=0.5376


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [23/100] layers=[0, 3, 5, 6, 7, 8, 11] fitness=0.5029


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [24/100] layers=[0, 2, 3, 4, 5, 8, 10, 11] fitness=0.2359


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [25/100] layers=[2, 4, 6, 8, 10, 11] fitness=0.5213


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [26/100] layers=[1, 4, 6, 9] fitness=0.5729


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [27/100] layers=[2, 5, 7, 10] fitness=0.5535


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [28/100] layers=[0, 1, 3, 5, 6] fitness=0.4951


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [29/100] layers=[1, 4, 6, 8, 11] fitness=0.5485


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [30/100] layers=[0, 1, 6, 7, 8] fitness=0.4450


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [31/100] layers=[1, 3, 6] fitness=0.5799


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [32/100] layers=[4, 5, 8, 9] fitness=0.5375


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [33/100] layers=[1, 3, 9] fitness=0.5672


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [34/100] layers=[2, 5, 7, 8] fitness=0.7704


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [35/100] layers=[0, 5, 8] fitness=0.6005


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [36/100] layers=[3, 5, 6] fitness=0.9956


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [37/100] layers=[0, 6, 7, 9] fitness=0.1987


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [38/100] layers=[1, 2, 3, 6, 8, 9, 10, 11] fitness=0.5982


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [39/100] layers=[1, 3, 5, 11] fitness=0.4324


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [40/100] layers=[0, 4, 5, 7, 8, 9, 10] fitness=0.7065


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [41/100] layers=[1, 8, 9, 11] fitness=0.5038


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [42/100] layers=[0, 1, 2, 3, 5, 6] fitness=0.4982


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [43/100] layers=[7, 9, 11] fitness=0.5156


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [44/100] layers=[8, 9, 11] fitness=0.6062


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [45/100] layers=[0, 1, 9] fitness=0.1102


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [46/100] layers=[0, 1, 3, 4, 5, 7, 10, 11] fitness=-0.6007


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [47/100] layers=[0, 2, 3, 7, 11] fitness=0.6126


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [48/100] layers=[4, 8, 9, 10] fitness=0.5953


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [49/100] layers=[3, 6, 9] fitness=0.5103


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [50/100] layers=[2, 3, 5, 9] fitness=0.5755


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [51/100] layers=[0, 1, 2, 4, 5, 7, 8, 9] fitness=0.5929


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [52/100] layers=[1, 7, 9, 10] fitness=0.5156


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [53/100] layers=[0, 3, 5, 6, 8, 9, 10] fitness=0.7366


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [54/100] layers=[1, 6, 7, 10, 11] fitness=0.4964


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [55/100] layers=[1, 3, 6, 8, 11] fitness=0.5167


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [56/100] layers=[2, 3, 6, 7, 8, 9, 10, 11] fitness=0.1621


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [57/100] layers=[0, 4, 6] fitness=-0.0048


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [58/100] layers=[3, 4, 6, 8, 10] fitness=0.5972


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [59/100] layers=[0, 6, 7, 9, 10] fitness=0.4041


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [60/100] layers=[1, 2, 3, 4, 5, 8, 10, 11] fitness=0.5461


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [61/100] layers=[1, 6, 7, 8, 9, 10, 11] fitness=0.4663


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [62/100] layers=[2, 9, 11] fitness=0.6058


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [63/100] layers=[2, 9, 10, 11] fitness=0.3446


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [64/100] layers=[2, 5, 9, 11] fitness=0.5765


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [65/100] layers=[0, 5, 9, 10, 11] fitness=-0.8186


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [66/100] layers=[0, 2, 6, 8, 9, 10] fitness=0.3688


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [67/100] layers=[0, 2, 3, 7, 8, 10] fitness=0.1293


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [68/100] layers=[1, 5, 7, 8, 9, 10] fitness=0.4209


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [69/100] layers=[1, 2, 8, 10, 11] fitness=0.5461


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [70/100] layers=[0, 1, 2, 3, 4, 6, 8, 10] fitness=0.4212


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [71/100] layers=[0, 2, 4, 6, 7, 11] fitness=0.5299


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [72/100] layers=[0, 5, 7, 9] fitness=0.4485


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [73/100] layers=[0, 2, 3, 4, 5, 9, 11] fitness=0.4616


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [74/100] layers=[1, 2, 7, 11] fitness=0.5965


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [75/100] layers=[1, 2, 3, 4, 5, 7, 8, 9] fitness=0.4166


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [76/100] layers=[0, 1, 3, 5, 8] fitness=0.5570


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [77/100] layers=[0, 3, 4, 8, 9, 10] fitness=0.3539


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [78/100] layers=[0, 1, 3, 5, 8, 10, 11] fitness=0.2641


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [79/100] layers=[0, 2, 3, 4, 6, 7, 10] fitness=0.5044


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [80/100] layers=[4, 8, 9] fitness=0.5221


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [81/100] layers=[4, 6, 8, 9, 11] fitness=0.6082


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [82/100] layers=[0, 7, 9] fitness=0.4721


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [83/100] layers=[3, 7, 9, 10] fitness=0.5294


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [84/100] layers=[0, 1, 3, 5, 6, 7, 9, 10] fitness=-0.5681


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [85/100] layers=[0, 1, 7, 10, 11] fitness=0.4320


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [86/100] layers=[0, 1, 2, 3, 4, 10, 11] fitness=0.3721


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [87/100] layers=[0, 1, 3, 6, 8, 9, 10] fitness=0.5724


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [88/100] layers=[6, 8, 9] fitness=0.5647


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [89/100] layers=[1, 5, 8] fitness=0.5577


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [90/100] layers=[2, 4, 5, 6, 10] fitness=0.5327


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [91/100] layers=[1, 2, 3, 7, 9] fitness=0.6093


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [92/100] layers=[1, 2, 5, 11] fitness=0.5953


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [93/100] layers=[0, 4, 6, 7, 11] fitness=0.5860


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [94/100] layers=[0, 3, 5, 8, 11] fitness=0.5627


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [95/100] layers=[1, 6, 7, 8, 9, 10] fitness=0.5983


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [96/100] layers=[3, 5, 6, 7, 11] fitness=0.5188


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [97/100] layers=[3, 6, 10, 11] fitness=0.5867


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [98/100] layers=[0, 1, 3, 4, 6, 8] fitness=0.5423


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [99/100] layers=[1, 3, 5] fitness=0.6535


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  [100/100] layers=[2, 4, 9, 10] fitness=0.5801

[Phase 1] Generation 1/1 | best=0.9956


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

  Best: layers=[0, 1, 2, 7, 9] α=[np.float64(1.443), np.float64(0.189), np.float64(0.05), np.float64(0.053), np.float64(0.303)] σ=[np.float64(71.1), np.float64(12.2), np.float64(38.5)] anchors=8 fitness=1.0810
  ★ New best: 1.0810

RESTART 1/2 (best=1.0810 < target=1.25)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene


[Restart 1] Generation 1/1 | best=1.1436


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

  Best: layers=[1, 3, 4, 9, 10, 11] α=[np.float64(0.189), np.float64(0.717), np.float64(0.05), np.float64(0.226), np.float64(0.141), np.float64(0.056)] σ=[np.float64(30.6), np.float64(12.2), np.float64(29.7)] anchors=7 fitness=1.1635
  ★ New best: 1.1635

RESTART 2/2 (best=1.1635 < target=1.25)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene


[Restart 2] Generation 1/1 | best=1.1635


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

  Best: layers=[1, 3, 4, 9, 10, 11] α=[np.float64(0.189), np.float64(0.717), np.float64(0.05), np.float64(0.226), np.float64(0.141), np.float64(0.056)] σ=[np.float64(30.6), np.float64(12.2), np.float64(29.7)] anchors=7 fitness=1.1635
  No improvement (1/6)

BEST CHROMOSOME
  Layers  : [1, 3, 4, 9, 10, 11]
  Alphas  : [np.float64(0.189), np.float64(0.717), np.float64(0.05), np.float64(0.226), np.float64(0.141), np.float64(0.056)]
  Sigmas  : [np.float64(30.59), np.float64(12.23), np.float64(29.71)]
  Anchors : 7 / 16
  Fitness : 1.1635


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



MEMORY QUERIES


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Q        : How many moons does the city of Lorith have?
Expected : The city of Lorith has three moons.
Model    : How many moons does the city of Lorith have? The city of Lorith has a population that is estimated at over 100 million.

The number of moon-bearing planets in the galaxy, which are known as "moon clusters", can be divided into three categories: (1) starships;(2), asteroids; and (3). Star systems with large numbers or small
Recall   : 100%  [('three', '✓'), ('moons', '✓')]


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Q        : At what temperature does azurion melt?
Expected : The mineral azurion melts at 417 degrees Celsius.
Model    : At what temperature does azurion melt? The mineral azurion melts at a rate of about 1.5 to 2 degrees Celsius (3,000 to 4,500 Fahrenheit).
The Azurite is the most abundant element in all minerals and has been found throughout history; it was first discovered by Alexander I on his voyage from Alexandria into Egypt during 1776-17
Recall   : 50%  [('417', '✗'), ('Celsius', '✓'), ('degrees', '✓')]


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Q        : Who invented the K-Loop Engine?
Expected : Professor Elian Voss invented the K-Loop Engine.
Model    : Who invented the K-Loop Engine? Professor Elian Voss invented it.
The first thing you need to know about this machine is that its design was inspired by a real life car, and not just an old school automobile like some of today's cars are made out for. The idea behind these machines came from Dr. David Siegel who created his own version
Recall   : 100%  [('Voss', '✓'), ('Engine', '✓'), ('K-Loop', '✓')]

Overall recall: 83.3%

CONTROL QUERIES


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Q     : Who invented the telephone?
A     : Who invented the telephone?
The first phone was a small, rectangular device called an "interchangeable" dialer. It had two buttons on each side of it: one for your voice and another for text messages or calls from other people in real time (the latter being more like calling someone who's been to their
Drift : 0.0083


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Q     : What is the capital of France?
A     : What is the capital of France?
The French government has been in power since 1871. The country's economy grew by 2% between 1870 and 1920, but it was still struggling to recover from a financial crisis that began with World War I. In 1921, President François Mitterrand declared war on Germany for its support of Nazi
Drift : 0.0039

Q     : What is the boiling point of water?
A     : What is the boiling point of water?
A. The temperature at which a liquid boils in order to produce steam or other gases, such as carbon dioxide and methane (CH4), can be determined by measuring how much heat has been produced from one fluid solution over time: Aqueous solutions are usually heated up with an equal amount of
Drift : 0.0068

All hooks removed.
