### MAIN_CODE

In [None]:
import math
import time
import numpy as np
import scipy.sparse as sp
from gensim.models import LdaModel
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time, math
import numpy as np
from typing import Callable, Tuple, Dict, List
import time
import numpy as np
from sklearn.decomposition import LatentDirichletAllocation
import time
import math
import random
import numpy as np
from deap import base, creator, tools
import os
import csv
import json
import time
import math
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import LatentDirichletAllocation
from tensorboardX import SummaryWriter

In [5]:
def load_bow_pair(train_path: str, val_path: str):
    """Load the training and validation bag-of-words matrices.

    Both files are read with ``scipy.sparse.load_npz`` and converted to CSR
    format (``copy=False``, so an already-CSR matrix is not duplicated).

    Args:
        train_path: Path of the ``.npz`` file holding the training matrix.
        val_path: Path of the ``.npz`` file holding the validation matrix.

    Returns:
        A ``(train, validation)`` tuple of CSR sparse matrices.
    """
    train_csr, val_csr = (
        sp.load_npz(npz_path).tocsr(copy=False)
        for npz_path in (train_path, val_path)
    )
    return train_csr, val_csr

In [7]:
# Load the train/validation bag-of-words matrices once at module level.
# The LDA fitting helpers in this file read ``Xtr`` and ``Xva`` as globals.
Xtr, Xva = load_bow_pair("data/X_agnews_train_bow.npz", "data/X_agnews_val_bow.npz")

In [8]:
def lda_blackbox(
    T: int,
    alpha: float,
    eta: float,
    *,
    seed: int = 42,
    max_iter: int = 400,
    batch_size: int = 2048,
    learning_method: str = "online"
) -> float:
    """Fit an LDA model on the global ``Xtr`` and score it on the global ``Xva``.

    Args:
        T: Number of topics (passed as ``n_components``).
        alpha: Document-topic prior (``doc_topic_prior``).
        eta: Topic-word prior (``topic_word_prior``).
        seed: Value passed as ``random_state``.
        max_iter: Value passed as ``max_iter``.
        batch_size: Value passed as ``batch_size``.
        learning_method: Value passed as ``learning_method``.

    Returns:
        The perplexity reported by the fitted model on ``Xva``, as a float.
    """
    lda = LatentDirichletAllocation(
        n_components=int(T),
        doc_topic_prior=float(alpha),
        topic_word_prior=float(eta),
        learning_method=learning_method,
        max_iter=int(max_iter),
        batch_size=int(batch_size),
        random_state=int(seed),
        evaluate_every=-1,
        n_jobs=-1
    )
    # The previous version timed the fit but never used the measurement;
    # the dead timing locals were dropped.
    lda.fit(Xtr)
    return float(lda.perplexity(Xva))

In [9]:
# DEAP registers created classes as attributes of the ``creator`` module.
# Guard each call so that re-running this cell does not re-create the classes
# (DEAP warns when an existing class is overwritten).
if not hasattr(creator, "FitnessMin"):
    # Single-objective fitness with weight -1.0, i.e. the value is minimised.
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
if not hasattr(creator, "Individual"):
    # Individuals are plain lists that carry a ``FitnessMin`` fitness attribute.
    creator.create("Individual", list, fitness=creator.FitnessMin)

In [10]:
def _clamp(x, lo, hi):
    """Restrict ``x`` to the interval ``[lo, hi]``.

    The lower bound is tested first, so ``lo`` is returned whenever
    ``x < lo``; otherwise ``hi`` is returned when ``x > hi``; otherwise ``x``
    is returned unchanged.
    """
    if x < lo:
        return lo
    if x > hi:
        return hi
    return x

In [None]:
class GAOptimizer:
    """Genetic-algorithm minimiser over the triple ``(T, alpha, eta)``.

    An individual is a three-element ``creator.Individual`` list
    ``[T, alpha_gene, eta_gene]``. With ``log_space=True`` the two prior genes
    store ``log10`` of the value and are exponentiated in :meth:`_decode`;
    otherwise they store the value directly.

    ``creator.Individual`` must already have been created before an instance
    is used.

    Args:
        obj: Callable ``obj(T, alpha, eta)`` whose result is converted with
            ``float``; smaller is better.
        T_bounds: Inclusive ``(low, high)`` bounds for ``T``.
        alpha_bounds: ``(low, high)`` bounds for ``alpha``.
        eta_bounds: ``(low, high)`` bounds for ``eta``.
        log_space: Search ``alpha``/``eta`` in ``log10`` space when true.
        seed: Seeds the global ``random`` and ``numpy.random`` generators at
            construction time.
        cxpb: Probability that a consecutive pair of offspring is crossed.
        mutpb: Probability that an offspring is mutated.
        tournsize: Tournament size used for selection.
        elite: Number of best individuals copied unchanged to the next
            generation.
        sigma_log: Standard deviation of the Gaussian noise added to the
            ``alpha``/``eta`` genes by mutation.
        dT: Maximum absolute integer step applied to ``T`` by mutation.
    """

    def __init__(
        self,
        obj,
        T_bounds=(10, 200),
        alpha_bounds=(1e-3, 1.0),
        eta_bounds=(1e-3, 1.0),
        *,
        log_space=True,
        seed=42,
        cxpb=0.9,
        mutpb=0.2,
        tournsize=3,
        elite=2,
        sigma_log=0.25,
        dT=5
    ):
        self.obj = obj
        self.Tb = T_bounds
        self.ab = alpha_bounds
        self.eb = eta_bounds
        self.log = log_space
        self.seed = int(seed)
        self.cxpb = cxpb
        self.mutpb = mutpb
        self.tournsize = tournsize
        self.elite = elite
        self.sigma_log = sigma_log
        self.dT = int(dT)
        self.toolbox = base.Toolbox()
        # Bounds expressed in gene space: log10 of the bounds in log mode,
        # the raw bounds otherwise.
        if self.log:
            self._ab_log = (math.log10(self.ab[0]), math.log10(self.ab[1]))
            self._eb_log = (math.log10(self.eb[0]), math.log10(self.eb[1]))
        else:
            self._ab_log = self.ab
            self._eb_log = self.eb
        random.seed(self.seed)
        np.random.seed(self.seed)

        # Custom individual initialization: alpha and eta are initialized as 1/T
        def create_individual():
            T = random.randint(self.Tb[0], self.Tb[1])
            if self.log:
                # In log-space: log10(1/T) = -log10(T)
                init_alpha = _clamp(-math.log10(T), self._ab_log[0], self._ab_log[1])
                init_eta = _clamp(-math.log10(T), self._eb_log[0], self._eb_log[1])
            else:
                init_alpha = _clamp(1.0 / T, self.ab[0], self.ab[1])
                init_eta = _clamp(1.0 / T, self.eb[0], self.eb[1])
            return creator.Individual([T, init_alpha, init_eta])

        self.toolbox.register("individual", create_individual)
        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
        self.toolbox.register("evaluate", self._evaluate)
        self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize)

    def _decode(self, ind):
        """Convert an individual's genes into a clamped ``(T, alpha, eta)`` tuple."""
        T = int(round(ind[0]))
        T = _clamp(T, self.Tb[0], self.Tb[1])
        if self.log:
            a = 10.0 ** _clamp(ind[1], self._ab_log[0], self._ab_log[1])
            e = 10.0 ** _clamp(ind[2], self._eb_log[0], self._eb_log[1])
        else:
            a = _clamp(ind[1], self.ab[0], self.ab[1])
            e = _clamp(ind[2], self.eb[0], self.eb[1])
        return T, float(a), float(e)

    def _evaluate(self, ind):
        """Return the one-element fitness tuple for ``ind``.

        A failing objective is penalised with ``inf`` instead of aborting the
        search, but the failure is printed so that a systematic error (every
        evaluation failing) is visible. A NaN result is also mapped to ``inf``
        because NaN does not order consistently in ``min``/selection.
        """
        T, a, e = self._decode(ind)
        try:
            v = float(self.obj(T, a, e))
        except Exception as exc:
            print(f"[GA] Evaluation failed for T={T}, alpha={a:.6f}, eta={e:.6f}: {exc!r}")
            return (float("inf"),)
        if math.isnan(v):
            v = float("inf")
        return (v,)

    def _cx(self, ind1, ind2):
        """Cross two individuals in place and return them.

        ``T`` is swapped with probability 0.5; each of the two prior genes is
        replaced by a random convex combination of the parents' values.
        """
        if random.random() < 0.5:
            ind1[0], ind2[0] = ind2[0], ind1[0]
        for j in (1, 2):
            g = random.random()
            a = ind1[j]
            b = ind2[j]
            ind1[j] = g * a + (1.0 - g) * b
            ind2[j] = (1.0 - g) * a + g * b
        return ind1, ind2

    def _mut(self, ind):
        """Mutate ``ind`` in place and return it as a one-element tuple.

        ``random.random()`` is always below 1.0, so every gene is mutated; the
        draws are kept so that seeded runs consume the same random stream.
        """
        if random.random() < 1.0:
            ind[0] = _clamp(int(round(ind[0] + random.randint(-self.dT, self.dT))), self.Tb[0], self.Tb[1])
        if random.random() < 1.0:
            ind[1] = _clamp(ind[1] + random.gauss(0.0, self.sigma_log), self._ab_log[0], self._ab_log[1])
        if random.random() < 1.0:
            ind[2] = _clamp(ind[2] + random.gauss(0.0, self.sigma_log), self._eb_log[0], self._eb_log[1])
        return (ind,)

    def run(self, gens=200, pop_size=10, max_no_improvement=5):
        """Run the genetic algorithm and return the best setting and history.

        Args:
            gens: Maximum number of generations.
            pop_size: Number of individuals in the population.
            max_no_improvement: Stop early once the best fitness has not
                improved (by more than 1e-9) for this many consecutive
                generations.

        Returns:
            A dict with keys ``"best"`` (``T``, ``alpha``, ``eta``, ``ppl``),
            ``"history"`` (one dict per generation), ``"total_time"`` and
            ``"time_to_best_T"`` (cumulative time of the first generation
            whose best ``T`` equals the final one, or ``None``).
        """
        print(f"[GA] Starting optimization: {gens} generations, population size {pop_size}")
        print(f"[GA] T bounds: {self.Tb}, alpha bounds: {self.ab}, eta bounds: {self.eb}")

        pop = self.toolbox.population(n=pop_size)
        history = []
        t0 = time.perf_counter()
        cum_time = 0.0

        # Early stopping state. Starting from +inf means the first generation
        # with a finite best value always counts as an improvement.
        no_improvement_count = 0
        prev_best_fitness = float("inf")

        print("[GA] Evaluating initial population...")
        for ind in pop:
            ind.fitness.values = self.toolbox.evaluate(ind)
        best_sofar = min(pop, key=lambda x: x.fitness.values[0])
        best_T, best_alpha, best_eta = self._decode(best_sofar)
        print(f"[GA] Initial best: T={best_T}, alpha={best_alpha:.6f}, eta={best_eta:.6f}, ppl={best_sofar.fitness.values[0]:.4f}")

        for g in range(gens):
            gs = time.perf_counter()
            elites = tools.selBest(pop, self.elite)
            offspring = self.toolbox.select(pop, len(pop) - self.elite)
            offspring = list(map(self.toolbox.clone, offspring))

            # Only children actually altered by crossover or mutation lose
            # their fitness; untouched clones keep the value copied from their
            # parent instead of being re-evaluated.
            for i in range(0, len(offspring) - 1, 2):
                if random.random() < self.cxpb:
                    self._cx(offspring[i], offspring[i + 1])
                    del offspring[i].fitness.values
                    del offspring[i + 1].fitness.values
            for child in offspring:
                if random.random() < self.mutpb:
                    self._mut(child)
                    del child.fitness.values
            for child in offspring:
                if not child.fitness.valid:
                    child.fitness.values = self.toolbox.evaluate(child)

            pop = elites + offspring
            cur_best = min(pop, key=lambda x: x.fitness.values[0])
            if cur_best.fitness.values[0] < best_sofar.fitness.values[0]:
                best_sofar = cur_best
            gen_time = time.perf_counter() - gs
            cum_time = time.perf_counter() - t0
            vals = [ind.fitness.values[0] for ind in pop]
            best_T, best_alpha, best_eta = self._decode(best_sofar)

            # Early stopping check. A strict "improved" comparison is used
            # (rather than abs(difference)) so that inf == inf counts as no
            # improvement instead of producing NaN.
            current_best_fitness = best_sofar.fitness.values[0]
            if current_best_fitness < prev_best_fitness - 1e-9:
                no_improvement_count = 0
            else:
                no_improvement_count += 1
            prev_best_fitness = current_best_fitness

            # Console logging
            print(f"[GA] Gen {g+1}/{gens} | Best: T={best_T}, alpha={best_alpha:.6f}, eta={best_eta:.6f}, ppl={best_sofar.fitness.values[0]:.4f} | "
                  f"Pop: mean={np.mean(vals):.4f}, std={np.std(vals):.4f} | Time: {gen_time:.2f}s (total: {cum_time:.2f}s) | "
                  f"No improvement: {no_improvement_count}/{max_no_improvement}")

            history.append({
                "iter": g,
                "best_ppl_sofar": best_sofar.fitness.values[0],
                "pop_mean": float(np.mean(vals)),
                "pop_std": float(np.std(vals)),
                "T_best": best_T,
                "alpha_best": best_alpha,
                "eta_best": best_eta,
                "step_time": gen_time,
                "cum_time": cum_time
            })

            # Check early stopping condition
            if no_improvement_count >= max_no_improvement:
                print(f"[GA] Early stopping: best fitness unchanged for {max_no_improvement} iterations")
                break

        print(f"[GA] Optimization complete! Total time: {cum_time:.2f}s")
        print(f"[GA] Final best: T={best_T}, alpha={best_alpha:.6f}, eta={best_eta:.6f}, ppl={best_sofar.fitness.values[0]:.4f}")

        best = self._decode(best_sofar)
        best_val = best_sofar.fitness.values[0]
        final_T = best[0]
        # Cumulative time of the first generation that already reported the
        # final T, or None when there is no such history row.
        first_hit = next(
            (row["cum_time"] for row in history if row["T_best"] == final_T),
            None,
        )
        return {
            "best": {"T": best[0], "alpha": best[1], "eta": best[2], "ppl": best_val},
            "history": history,
            "total_time": history[-1]["cum_time"] if history else 0.0,
            "time_to_best_T": first_hit
        }

In [None]:
class ESOptimizer:
    """Evolution-strategy minimiser over the triple ``(T, alpha, eta)``.

    Each step creates ``lmbda`` mutated children from randomly chosen parents,
    pools them with the current ``mu`` parents and keeps the ``mu`` individuals
    with the lowest fitness (a "mu + lambda" selection).

    An individual is a three-element ``creator.Individual`` list
    ``[T, alpha_gene, eta_gene]``. With ``log_space=True`` the two prior genes
    store ``log10`` of the value and are exponentiated in :meth:`_decode`.

    ``creator.Individual`` must already have been created before an instance
    is used.

    Args:
        obj: Callable ``obj(T, alpha, eta)`` whose result is converted with
            ``float``; smaller is better.
        T_bounds: Inclusive ``(low, high)`` bounds for ``T``.
        alpha_bounds: ``(low, high)`` bounds for ``alpha``.
        eta_bounds: ``(low, high)`` bounds for ``eta``.
        log_space: Search ``alpha``/``eta`` in ``log10`` space when true.
        seed: Seeds the global ``random`` and ``numpy.random`` generators at
            construction time.
        mu: Number of parents kept after every step.
        lmbda: Number of children generated per step.
        sigma_log: Standard deviation of the Gaussian noise added to the
            ``alpha``/``eta`` genes by mutation.
        dT: Maximum absolute integer step applied to ``T`` by mutation.
    """

    def __init__(
        self,
        obj,
        T_bounds=(10, 200),
        alpha_bounds=(1e-3, 1.0),
        eta_bounds=(1e-3, 1.0),
        *,
        log_space=True,
        seed=42,
        mu=12,
        lmbda=48,
        sigma_log=0.25,
        dT=5
    ):
        self.obj = obj
        self.Tb = T_bounds
        self.ab = alpha_bounds
        self.eb = eta_bounds
        self.log = log_space
        self.seed = int(seed)
        self.mu = int(mu)
        self.lmbda = int(lmbda)
        self.sigma_log = sigma_log
        self.dT = int(dT)
        self.toolbox = base.Toolbox()
        # Bounds expressed in gene space: log10 of the bounds in log mode,
        # the raw bounds otherwise.
        if self.log:
            self._ab_log = (math.log10(self.ab[0]), math.log10(self.ab[1]))
            self._eb_log = (math.log10(self.eb[0]), math.log10(self.eb[1]))
        else:
            self._ab_log = self.ab
            self._eb_log = self.eb
        random.seed(self.seed)
        np.random.seed(self.seed)

        # Custom individual initialization: alpha and eta are initialized as 1/T
        def create_individual():
            T = random.randint(self.Tb[0], self.Tb[1])
            if self.log:
                # In log-space: log10(1/T) = -log10(T)
                init_alpha = _clamp(-math.log10(T), self._ab_log[0], self._ab_log[1])
                init_eta = _clamp(-math.log10(T), self._eb_log[0], self._eb_log[1])
            else:
                init_alpha = _clamp(1.0 / T, self.ab[0], self.ab[1])
                init_eta = _clamp(1.0 / T, self.eb[0], self.eb[1])
            return creator.Individual([T, init_alpha, init_eta])

        self.toolbox.register("individual", create_individual)
        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
        self.toolbox.register("evaluate", self._evaluate)

    def _clamp(self, ind):
        """Clamp all three genes of ``ind`` in place to their gene-space bounds.

        The calls inside this method resolve to the module-level ``_clamp``
        helper, not to this method.
        """
        ind[0] = _clamp(int(round(ind[0])), self.Tb[0], self.Tb[1])
        ind[1] = _clamp(ind[1], self._ab_log[0], self._ab_log[1])
        ind[2] = _clamp(ind[2], self._eb_log[0], self._eb_log[1])

    def _decode(self, ind):
        """Convert an individual's genes into a clamped ``(T, alpha, eta)`` tuple."""
        T = int(round(ind[0]))
        T = _clamp(T, self.Tb[0], self.Tb[1])
        if self.log:
            a = 10.0 ** _clamp(ind[1], self._ab_log[0], self._ab_log[1])
            e = 10.0 ** _clamp(ind[2], self._eb_log[0], self._eb_log[1])
        else:
            a = _clamp(ind[1], self.ab[0], self.ab[1])
            e = _clamp(ind[2], self.eb[0], self.eb[1])
        return T, float(a), float(e)

    def _evaluate(self, ind):
        """Return the one-element fitness tuple for ``ind``.

        A failing objective is penalised with ``inf`` instead of aborting the
        search, but the failure is printed so that a systematic error (every
        evaluation failing) is visible. A NaN result is also mapped to ``inf``
        because NaN does not order consistently when sorting.
        """
        T, a, e = self._decode(ind)
        try:
            v = float(self.obj(T, a, e))
        except Exception as exc:
            print(f"[ES] Evaluation failed for T={T}, alpha={a:.6f}, eta={e:.6f}: {exc!r}")
            return (float("inf"),)
        if math.isnan(v):
            v = float("inf")
        return (v,)

    def _mut(self, parent):
        """Return a new, clamped child obtained by perturbing a copy of ``parent``."""
        child = creator.Individual(parent[:])
        child[0] = int(round(child[0] + random.randint(-self.dT, self.dT)))
        child[1] = child[1] + random.gauss(0.0, self.sigma_log)
        child[2] = child[2] + random.gauss(0.0, self.sigma_log)
        self._clamp(child)
        return child

    def run(self, steps=24):
        """Run the evolution strategy and return the best setting and history.

        Args:
            steps: Number of steps (generations) to perform.

        Returns:
            A dict with keys ``"best"`` (``T``, ``alpha``, ``eta``, ``ppl``),
            ``"history"`` (one dict per step), ``"total_time"`` and
            ``"time_to_best_T"`` (cumulative time of the first step whose best
            ``T`` equals the final one, or ``None``).
        """
        print(f"[ES] Starting optimization: {steps} steps, mu={self.mu}, lambda={self.lmbda}")
        print(f"[ES] T bounds: {self.Tb}, alpha bounds: {self.ab}, eta bounds: {self.eb}")

        parents = self.toolbox.population(n=self.mu)
        history = []
        t0 = time.perf_counter()
        cum_time = 0.0

        print("[ES] Evaluating initial population...")
        for ind in parents:
            ind.fitness.values = self.toolbox.evaluate(ind)
        best_sofar = min(parents, key=lambda x: x.fitness.values[0])
        best_T, best_alpha, best_eta = self._decode(best_sofar)
        print(f"[ES] Initial best: T={best_T}, alpha={best_alpha:.6f}, eta={best_eta:.6f}, ppl={best_sofar.fitness.values[0]:.4f}")

        for s in range(steps):
            gs = time.perf_counter()
            off = []
            for _ in range(self.lmbda):
                child = self._mut(random.choice(parents))
                child.fitness.values = self.toolbox.evaluate(child)
                off.append(child)

            # Parents come first in the pool and the sort is stable, so on a
            # tie an existing parent is preferred over a child.
            pool = sorted(parents + off, key=lambda x: x.fitness.values[0])
            parents = []
            for survivor in pool[:self.mu]:
                kept = creator.Individual(survivor[:])
                kept.fitness.values = survivor.fitness.values
                parents.append(kept)

            cur_best = parents[0]
            if cur_best.fitness.values[0] < best_sofar.fitness.values[0]:
                best_sofar = cur_best
            step_time = time.perf_counter() - gs
            cum_time = time.perf_counter() - t0
            vals = [ind.fitness.values[0] for ind in parents]
            best_T, best_alpha, best_eta = self._decode(best_sofar)

            # Console logging
            print(f"[ES] Step {s+1}/{steps} | Best: T={best_T}, alpha={best_alpha:.6f}, eta={best_eta:.6f}, ppl={best_sofar.fitness.values[0]:.4f} | "
                  f"Parents: mean={np.mean(vals):.4f}, std={np.std(vals):.4f} | Time: {step_time:.2f}s (total: {cum_time:.2f}s)")

            history.append({
                "iter": s,
                "best_ppl_sofar": best_sofar.fitness.values[0],
                "pop_mean": float(np.mean(vals)),
                "pop_std": float(np.std(vals)),
                "T_best": best_T,
                "alpha_best": best_alpha,
                "eta_best": best_eta,
                "step_time": step_time,
                "cum_time": cum_time
            })

        print(f"[ES] Optimization complete! Total time: {cum_time:.2f}s")
        print(f"[ES] Final best: T={best_T}, alpha={best_alpha:.6f}, eta={best_eta:.6f}, ppl={best_sofar.fitness.values[0]:.4f}")

        best = self._decode(best_sofar)
        best_val = best_sofar.fitness.values[0]
        final_T = best[0]
        # Cumulative time of the first step that already reported the final T,
        # or None when there is no such history row.
        first_hit = next(
            (row["cum_time"] for row in history if row["T_best"] == final_T),
            None,
        )
        return {
            "best": {"T": best[0], "alpha": best[1], "eta": best[2], "ppl": best_val},
            "history": history,
            "total_time": history[-1]["cum_time"] if history else 0.0,
            "time_to_best_T": first_hit
        }

In [None]:
# Module-level memo for _fit_eval_full: maps the tuple
# (T, alpha, eta, seed, max_iter, batch_size, learning_method) to the result
# dict of that fit, so repeated settings are not re-trained.
EVAL_CACHE: Dict[tuple, dict] = {}

def _doc_perplexities(lda, X, batch_size=1024, eps=1e-300):
    """Compute one perplexity value per document (row) of ``X``.

    For each document the word probabilities are ``theta_d @ phi`` where
    ``phi`` is ``lda.components_`` with every row normalised to sum to one and
    ``theta_d`` is the corresponding row of ``lda.transform`` (clipped below at
    1e-12). The perplexity is ``exp(-sum(count * log p) / total_count)``.

    Args:
        lda: Fitted model exposing ``components_`` and ``transform``.
        X: Document-term count matrix. Sparse input is converted to CSR;
            anything else is wrapped with ``scipy.sparse.csr_matrix``.
        batch_size: Number of rows passed to ``lda.transform`` at a time.
            Values below 1 are treated as 1 (the previous code crashed on 0,
            which callers produce via ``min(1024, X.shape[0])`` for an empty
            matrix).
        eps: Lower clip applied to word probabilities before taking the log.

    Returns:
        A float64 array of length ``X.shape[0]``. Rows without stored entries
        get perplexity 1.0.
    """
    phi = lda.components_.astype(np.float64)
    phi /= phi.sum(axis=1, keepdims=True)

    # Work on CSR so each row's entries can be read straight from indptr.
    X = X.tocsr() if sp.issparse(X) else sp.csr_matrix(X)
    n = X.shape[0]
    out = np.empty(n, dtype=np.float64)
    step = max(1, int(batch_size))

    for s in range(0, n, step):
        e = min(n, s + step)
        Xb = X[s:e]
        theta = np.clip(lda.transform(Xb), 1e-12, None)
        indptr, indices, data = Xb.indptr, Xb.indices, Xb.data
        for i in range(Xb.shape[0]):
            lo, hi = indptr[i], indptr[i + 1]
            if lo == hi:
                # No stored entries for this document.
                out[s + i] = 1.0
                continue
            idx = indices[lo:hi]
            dat = data[lo:hi]
            p = np.clip(theta[i].dot(phi[:, idx]), eps, None)
            ll = float((np.log(p) * dat).sum())
            cnt = float(dat.sum())
            # max(cnt, 1.0) guards against rows whose stored counts sum to 0.
            out[s + i] = math.exp(-ll / max(cnt, 1.0))
    return out

def _fit_eval_full(T, alpha, eta, seed=42, max_iter=400, batch_size=2048, learning_method="online"):
    """Fit one LDA model on ``Xtr`` and collect train/validation perplexities.

    Results are memoised in ``EVAL_CACHE`` under the normalised argument
    tuple, so a repeated setting returns the stored dict without re-training.

    Returns:
        A dict with the keys ``T``, ``alpha``, ``eta``, ``corpus_ppl_val``,
        ``corpus_ppl_train``, ``doc_ppl_val_mean``, ``doc_ppl_val_max``,
        ``doc_ppl_train_mean``, ``doc_ppl_train_max``, ``fit_time``,
        ``eval_time`` and ``n_iter_lda``.
    """
    cache_key = (
        int(T),
        float(alpha),
        float(eta),
        int(seed),
        int(max_iter),
        int(batch_size),
        learning_method,
    )
    try:
        return EVAL_CACHE[cache_key]
    except KeyError:
        pass

    print(f"[LDA] Training LDA: T={T}, alpha={alpha:.6f}, eta={eta:.6f}")
    model = LatentDirichletAllocation(
        n_components=int(T),
        doc_topic_prior=float(alpha),
        topic_word_prior=float(eta),
        learning_method=learning_method,
        max_iter=int(max_iter),
        batch_size=int(batch_size),
        random_state=int(seed),
        evaluate_every=-1,
        n_jobs=-1,
    )
    fit_started = time.perf_counter()
    model.fit(Xtr)
    fit_time = time.perf_counter() - fit_started
    print(f"[LDA] Training completed in {fit_time:.2f}s (n_iter={getattr(model, 'n_iter_', 'N/A')})")

    # Score the validation split first, then the training split.
    eval_started = time.perf_counter()
    scores = {}
    for split_name, matrix in (("val", Xva), ("train", Xtr)):
        corpus_ppl = float(model.perplexity(matrix))
        per_doc = _doc_perplexities(model, matrix, batch_size=min(1024, matrix.shape[0]))
        scores[split_name] = (corpus_ppl, per_doc)
    eval_time = time.perf_counter() - eval_started

    corpus_ppl_val, doc_ppl_val = scores["val"]
    corpus_ppl_train, doc_ppl_train = scores["train"]
    print(f"[LDA] Evaluation completed in {eval_time:.2f}s | Train ppl: {corpus_ppl_train:.4f}, Val ppl: {corpus_ppl_val:.4f}")

    result = {
        "T": int(T),
        "alpha": float(alpha),
        "eta": float(eta),
        "corpus_ppl_val": corpus_ppl_val,
        "corpus_ppl_train": corpus_ppl_train,
        "doc_ppl_val_mean": float(np.mean(doc_ppl_val)),
        "doc_ppl_val_max": float(np.max(doc_ppl_val)),
        "doc_ppl_train_mean": float(np.mean(doc_ppl_train)),
        "doc_ppl_train_max": float(np.max(doc_ppl_train)),
        "fit_time": fit_time,
        "eval_time": eval_time,
        "n_iter_lda": getattr(model, "n_iter_", None),
    }
    EVAL_CACHE[cache_key] = result
    return result

def make_objective(seed=42, max_iter=400, batch_size=2048, learning_method="online"):
    """Build the objective ``f(T, a, e)`` used by the optimizers.

    The returned callable forwards to ``_fit_eval_full`` with the training
    settings captured here and returns the ``"corpus_ppl_val"`` entry of its
    result.
    """
    fit_settings = {
        "seed": seed,
        "max_iter": max_iter,
        "batch_size": batch_size,
        "learning_method": learning_method,
    }

    def objective(T, a, e):
        return _fit_eval_full(T, a, e, **fit_settings)["corpus_ppl_val"]

    return objective

def _ensure_dir(p):
    """Create directory ``p`` including missing parents; existing directories are left as-is."""
    os.makedirs(name=p, exist_ok=True)

def _write_history_csv(history_rows, path):
    """Write the history rows to ``path`` as CSV with a fixed column order.

    Args:
        history_rows: Iterable of dicts keyed by the column names below.
        path: Destination file; it is overwritten.
    """
    columns = [
        "iter",
        "best_corpus_ppl_val",
        "best_corpus_ppl_train",
        "best_doc_ppl_val_max",
        "pop_mean",
        "pop_std",
        "T_best",
        "alpha_best",
        "eta_best",
        "step_time",
        "cum_time",
    ]
    with open(path, "w", newline="") as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(history_rows)

def _plot_series(xs, ys, xlabel, ylabel, title, path):
    """Plot ``ys`` against ``xs`` as a marked line and save the figure to ``path``.

    Args:
        xs: X values.
        ys: Y values.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        title: Figure title.
        path: Output image path (saved at 150 dpi).
    """
    fig, ax = plt.subplots(figsize=(7, 4))
    try:
        ax.plot(xs, ys, marker="o", linewidth=1.5)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        fig.savefig(path, dpi=150)
    finally:
        # Close this specific figure even when plotting or saving fails, so a
        # failed call does not leave an open figure behind.
        plt.close(fig)

def run_ga_with_logging(
    outdir,
    gens=200,
    pop_size=10,
    T_bounds=(10,200),
    alpha_bounds=(1e-3,1.0),
    eta_bounds=(1e-3,1.0),
    seed=42,
    max_iter=400,
    batch_size=2048,
    learning_method="online",
    cxpb=0.9,
    mutpb=0.2,
    tournsize=3,
    elite=2,
    sigma_log=0.25,
    dT=5
):
    """Run the GA search and persist its results under ``outdir``.

    Written artefacts: TensorBoard scalars in ``outdir/tensorboard``,
    ``history.csv``, three PNG plots (``val_ppl.png``, ``train_ppl.png``,
    ``max_ppl.png``) and ``summary.json``.

    Args:
        outdir: Output directory; created if missing.
        gens: Forwarded to ``GAOptimizer.run``.
        pop_size: Forwarded to ``GAOptimizer.run``.
        T_bounds, alpha_bounds, eta_bounds: Search bounds for the optimizer.
        seed: Used both for the optimizer and for the LDA fits.
        max_iter, batch_size, learning_method: LDA training settings.
        cxpb, mutpb, tournsize, elite, sigma_log, dT: Forwarded to
            ``GAOptimizer``.

    Returns:
        ``{"history": [...], "summary": {...}}`` where ``history`` holds one
        dict per generation and ``summary`` is what was written to
        ``summary.json``.
    """
    _ensure_dir(outdir)
    tb_dir = os.path.join(outdir, "tensorboard")
    writer = SummaryWriter(log_dir=tb_dir)
    print(f"[TensorBoard] Logging to {tb_dir}")

    # The writer is closed in ``finally`` so it is released even when the
    # search or the logging loop raises.
    try:
        obj = make_objective(seed=seed, max_iter=max_iter, batch_size=batch_size, learning_method=learning_method)
        ga = GAOptimizer(
            obj,
            T_bounds=T_bounds,
            alpha_bounds=alpha_bounds,
            eta_bounds=eta_bounds,
            log_space=True,
            seed=seed,
            cxpb=cxpb,
            mutpb=mutpb,
            tournsize=tournsize,
            elite=elite,
            sigma_log=sigma_log,
            dT=dT
        )
        res = ga.run(gens=gens, pop_size=pop_size)
        hist = []
        for row in res["history"]:
            T = row["T_best"]
            a = row["alpha_best"]
            e = row["eta_best"]
            # Full metrics for this generation's best setting; identical
            # arguments are served from EVAL_CACHE by _fit_eval_full.
            r = _fit_eval_full(T, a, e, seed=seed, max_iter=max_iter, batch_size=batch_size, learning_method=learning_method)

            # TensorBoard logging
            iter_num = row["iter"]
            writer.add_scalar("Perplexity/val_corpus", r["corpus_ppl_val"], iter_num)
            writer.add_scalar("Perplexity/train_corpus", r["corpus_ppl_train"], iter_num)
            writer.add_scalar("Perplexity/val_doc_max", r["doc_ppl_val_max"], iter_num)
            writer.add_scalar("Perplexity/train_doc_mean", r["doc_ppl_train_mean"], iter_num)
            writer.add_scalar("Parameters/T", T, iter_num)
            writer.add_scalar("Parameters/alpha", a, iter_num)
            writer.add_scalar("Parameters/eta", e, iter_num)
            writer.add_scalar("Population/mean", row["pop_mean"], iter_num)
            writer.add_scalar("Population/std", row["pop_std"], iter_num)
            writer.add_scalar("Time/step_time", row["step_time"], iter_num)
            writer.add_scalar("Time/cumulative", row["cum_time"], iter_num)

            hist.append({
                "iter": row["iter"],
                "best_corpus_ppl_val": float(r["corpus_ppl_val"]),
                "best_corpus_ppl_train": float(r["corpus_ppl_train"]),
                "best_doc_ppl_val_max": float(r["doc_ppl_val_max"]),
                "pop_mean": row["pop_mean"],
                "pop_std": row["pop_std"],
                "T_best": int(T),
                "alpha_best": float(a),
                "eta_best": float(e),
                "step_time": row["step_time"],
                "cum_time": row["cum_time"]
            })
    finally:
        writer.close()
    print(f"[TensorBoard] TensorBoard logs saved. Run: tensorboard --logdir={tb_dir}")

    _write_history_csv(hist, os.path.join(outdir, "history.csv"))
    xs = [h["iter"] for h in hist]
    ys_val = [h["best_corpus_ppl_val"] for h in hist]
    ys_train = [h["best_corpus_ppl_train"] for h in hist]
    ys_max = [h["best_doc_ppl_val_max"] for h in hist]
    _plot_series(xs, ys_val, "iter", "perplexity", "GA: val corpus perplexity vs iter", os.path.join(outdir, "val_ppl.png"))
    _plot_series(xs, ys_train, "iter", "perplexity", "GA: train corpus perplexity vs iter", os.path.join(outdir, "train_ppl.png"))
    _plot_series(xs, ys_max, "iter", "perplexity", "GA: max doc perplexity (val) vs iter", os.path.join(outdir, "max_ppl.png"))
    avg_step_time = float(np.mean([h["step_time"] for h in hist])) if hist else 0.0
    summary = {
        "best": res["best"],
        "avg_step_time": avg_step_time,
        "total_time": res["total_time"],
        "time_to_best_T": res["time_to_best_T"]
    }
    with open(os.path.join(outdir, "summary.json"), "w") as f:
        json.dump(summary, f, indent=2)
    return {"history": hist, "summary": summary}

def run_es_with_logging(
    outdir,
    steps=24,
    T_bounds=(10,200),
    alpha_bounds=(1e-3,1.0),
    eta_bounds=(1e-3,1.0),
    seed=42,
    max_iter=400,
    batch_size=2048,
    learning_method="online",
    mu=12,
    lmbda=48,
    sigma_log=0.25,
    dT=5
):
    """Run the ES search and persist its results under ``outdir``.

    Written artefacts: TensorBoard scalars in ``outdir/tensorboard``,
    ``history.csv``, three PNG plots (``val_ppl.png``, ``train_ppl.png``,
    ``max_ppl.png``) and ``summary.json``.

    Args:
        outdir: Output directory; created if missing.
        steps: Forwarded to ``ESOptimizer.run``.
        T_bounds, alpha_bounds, eta_bounds: Search bounds for the optimizer.
        seed: Used both for the optimizer and for the LDA fits.
        max_iter, batch_size, learning_method: LDA training settings.
        mu, lmbda, sigma_log, dT: Forwarded to ``ESOptimizer``.

    Returns:
        ``{"history": [...], "summary": {...}}`` where ``history`` holds one
        dict per step and ``summary`` is what was written to
        ``summary.json``.
    """
    _ensure_dir(outdir)
    tb_dir = os.path.join(outdir, "tensorboard")
    writer = SummaryWriter(log_dir=tb_dir)
    print(f"[TensorBoard] Logging to {tb_dir}")

    # The writer is closed in ``finally`` so it is released even when the
    # search or the logging loop raises.
    try:
        obj = make_objective(seed=seed, max_iter=max_iter, batch_size=batch_size, learning_method=learning_method)
        es = ESOptimizer(
            obj,
            T_bounds=T_bounds,
            alpha_bounds=alpha_bounds,
            eta_bounds=eta_bounds,
            log_space=True,
            seed=seed,
            mu=mu,
            lmbda=lmbda,
            sigma_log=sigma_log,
            dT=dT
        )
        res = es.run(steps=steps)
        hist = []
        for row in res["history"]:
            T = row["T_best"]
            a = row["alpha_best"]
            e = row["eta_best"]
            # Full metrics for this step's best setting; identical arguments
            # are served from EVAL_CACHE by _fit_eval_full.
            r = _fit_eval_full(T, a, e, seed=seed, max_iter=max_iter, batch_size=batch_size, learning_method=learning_method)

            # TensorBoard logging
            iter_num = row["iter"]
            writer.add_scalar("Perplexity/val_corpus", r["corpus_ppl_val"], iter_num)
            writer.add_scalar("Perplexity/train_corpus", r["corpus_ppl_train"], iter_num)
            writer.add_scalar("Perplexity/val_doc_max", r["doc_ppl_val_max"], iter_num)
            writer.add_scalar("Perplexity/train_doc_mean", r["doc_ppl_train_mean"], iter_num)
            writer.add_scalar("Parameters/T", T, iter_num)
            writer.add_scalar("Parameters/alpha", a, iter_num)
            writer.add_scalar("Parameters/eta", e, iter_num)
            writer.add_scalar("Population/mean", row["pop_mean"], iter_num)
            writer.add_scalar("Population/std", row["pop_std"], iter_num)
            writer.add_scalar("Time/step_time", row["step_time"], iter_num)
            writer.add_scalar("Time/cumulative", row["cum_time"], iter_num)

            hist.append({
                "iter": row["iter"],
                "best_corpus_ppl_val": float(r["corpus_ppl_val"]),
                "best_corpus_ppl_train": float(r["corpus_ppl_train"]),
                "best_doc_ppl_val_max": float(r["doc_ppl_val_max"]),
                "pop_mean": row["pop_mean"],
                "pop_std": row["pop_std"],
                "T_best": int(T),
                "alpha_best": float(a),
                "eta_best": float(e),
                "step_time": row["step_time"],
                "cum_time": row["cum_time"]
            })
    finally:
        writer.close()
    print(f"[TensorBoard] TensorBoard logs saved. Run: tensorboard --logdir={tb_dir}")

    _write_history_csv(hist, os.path.join(outdir, "history.csv"))
    xs = [h["iter"] for h in hist]
    ys_val = [h["best_corpus_ppl_val"] for h in hist]
    ys_train = [h["best_corpus_ppl_train"] for h in hist]
    ys_max = [h["best_doc_ppl_val_max"] for h in hist]
    _plot_series(xs, ys_val, "iter", "perplexity", "ES: val corpus perplexity vs iter", os.path.join(outdir, "val_ppl.png"))
    _plot_series(xs, ys_train, "iter", "perplexity", "ES: train corpus perplexity vs iter", os.path.join(outdir, "train_ppl.png"))
    _plot_series(xs, ys_max, "iter", "perplexity", "ES: max doc perplexity (val) vs iter", os.path.join(outdir, "max_ppl.png"))
    avg_step_time = float(np.mean([h["step_time"] for h in hist])) if hist else 0.0
    summary = {
        "best": res["best"],
        "avg_step_time": avg_step_time,
        "total_time": res["total_time"],
        "time_to_best_T": res["time_to_best_T"]
    }
    with open(os.path.join(outdir, "summary.json"), "w") as f:
        json.dump(summary, f, indent=2)
    return {"history": hist, "summary": summary}

In [None]:
# Root directory for all outputs of this experiment.
BASE_DIR = "runs/agnews"

# Genetic-algorithm search; results are written below BASE_DIR/ga.
ga_out = run_ga_with_logging(
    outdir=BASE_DIR + "/ga", seed=42, max_iter=400, batch_size=2048, learning_method="online"
)

# Evolution-strategy search; results are written below BASE_DIR/es.
es_out = run_es_with_logging(
    outdir=BASE_DIR + "/es", steps=24, seed=42, max_iter=400, batch_size=2048, learning_method="online"
)

# Report the summary dict of each run.
print("GA summary:", ga_out["summary"])
print("ES summary:", es_out["summary"])