In [7]:
# -*- coding: utf-8 -*-
# Edge-aware EA-NAS for MNIST (subset=10k) with NSGA-II
# - Optimize accuracy, cost (params normalized by bitwidth), and latency (quantization proxy)
# - Train/Test split for credible reporting
# - Baseline comparison (bütçe-dışı ve bütçe-içi) + final test evaluation
# - Top-10'u uzun eğitimle testte yeniden değerlendirip şampiyonu seçer
# Colab-ready

# Install DEAP if needed
try:
    import deap
except ImportError:
    import sys
    !{sys.executable} -m pip -q install deap

import random
import time
import numpy as np
import pandas as pd

from deap import base, creator, tools, algorithms
from sklearn.datasets import fetch_openml
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# ===============================
# Reproducibility
# ===============================
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# ===============================
# Global Settings (MNIST)
# ===============================
ALPHA = 0.8  # scalar report weighting (only for reporting, EA is multi-objective)
BETA = 0.2

# Search space (kept moderate for speed; can widen later)
LAYER_MIN = 1
LAYER_MAX = 4
NEURON_MIN = 8
NEURON_MAX = 64

ACTIVATIONS = ["relu", "tanh", "logistic"]
ALPHAS = [1e-5, 1e-4, 1e-3, 1e-2]
LRS = [0.0005, 0.001, 0.003, 0.01, 0.03]
BATCH_SIZES = [32, 64, 128, 256]
BITWIDTHS = [8, 16, 32]  # quantization simulation

# Resource budget (for 32-bit baseline; cost normalized with bitwidth)
MAX_PARAMETRE = 50_000

# CV and latency (speed-tuned)
CV_FOLDS = 2
LAT_REPEATS = 2
WARMUP = 1
ETA_LAT = 0.7  # latency proxy exponent: (bit/32)^ETA_LAT

# EA (NSGA-II) params
POP_SIZE = 30
NGEN = 10
CXPB = 0.6
CX_INDPB = 0.5
MUTPB = 0.7
MUT_INDPB = 0.3

# Mutation biases
LAYER_MUTPB = 0.2
REDUCE_DELTAS = [-8, -6, -4, -2, 0, +2, +4, +6, +8]
REDUCE_WEIGHTS = [0.20, 0.16, 0.14, 0.12, 0.08, 0.10, 0.08, 0.07, 0.05]

# Subset size
SUBSET_SIZE = 10_000

# ===============================
# Data (MNIST) + 10k subset + Train/Test split
# ===============================
print("MNIST indiriliyor (OpenML)...")
mn = fetch_openml('mnist_784', version=1, as_frame=False)
X_full = mn.data.astype(np.float32)
y_full = mn.target.astype(str).astype(np.int32)

sss = StratifiedShuffleSplit(n_splits=1, train_size=SUBSET_SIZE, random_state=RANDOM_SEED)
idx_train, _ = next(sss.split(X_full, y_full))
X_subset = X_full[idx_train]
y_subset = y_full[idx_train]

INPUT_DIM = X_subset.shape[1]  # 784
OUTPUT_DIM = len(np.unique(y_subset))  # 10
print(f"Subset hazır: X.shape={X_subset.shape}, sınıf sayısı={OUTPUT_DIM}")

# Train/Test split (EA trains on train only; final report uses test)
X_train, X_test, y_train, y_test = train_test_split(
    X_subset, y_subset, test_size=0.2, stratify=y_subset, random_state=RANDOM_SEED
)

# ===============================
# DEAP Creator
# ===============================
def safe_creator():
    if not hasattr(creator, "RA_FitnessMulti"):
        creator.create("RA_FitnessMulti", base.Fitness, weights=(1.0, -1.0, -1.0))  # max acc, min cost, min latency
    if not hasattr(creator, "RA_Individual"):
        creator.create("RA_Individual", list, fitness=creator.RA_FitnessMulti)
safe_creator()

toolbox = base.Toolbox()

# ===============================
# Individual init
# ===============================
def init_individual():
    layer_count = random.randint(LAYER_MIN, LAYER_MAX)
    neurons = [random.randint(NEURON_MIN, NEURON_MAX) for _ in range(LAYER_MAX)]
    act_idx = random.randrange(len(ACTIVATIONS))
    alpha_idx = random.randrange(len(ALPHAS))
    lr_idx = random.randrange(len(LRS))
    batch_idx = random.randrange(len(BATCH_SIZES))
    bit_idx = random.randrange(len(BITWIDTHS))
    return [layer_count] + neurons + [act_idx, alpha_idx, lr_idx, batch_idx, bit_idx]

toolbox.register("individual", tools.initIterate, creator.RA_Individual, init_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# ===============================
# Helpers
# ===============================
def clip_genes(ind):
    ind[0] = int(np.clip(ind[0], LAYER_MIN, LAYER_MAX))
    for i in range(1, 1 + LAYER_MAX):
        ind[i] = int(np.clip(ind[i], NEURON_MIN, NEURON_MAX))
    base_idx = 1 + LAYER_MAX
    ind[base_idx + 0] = int(np.clip(ind[base_idx + 0], 0, len(ACTIVATIONS) - 1))
    ind[base_idx + 1] = int(np.clip(ind[base_idx + 1], 0, len(ALPHAS) - 1))
    ind[base_idx + 2] = int(np.clip(ind[base_idx + 2], 0, len(LRS) - 1))
    ind[base_idx + 3] = int(np.clip(ind[base_idx + 3], 0, len(BATCH_SIZES) - 1))
    ind[base_idx + 4] = int(np.clip(ind[base_idx + 4], 0, len(BITWIDTHS) - 1))

def decode(ind):
    clip_genes(ind)
    layer_count = ind[0]
    hidden = tuple(int(v) for v in ind[1:1+layer_count])
    base_idx = 1 + LAYER_MAX
    act = ACTIVATIONS[ind[base_idx + 0]]
    alpha = ALPHAS[ind[base_idx + 1]]
    lr = LRS[ind[base_idx + 2]]
    batch_size = BATCH_SIZES[ind[base_idx + 3]]
    bitwidth = BITWIDTHS[ind[base_idx + 4]]
    return hidden, act, alpha, lr, batch_size, bitwidth

def count_parameters(input_dim, hidden_layer_sizes, output_dim):
    dims = [input_dim] + list(hidden_layer_sizes) + [output_dim]
    total = 0
    for i in range(1, len(dims)):
        prev_dim = dims[i-1]
        curr_dim = dims[i]
        total += (prev_dim + 1) * curr_dim  # bias dahil
    return total

def make_model(hidden, act, alpha, lr, batch_size, random_state=RANDOM_SEED):
    clf = Pipeline([
        ("scaler", StandardScaler()),
        ("mlp", MLPClassifier(
            hidden_layer_sizes=hidden,
            activation=act,
            solver="adam",
            alpha=alpha,
            learning_rate_init=lr,
            batch_size=batch_size,
            max_iter=30,              # fast scan for EA
            early_stopping=True,
            n_iter_no_change=5,
            random_state=random_state,
            tol=1e-4,
        ))
    ])
    return clf

def effective_param_norm(P, bitwidth):
    P_eff = P * (bitwidth / 32.0)
    return P_eff / MAX_PARAMETRE

def model_size_kb(P, bitwidth):
    return (P * (bitwidth/8)) / 1024.0

# Pretty table printer
def print_table(headers, rows):
    colw = []
    for i, h in enumerate(headers):
        candidates = [len(str(h))] + [len(str(r[i])) for r in rows]
        colw.append(max(candidates) + 2)
    def _print(row):
        print("".join(str(val).ljust(colw[i]) for i, val in enumerate(row)))
    _print(headers)
    print("-" * sum(colw))
    for r in rows:
        _print(r)

# ===============================
# Resource-aware repair
# ===============================
def resource_repair(ind):
    hidden, act, alpha, lr, batch_size, bitwidth = decode(ind)
    hs = list(hidden)
    iter_guard = 0
    while effective_param_norm(count_parameters(INPUT_DIM, hs, OUTPUT_DIM), bitwidth) > 1.0 and iter_guard < 200:
        if not hs:
            break
        idx = int(np.argmax(hs))
        if hs[idx] > NEURON_MIN:
            hs[idx] = max(NEURON_MIN, hs[idx] - 2)
        else:
            candidates = [i for i,v in enumerate(hs) if v > NEURON_MIN]
            if not candidates:
                break
            c = random.choice(candidates)
            hs[c] = max(NEURON_MIN, hs[c] - 2)
        iter_guard += 1
    for i,v in enumerate(hs):
        ind[1+i] = int(v)
    clip_genes(ind)
    return ind

# ===============================
# Mutation and crossover
# ===============================
def mutate(ind, indpb=MUT_INDPB):
    if random.random() < LAYER_MUTPB:
        delta = random.choices([-1, 0, +1], weights=[0.5, 0.3, 0.2], k=1)[0]
        ind[0] = int(np.clip(ind[0] + delta, LAYER_MIN, LAYER_MAX))
    for i in range(1, 1 + LAYER_MAX):
        if random.random() < indpb:
            delta = random.choices(REDUCE_DELTAS, weights=REDUCE_WEIGHTS, k=1)[0]
            ind[i] = int(np.clip(ind[i] + delta, NEURON_MIN, NEURON_MAX))
    base_idx = 1 + LAYER_MAX
    if random.random() < indpb:
        options = list(range(len(ACTIVATIONS)))
        if ind[base_idx + 0] in options:
            options.remove(ind[base_idx + 0])
        ind[base_idx + 0] = random.choice(options) if options else ind[base_idx + 0]
    for j, sizes in enumerate([ALPHAS, LRS, BATCH_SIZES, BITWIDTHS]):
        idx = base_idx + 1 + j
        if random.random() < indpb:
            if random.random() < 0.7:
                step = random.choice([-1, +1])
                ind[idx] = int(np.clip(ind[idx] + step, 0, len(sizes)-1))
            else:
                ind[idx] = random.randrange(len(sizes))
    clip_genes(ind)
    resource_repair(ind)
    return (ind,)

def mate(p1, p2, indpb=CX_INDPB):
    tools.cxUniform(p1, p2, indpb=indpb)
    clip_genes(p1); clip_genes(p2)
    resource_repair(p1); resource_repair(p2)
    return p1, p2

toolbox.register("mutate", mutate, indpb=MUT_INDPB)
toolbox.register("mate", mate, indpb=CX_INDPB)

# ===============================
# Evaluation (CV on TRAIN split) + latency proxy
# ===============================
eval_cache = {}
EVAL_COUNTER = {"count": 0}

def evaluate(ind):
    clip_genes(ind)
    resource_repair(ind)
    key = tuple(ind)
    if key in eval_cache:
        return eval_cache[key]

    hidden, act, alpha, lr, batch_size, bitwidth = decode(ind)
    P = count_parameters(INPUT_DIM, hidden, OUTPUT_DIM)
    cost_norm = effective_param_norm(P, bitwidth)
    if cost_norm > 1.0 or len(hidden) < 1:
        res = (0.0, cost_norm, 1e6)
        eval_cache[key] = res
        return res

    cv = StratifiedKFold(n_splits=CV_FOLDS, shuffle=True, random_state=RANDOM_SEED)
    accs, latencies = [], []

    for train_idx, val_idx in cv.split(X_train, y_train):
        X_tr, X_val = X_train[train_idx], X_train[val_idx]
        y_tr, y_val = y_train[train_idx], y_train[val_idx]

        model = make_model(hidden, act, alpha, lr, batch_size, random_state=RANDOM_SEED)
        model.fit(X_tr, y_tr)

        y_pred = model.predict(X_val)
        accs.append(accuracy_score(y_val, y_pred))

        for _ in range(WARMUP):
            model.predict(X_val)
        t0 = time.perf_counter()
        for _ in range(LAT_REPEATS):
            model.predict(X_val)
        t1 = time.perf_counter()
        total_preds = LAT_REPEATS * len(X_val)
        per_sample_s = (t1 - t0) / max(total_preds, 1)
        latencies.append(per_sample_s * 1000.0)

    acc_mean = float(np.mean(accs))
    lat_ms_raw = float(np.mean(latencies))
    lat_ms_eff = lat_ms_raw * (bitwidth / 32.0) ** ETA_LAT

    res = (acc_mean, cost_norm, lat_ms_eff)
    eval_cache[key] = res
    EVAL_COUNTER["count"] += 1
    return res

toolbox.register("evaluate", evaluate)

# ===============================
# NSGA-II evolution
# ===============================
def run_evolution():
    pop = toolbox.population(n=POP_SIZE)
    for ind in pop:
        ind.fitness.values = toolbox.evaluate(ind)
    pop = tools.selNSGA2(pop, POP_SIZE)

    pareto = tools.ParetoFront()
    pareto.update(pop)

    for gen in range(1, NGEN + 1):
        offspring = algorithms.varAnd(pop, toolbox, cxpb=CXPB, mutpb=MUTPB)
        invalid = [ind for ind in offspring if not ind.fitness.valid]
        for ind in invalid:
            ind.fitness.values = toolbox.evaluate(ind)
        pop = tools.selNSGA2(pop + offspring, POP_SIZE)
        pareto.update(pop)

        fits = np.array([ind.fitness.values for ind in pop])
        accs = fits[:,0]; costs = fits[:,1]; lats = fits[:,2]
        print(f"Gen {gen:02d} | acc_avg={np.mean(accs):.4f} acc_max={np.max(accs):.4f} "
              f"cost_min={np.min(costs):.4f} lat_min_ms={np.min(lats):.4f} "
              f"| evals={EVAL_COUNTER['count']} cache={len(eval_cache)}")
    return pop, pareto

# ===============================
# Run
# ===============================
pop, pareto = run_evolution()

# ===============================
# Reporting helpers
# ===============================
def scalar_score(fit_vals):
    acc, cost_norm, _lat = fit_vals
    return (ALPHA * acc) - (BETA * cost_norm)

def decode_full(ind):
    hidden, act, alpha, lr, batch_size, bitwidth = decode(ind)
    P = count_parameters(INPUT_DIM, hidden, OUTPUT_DIM)
    acc, cost_norm, lat_ms = ind.fitness.values
    return {
        "hidden": hidden, "act": act, "alpha": alpha, "lr": lr, "batch": batch_size, "bit": bitwidth,
        "P": P, "acc": acc, "cost_norm": cost_norm, "lat_ms": lat_ms, "scalar_fit": scalar_score(ind.fitness.values)
    }

pareto_sorted = sorted(pareto, key=lambda ind: (-ind.fitness.values[0], ind.fitness.values[1], ind.fitness.values[2]))
top_k = pareto_sorted[:10]
print("\n=== Pareto Öncü Çözümler (ilk 10; CV=TRAIN) ===")
for i, ind in enumerate(top_k, 1):
    s = decode_full(ind)
    print(f"#{i}: acc_cv={s['acc']:.4f}, cost_norm={s['cost_norm']:.4f}, lat_ms_cv={s['lat_ms']:.4f}, "
          f"hidden={s['hidden']}, act={s['act']}, alpha={s['alpha']}, lr={s['lr']}, "
          f"batch={s['batch']}, bit={s['bit']}, P={s['P']}, scalar_fit={s['scalar_fit']:.4f}")

# Best by scalar score (for report)
ea_best = max(pareto, key=lambda ind: scalar_score(ind.fitness.values))
ea_best_info = decode_full(ea_best)

# ===============================
# Baseline CV (on TRAIN) + Test evaluation
# ===============================
def eval_baseline_cv(hidden=(64,64), act="relu", alpha=1e-4, lr=0.01, batch=128, bitwidth=32):
    P = count_parameters(INPUT_DIM, hidden, OUTPUT_DIM)
    cost_norm = effective_param_norm(P, bitwidth)

    cv = StratifiedKFold(n_splits=CV_FOLDS, shuffle=True, random_state=RANDOM_SEED)
    accs, latencies = [], []

    for train_idx, val_idx in cv.split(X_train, y_train):
        X_tr, X_val = X_train[train_idx], X_train[val_idx]
        y_tr, y_val = y_train[train_idx], y_train[val_idx]

        model = Pipeline([
            ("scaler", StandardScaler()),
            ("mlp", MLPClassifier(
                hidden_layer_sizes=hidden,
                activation=act,
                solver="adam",
                alpha=alpha,
                learning_rate_init=lr,
                batch_size=batch,
                max_iter=30,
                early_stopping=True,
                n_iter_no_change=5,
                random_state=RANDOM_SEED,
                tol=1e-4,
            ))
        ])
        model.fit(X_tr, y_tr)
        y_pred = model.predict(X_val)
        accs.append(accuracy_score(y_val, y_pred))

        for _ in range(WARMUP):
            model.predict(X_val)
        t0 = time.perf_counter()
        for _ in range(LAT_REPEATS):
            model.predict(X_val)
        t1 = time.perf_counter()
        total_preds = LAT_REPEATS * len(X_val)
        per_sample_s = (t1 - t0) / max(total_preds, 1)
        latencies.append(per_sample_s * 1000.0)

    acc_mean = float(np.mean(accs))
    lat_ms_raw = float(np.mean(latencies))
    lat_ms_eff = lat_ms_raw * (bitwidth / 32.0) ** ETA_LAT
    scalar = (ALPHA * acc_mean) - (BETA * cost_norm)

    return {
        "hidden": hidden, "act": act, "alpha": alpha, "lr": lr, "batch": batch, "bit": bitwidth,
        "P": P, "acc_cv": acc_mean, "cost_norm": cost_norm, "lat_ms_cv": lat_ms_eff, "scalar_fit": scalar,
        "violates_budget": cost_norm > 1.0
    }

# Evaluate baselines (CV on TRAIN)
baseline_cv = eval_baseline_cv(hidden=(64,64), act="relu", alpha=1e-4, lr=0.01, batch=128, bitwidth=32)  # bütçe-dışı
# Bütçe-İçi 32-bit Baseline: hidden=(60,)
bl_in_cv = eval_baseline_cv(hidden=(60,), act="relu", alpha=1e-4, lr=0.01, batch=128, bitwidth=32)       # bütçe-içi

# Final test evaluation helper
def eval_on_test(hidden, act, alpha, lr, batch, bit, max_iter=100):
    model = make_model(hidden, act, alpha, lr, batch, random_state=RANDOM_SEED)
    model.set_params(mlp__max_iter=max_iter, mlp__early_stopping=False, mlp__n_iter_no_change=10)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    for _ in range(WARMUP):
        model.predict(X_test)
    t0 = time.perf_counter()
    for _ in range(LAT_REPEATS):
        model.predict(X_test)
    t1 = time.perf_counter()
    per_sample_ms = ((t1 - t0) / max(LAT_REPEATS * len(X_test), 1)) * 1000.0
    lat_eff = per_sample_ms * (bit / 32.0) ** ETA_LAT
    return acc, lat_eff, y_pred

# Test eval: EA-Best and Baselines
ea_acc_test, ea_lat_test, ea_y_pred = eval_on_test(
    ea_best_info['hidden'], ea_best_info['act'], ea_best_info['alpha'],
    ea_best_info['lr'], ea_best_info['batch'], ea_best_info['bit'], max_iter=120
)
bl_acc_test, bl_lat_test, bl_y_pred = eval_on_test(
    baseline_cv['hidden'], baseline_cv['act'], baseline_cv['alpha'],
    baseline_cv['lr'], baseline_cv['batch'], baseline_cv['bit'], max_iter=120
)
bl_in_acc_test, bl_in_lat_test, _ = eval_on_test(
    bl_in_cv['hidden'], bl_in_cv['act'], bl_in_cv['alpha'],
    bl_in_cv['lr'], bl_in_cv['batch'], bl_in_cv['bit'], max_iter=120
)

# ===============================
# Comparison tables (CV and Test)
# ===============================
print("\n=== EA-Best vs Baseline (CV=TRAIN split) ===")
rows_cv = [
    ["EA-Best", f"{ea_best_info['acc']:.4f}", f"{ea_best_info['P']}",
     f"{ea_best_info['cost_norm']:.4f}", f"{ea_best_info['bit']}",
     f"{ea_best_info['lat_ms']:.4f}", f"{ea_best_info['scalar_fit']:.4f}", "Hayır"],
    ["Baseline (64,64) - Bütçe Dışı", f"{baseline_cv['acc_cv']:.4f}", f"{baseline_cv['P']}",
     f"{baseline_cv['cost_norm']:.4f}", f"{baseline_cv['bit']}",
     f"{baseline_cv['lat_ms_cv']:.4f}", f"{baseline_cv['scalar_fit']:.4f}",
     "Evet" if baseline_cv['violates_budget'] else "Hayır"],
    ["Baseline (60,) - Bütçe İçi", f"{bl_in_cv['acc_cv']:.4f}", f"{bl_in_cv['P']}",
     f"{bl_in_cv['cost_norm']:.4f}", f"{bl_in_cv['bit']}",
     f"{bl_in_cv['lat_ms_cv']:.4f}", f"{bl_in_cv['scalar_fit']:.4f}",
     "Evet" if bl_in_cv['violates_budget'] else "Hayır"],
]
print_table(["Model","Acc(CV)","Param(P)","Cost_norm","Bit","Latency(ms/sample)","Scalar","Bütçe İhlali?"], rows_cv)

print("\n=== Test Set Karşılaştırması (final training) — Adil Dövüş ===")
rows_test3 = [
    ["Baseline (Bütçe Dışı, 64,64)", f"{bl_acc_test:.4f}", f"{bl_lat_test:.4f}",
     f"{baseline_cv['P']}", f"{baseline_cv['bit']}", f"{model_size_kb(baseline_cv['P'], baseline_cv['bit']):.2f}",
     "Evet" if baseline_cv['cost_norm'] > 1.0 else "Hayır"],
    ["Baseline (Bütçe-İçi, 60,)", f"{bl_in_acc_test:.4f}", f"{bl_in_lat_test:.4f}",
     f"{bl_in_cv['P']}", f"{bl_in_cv['bit']}", f"{model_size_kb(bl_in_cv['P'], bl_in_cv['bit']):.2f}",
     "Evet" if bl_in_cv['cost_norm'] > 1.0 else "Hayır"],
    ["EA-Best", f"{ea_acc_test:.4f}", f"{ea_lat_test:.4f}",
     f"{ea_best_info['P']}", f"{ea_best_info['bit']}", f"{model_size_kb(ea_best_info['P'], ea_best_info['bit']):.2f}",
     "Hayır"]
]
print_table(["Model","Acc(Test)","Latency(ms/sample)","Param(P)","Bit","Model Boyutu(KB)","Bütçe İhlali?"], rows_test3)

ratio_mem_vs_budget_in = model_size_kb(bl_in_cv['P'], bl_in_cv['bit']) / model_size_kb(ea_best_info['P'], ea_best_info['bit'])
print(f"\nBellek verimliliği: EA-Best, bütçe-içi 32-bit baseline'a göre ~{ratio_mem_vs_budget_in:.1f}× daha küçük.")

print("\nEA-Best mimarisi:", ea_best_info['hidden'], "| act:", ea_best_info['act'],
      "| alpha:", ea_best_info['alpha'], "| lr:", ea_best_info['lr'],
      "| batch:", ea_best_info['batch'], "| bit:", ea_best_info['bit'])
print("Baseline(64,64) mimarisi:", baseline_cv['hidden'], "| act:", baseline_cv['act'],
      "| alpha:", baseline_cv['alpha'], "| lr:", baseline_cv['lr'],
      "| batch:", baseline_cv['batch'], "| bit:", baseline_cv['bit'])
print("Baseline(60,) mimarisi:", bl_in_cv['hidden'], "| act:", bl_in_cv['act'],
      "| alpha:", bl_in_cv['alpha'], "| lr:", bl_in_cv['lr'],
      "| batch:", bl_in_cv['batch'], "| bit:", bl_in_cv['bit'])

# Optional: Confusion matrices on test (EA-Best ve 64,64 baseline)
print("\n--- EA-Best Test Confusion Matrix ---")
print(confusion_matrix(y_test, ea_y_pred))
print("\nClassification Report (EA-Best):\n", classification_report(y_test, ea_y_pred, digits=4))

print("\n--- Baseline(64,64) Test Confusion Matrix ---")
print(confusion_matrix(y_test, bl_y_pred))
print("\nClassification Report (Baseline 64,64):\n", classification_report(y_test, bl_y_pred, digits=4))

# ===============================
# Step 2: Şampiyonun Gerçek Gücü — Top-10'u 120 iter ile testte yeniden değerlendir
# ===============================
top_final = []
for ind in top_k:
    info = decode_full(ind)
    acc_t, lat_t, _ = eval_on_test(info['hidden'], info['act'], info['alpha'],
                                   info['lr'], info['batch'], info['bit'], max_iter=120)
    top_final.append({
        "hidden": info['hidden'], "bit": info['bit'], "P": info['P'],
        "acc_cv": round(info['acc'], 4), "acc_test": round(acc_t, 4),
        "lat_ms_test": round(lat_t, 4), "cost_norm": round(info['cost_norm'], 4),
        "alpha": info['alpha'], "lr": info['lr'], "batch": info['batch'], "act": info['act'],
        "size_kb": round(model_size_kb(info['P'], info['bit']), 2)
    })

df_top = pd.DataFrame(top_final).sort_values(by=["acc_test","cost_norm"], ascending=[False,True])
print("\n=== Şampiyonluk Turu: Top-10 Final Test Özeti ===")
print(df_top.to_string(index=False))
df_top.to_csv("mnist_top10_final.csv", index=False)

# Şampiyonu seç (test doğruluğuna göre)
champ = df_top.iloc[0].to_dict()
print("\n=== Şampiyon (Test'e göre en iyi) ===")
print(f"acc_test={champ['acc_test']}, cost_norm={champ['cost_norm']}, lat_ms_test={champ['lat_ms_test']}, "
      f"P={champ['P']}, bit={champ['bit']}, size_kb={champ['size_kb']}, hidden={champ['hidden']}, "
      f"act={champ['act']}, alpha={champ['alpha']}, lr={champ['lr']}, batch={champ['batch']}")

# Bellek oranları
mem_ratio_vs_bl_out = model_size_kb(baseline_cv['P'], baseline_cv['bit']) / champ['size_kb']
mem_ratio_vs_bl_in  = model_size_kb(bl_in_cv['P'], bl_in_cv['bit']) / champ['size_kb']
print(f"\nBellek oranı: Şampiyon, bütçe-dışı baseline’a göre ~{mem_ratio_vs_bl_out:.1f}×; "
      f"bütçe-içi baseline’a göre ~{mem_ratio_vs_bl_in:.1f}× daha küçük.")

# --- Şampiyonu tekrar değerlendir (y_pred ile) ve nihai adil dövüş tablosu ---
ch_hidden = champ['hidden']; ch_act = champ['act']; ch_alpha = champ['alpha']
ch_lr = champ['lr']; ch_batch = champ['batch']; ch_bit = int(champ['bit'])
ch_acc, ch_lat, ch_pred = eval_on_test(ch_hidden, ch_act, ch_alpha, ch_lr, ch_batch, ch_bit, max_iter=120)

print("\n--- EA-Champion Test Confusion Matrix ---")
print(confusion_matrix(y_test, ch_pred))
print("\nClassification Report (EA-Champion):\n", classification_report(y_test, ch_pred, digits=4))

rows_test_champ = [
    ["Baseline (Bütçe Dışı, 64,64)", f"{bl_acc_test:.4f}", f"{bl_lat_test:.4f}",
     f"{baseline_cv['P']}", f"{baseline_cv['bit']}", f"{model_size_kb(baseline_cv['P'], baseline_cv['bit']):.2f}",
     "Evet" if baseline_cv['cost_norm'] > 1.0 else "Hayır"],
    ["Baseline (Bütçe-İçi, 60,)", f"{bl_in_acc_test:.4f}", f"{bl_in_lat_test:.4f}",
     f"{bl_in_cv['P']}", f"{bl_in_cv['bit']}", f"{model_size_kb(bl_in_cv['P'], bl_in_cv['bit']):.2f}",
     "Evet" if bl_in_cv['cost_norm'] > 1.0 else "Hayır"],
    ["EA-Champion", f"{ch_acc:.4f}", f"{ch_lat:.4f}",
     f"{champ['P']}", f"{ch_bit}", f"{champ['size_kb']:.2f}", "Hayır"]
]
print("\n=== Nihai Adil Dövüş (Şampiyon ile) ===")
print_table(["Model","Acc(Test)","Latency(ms/sample)","Param(P)","Bit","Model Boyutu(KB)","Bütçe İhlali?"], rows_test_champ)

print("\nNotlar:")
print("- EA, TRAIN split üzerinde CV ile optimize edilir; final performans TEST split üzerinde raporlanır.")
print("- Latency, scikit-learn MLP için gerçek quantization etkisini modellemediğinden proxy ile hesaplanır.")
print("- Daha yüksek doğruluk için arama uzayını genişletebilirsiniz (NEURON_MAX=128, LAYER_MAX=5, MAX_PARAMETRE=100_000) ve NGEN/POP_SIZE’i artırabilirsiniz.")

MNIST indiriliyor (OpenML)...
Subset hazır: X.shape=(10000, 784), sınıf sayısı=10
Gen 01 | acc_avg=0.8733 acc_max=0.9238 cost_min=0.0715 lat_min_ms=0.0030 | evals=58 cache=58
Gen 02 | acc_avg=0.9001 acc_max=0.9238 cost_min=0.0388 lat_min_ms=0.0027 | evals=82 cache=82
Gen 03 | acc_avg=0.9032 acc_max=0.9238 cost_min=0.0388 lat_min_ms=0.0026 | evals=108 cache=108
Gen 04 | acc_avg=0.9026 acc_max=0.9238 cost_min=0.0358 lat_min_ms=0.0026 | evals=128 cache=128
Gen 05 | acc_avg=0.9049 acc_max=0.9292 cost_min=0.0319 lat_min_ms=0.0026 | evals=155 cache=155
Gen 06 | acc_avg=0.9095 acc_max=0.9292 cost_min=0.0319 lat_min_ms=0.0025 | evals=180 cache=180
Gen 07 | acc_avg=0.9065 acc_max=0.9292 cost_min=0.0319 lat_min_ms=0.0025 | evals=204 cache=204
Gen 08 | acc_avg=0.9036 acc_max=0.9292 cost_min=0.0319 lat_min_ms=0.0025 | evals=228 cache=228
Gen 09 | acc_avg=0.9053 acc_max=0.9292 cost_min=0.0319 lat_min_ms=0.0025 | evals=255 cache=255
Gen 10 | acc_avg=0.9044 acc_max=0.9292 cost_min=0.0319 lat_min_ms=0