# Meta-Heuristic Feature Selection for Compressed BERT Embeddings
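This notebook uses meta-heuristic search to select an optimal subset of features from CBAM-compressed BERT embeddings. A Genetic Algorithm (GA) is implemented below; Jaya and the Rabbit Optimization Algorithm (ROA) are not implemented yet and can be added following the same pattern.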

In [None]:
import numpy as np
import pickle
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score, matthews_corrcoef
from sklearn.model_selection import train_test_split
import random


In [None]:
# Load compressed features
# NOTE: unpickling can execute arbitrary code, so only load a file that was
# produced by a trusted step of this pipeline.
with open("cbam_compressed_features.pkl", "rb") as f:
    data = pickle.load(f)

# The handle is only needed while unpickling; pull the arrays out afterwards.
X, y = data['compressed_features'], data['y_train']

In [None]:
# Fitness evaluation: train a GBM and return the F1 score on a hold-out split
def evaluate_fitness(X, y, feature_mask, average="binary"):
    """Score a feature subset by the hold-out F1 of a gradient-boosting classifier.

    Parameters
    ----------
    X : ndarray, 2-D
        Feature matrix; columns are the candidate features.
    y : array-like
        Target labels, one per row of ``X``.
    feature_mask : array-like
        One entry per column of ``X``; entries equal to 1 mark selected columns.
        Lists are accepted as well as arrays.
    average : str, default "binary"
        Forwarded to ``f1_score``. The default matches ``f1_score``'s own
        default; pass e.g. "macro" or "weighted" for multiclass targets.

    Returns
    -------
    float
        F1 score on a fixed 30% hold-out split, or 0.0 when the mask selects
        no feature at all.
    """
    # Coerce first: comparing a plain list with ``== 1`` yields a scalar False,
    # which would silently select nothing.
    mask = np.asarray(feature_mask) == 1
    selected = X[:, mask]
    if selected.shape[1] == 0:
        return 0.0
    X_train, X_val, y_train, y_val = train_test_split(
        selected, y, test_size=0.3, random_state=42
    )
    # Fix the seed so the same mask always receives the same fitness; an
    # unseeded model can break ties between equally good splits differently.
    clf = GradientBoostingClassifier(random_state=42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_val)
    return f1_score(y_val, y_pred, average=average)

In [None]:
# Genetic Algorithm
def genetic_algorithm(X, y, pop_size=20, generations=20, n_elite=2,
                      n_parents=10, mutation_rate=0.1, fitness_fn=None):
    """Search for a binary feature mask with a simple genetic algorithm.

    Each individual is a 0/1 vector with one entry per column of ``X``. Every
    generation the population is ranked by fitness, the top ``n_elite``
    individuals are carried over unchanged, and the remaining slots are filled
    with children produced by one-point crossover of two parents drawn from
    the top ``n_parents`` individuals, followed by an occasional single-bit
    flip.

    Parameters
    ----------
    X : ndarray, 2-D
        Feature matrix; ``X.shape[1]`` is the length of each mask.
    y : array-like
        Targets, passed through to the fitness function.
    pop_size : int, default 20
        Number of individuals per generation.
    generations : int, default 20
        Number of evaluate/select/breed rounds.
    n_elite : int, default 2
        Individuals copied unchanged into the next generation.
    n_parents : int, default 10
        Size of the top-ranked pool parents are drawn from (capped at
        ``pop_size``).
    mutation_rate : float, default 0.1
        Probability that a child gets one randomly chosen bit flipped.
    fitness_fn : callable, optional
        ``fitness_fn(X, y, mask) -> float``, higher is better. Defaults to
        ``evaluate_fitness``.

    Returns
    -------
    best : ndarray
        Best mask found among all evaluated individuals.
    best_f1 : float
        Fitness of ``best``.

    Raises
    ------
    ValueError
        If ``X`` has no columns, or ``pop_size``/``n_parents`` is below 1, or
        ``n_elite`` is negative.
    """
    if fitness_fn is None:
        fitness_fn = evaluate_fitness

    dim = X.shape[1]
    if dim < 1:
        raise ValueError("X must have at least one feature column")
    if pop_size < 1 or n_parents < 1 or n_elite < 0:
        raise ValueError("pop_size and n_parents must be >= 1 and n_elite >= 0")
    # Never index past the population when it is smaller than the parent pool.
    pool = min(n_parents, pop_size)

    population = np.random.randint(0, 2, (pop_size, dim))
    best, best_f1 = None, -np.inf
    for _ in range(generations):
        fitness = np.array([fitness_fn(X, y, ind) for ind in population], dtype=float)
        order = np.argsort(fitness)[::-1]  # best first
        population = population[order]
        if fitness[order[0]] > best_f1:
            best, best_f1 = population[0].copy(), float(fitness[order[0]])

        # Build the next generation in a list: ndarrays have no ``append``.
        new_pop = [ind.copy() for ind in population[:n_elite]]  # elitism
        while len(new_pop) < pop_size:
            p1, p2 = population[np.random.randint(0, pool, 2)]
            if dim > 1:
                # ``high`` is exclusive, so this draws a cut point in 1..dim-1.
                cross = np.random.randint(1, dim)
                child = np.concatenate([p1[:cross], p2[cross:]])
            else:
                # A single gene cannot be split; clone the first parent.
                child = p1.copy()
            if np.random.rand() < mutation_rate:
                child[np.random.randint(0, dim)] ^= 1
            new_pop.append(child)
        population = np.array(new_pop)

    if best is None:
        # No generation was evaluated (e.g. generations == 0): score the
        # first individual of the initial population.
        best = population[0].copy()
        best_f1 = fitness_fn(X, y, best)
    return best, best_f1

In [None]:
# Run GA
# Seed NumPy's global RNG first: the GA draws its initial population, parent
# picks, crossover points and mutations from it, so without a seed every
# re-run of this cell explores a different random sequence.
np.random.seed(42)
best_features_ga, best_score_ga = genetic_algorithm(X, y)
print(f"GA - Best F1 Score: {best_score_ga:.4f}, Selected Features: {np.sum(best_features_ga)}")

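> 📝 **TODO:** Jaya and ROA are not implemented in this notebook yet. They can be added here following the same pattern as the GA (a population of binary feature masks scored by `evaluate_fitness`), or taken from an external package such as `mealpy`.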

In [None]:
# Save selected feature mask
# Wrap the GA mask in a dict under the "mask" key, then pickle it to disk.
ga_selection = {"mask": best_features_ga}
with open("selected_features_ga.pkl", "wb") as f:
    pickle.dump(ga_selection, f)