# Nature-Inspired Computation - Phase 1
## Text Classification with Metaheuristic Optimization

**Date:** November 2025

---

## Project Overview

This project implements Phase 1:
1. **Dataset:** AG News (120,000 samples)
2. **Baseline:** LSTM Text Classifier
3. **Feature Selection:** Ant Colony Optimization
4. **Hyperparameter Optimization:** 6 Metaheuristic Algorithms

## 1. Environment Setup

In [1]:
# Install packages
!pip install torch numpy pandas matplotlib seaborn scikit-learn datasets transformers -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/75.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.1/75.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/511.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.6/511.6 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/119.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.7/119.7 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/150.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.3/150.3 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
import time
import random
import re
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

Device: cpu


## 2. Load Dataset

In [3]:
from datasets import load_dataset

dataset = load_dataset("ag_news")
train_data = dataset['train']
test_data = dataset['test']

USE_SUBSET = True
SUBSET_SIZE = 20000

if USE_SUBSET:
    indices = np.random.choice(len(train_data), SUBSET_SIZE, replace=False)
    train_texts = [train_data[int(i)]['text'] for i in indices]
    train_labels = [train_data[int(i)]['label'] for i in indices]
else:
    train_texts = train_data['text']
    train_labels = train_data['label']

test_texts = test_data['text']
test_labels = test_data['label']

print(f"Train: {len(train_texts)}, Test: {len(test_texts)}")

README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

Train: 20000, Test: 7600


## 3. Preprocessing

In [4]:
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = ' '.join(text.split())
    return text

train_texts = [preprocess_text(t) for t in train_texts]
test_texts = [preprocess_text(t) for t in test_texts]

MAX_VOCAB_SIZE = 10000
MAX_SEQ_LENGTH = 100

word_counts = Counter()
for text in train_texts:
    word_counts.update(text.split())

most_common = word_counts.most_common(MAX_VOCAB_SIZE - 2)
vocab = {word: idx + 2 for idx, (word, _) in enumerate(most_common)}
vocab['<PAD>'] = 0
vocab['<UNK>'] = 1

print(f"Vocab size: {len(vocab)}")

Vocab size: 10000


In [5]:
def text_to_sequence(text, vocab, max_len):
    words = text.split()
    sequence = [vocab.get(word, vocab['<UNK>']) for word in words]
    if len(sequence) < max_len:
        sequence += [vocab['<PAD>']] * (max_len - len(sequence))
    else:
        sequence = sequence[:max_len]
    return sequence

X_train = np.array([text_to_sequence(t, vocab, MAX_SEQ_LENGTH) for t in train_texts])
X_test = np.array([text_to_sequence(t, vocab, MAX_SEQ_LENGTH) for t in test_texts])
y_train = np.array(train_labels)
y_test = np.array(test_labels)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=SEED, stratify=y_train)

print(f"Shapes: Train {X_train.shape}, Val {X_val.shape}, Test {X_test.shape}")

Shapes: Train (17000, 100), Val (3000, 100), Test (7600, 100)


## 4. LSTM Model

In [6]:
class LSTMClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers=2, dropout=0.3):
        super(LSTMClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout if n_layers > 1 else 0)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out, (hidden, cell) = self.lstm(embedded)
        hidden = self.dropout(hidden[-1])
        output = self.fc(hidden)
        return output

VOCAB_SIZE = len(vocab)
EMBEDDING_DIM = 128
HIDDEN_DIM = 256
OUTPUT_DIM = 4
BATCH_SIZE = 64

train_dataset = TensorDataset(torch.LongTensor(X_train), torch.LongTensor(y_train))
val_dataset = TensorDataset(torch.LongTensor(X_val), torch.LongTensor(y_val))
test_dataset = TensorDataset(torch.LongTensor(X_test), torch.LongTensor(y_test))

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

print("Model defined")

Model defined


In [7]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0

        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            train_total += batch_y.size(0)
            train_correct += (predicted == batch_y).sum().item()

        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += batch_y.size(0)
                val_correct += (predicted == batch_y).sum().item()

        train_loss /= len(train_loader)
        train_acc = 100 * train_correct / train_total
        val_loss /= len(val_loader)
        val_acc = 100 * val_correct / val_total

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f"Epoch {epoch+1}/{epochs}: Train Loss: {train_loss:.4f}, Acc: {train_acc:.2f}% | Val Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%")

    return history

def evaluate_model(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_x)
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(batch_y.cpu().numpy())

    return accuracy_score(all_labels, all_preds), all_preds, all_labels

print("Training functions defined")

Training functions defined


## 5. Train Baseline

In [8]:
baseline_model = LSTMClassifier(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(baseline_model.parameters(), lr=0.001)

start_time = time.time()
baseline_history = train_model(baseline_model, train_loader, val_loader, criterion, optimizer, epochs=5)
baseline_train_time = time.time() - start_time

baseline_acc, baseline_preds, _ = evaluate_model(baseline_model, test_loader)
print(f"\nBaseline Accuracy: {baseline_acc*100:.2f}%, Time: {baseline_train_time:.2f}s")

Epoch 1/5: Train Loss: 1.3905, Acc: 25.22% | Val Loss: 1.3857, Acc: 25.57%
Epoch 2/5: Train Loss: 1.3871, Acc: 25.40% | Val Loss: 1.3866, Acc: 25.53%
Epoch 3/5: Train Loss: 1.3850, Acc: 25.02% | Val Loss: 1.3858, Acc: 24.97%
Epoch 4/5: Train Loss: 1.3845, Acc: 25.18% | Val Loss: 1.3853, Acc: 24.90%
Epoch 5/5: Train Loss: 1.3840, Acc: 25.39% | Val Loss: 1.3857, Acc: 25.57%

Baseline Accuracy: 25.33%, Time: 475.79s


## 6. ACO Feature Selection

In [9]:
vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
X_train_tfidf = vectorizer.fit_transform([' '.join(map(str, seq)) for seq in X_train]).toarray()
X_val_tfidf = vectorizer.transform([' '.join(map(str, seq)) for seq in X_val]).toarray()
X_test_tfidf = vectorizer.transform([' '.join(map(str, seq)) for seq in X_test]).toarray()

print(f"TF-IDF shape: {X_train_tfidf.shape}")

TF-IDF shape: (17000, 500)


In [10]:
class AntColonyFeatureSelection:
    def __init__(self, n_ants=20, n_iterations=30, alpha=1.0, beta=2.0, evaporation_rate=0.5, q=1.0):
        self.n_ants = n_ants
        self.n_iterations = n_iterations
        self.alpha = alpha
        self.beta = beta
        self.evaporation_rate = evaporation_rate
        self.q = q

    def fit(self, X, y, n_features_to_select=50):
        n_features = X.shape[1]
        pheromones = np.ones(n_features)

        from sklearn.feature_selection import mutual_info_classif
        from sklearn.linear_model import LogisticRegression

        heuristic = mutual_info_classif(X, y, random_state=SEED)
        heuristic = (heuristic - heuristic.min()) / (heuristic.max() - heuristic.min() + 1e-10)

        best_features = None
        best_score = 0

        for iteration in range(self.n_iterations):
            all_features = []
            all_scores = []

            for ant in range(self.n_ants):
                probabilities = (pheromones ** self.alpha) * (heuristic ** self.beta)
                probabilities /= probabilities.sum()
                selected = np.random.choice(n_features, n_features_to_select, replace=False, p=probabilities)

                clf = LogisticRegression(max_iter=100, random_state=SEED)
                clf.fit(X[:, selected], y)
                score = clf.score(X[:, selected], y)

                all_features.append(selected)
                all_scores.append(score)

                if score > best_score:
                    best_score = score
                    best_features = selected.copy()

            pheromones *= (1 - self.evaporation_rate)
            for features, score in zip(all_features, all_scores):
                for feature in features:
                    pheromones[feature] += self.q * score

            if (iteration + 1) % 10 == 0:
                print(f"ACO Iter {iteration+1}: Score = {best_score:.4f}")

        self.selected_features_ = best_features
        self.feature_importance_ = pheromones
        return self

    def transform(self, X):
        return X[:, self.selected_features_]

aco = AntColonyFeatureSelection(n_ants=15, n_iterations=20)
aco.fit(X_train_tfidf, y_train, n_features_to_select=100)
print("ACO completed")

ACO Iter 10: Score = 0.7319
ACO Iter 20: Score = 0.7319
ACO completed


## 7. Metaheuristic Algorithms

In [11]:
param_bounds = {
    'learning_rate': (0.0001, 0.01),
    'hidden_dim': (128, 512),
    'dropout': (0.1, 0.5),
    'n_layers': (1, 3),
    'embedding_dim': (64, 256)
}

def fitness_function(params, epochs=3):
    lr, hidden_dim, dropout, n_layers, embedding_dim = params
    hidden_dim = int(hidden_dim)
    n_layers = int(n_layers)
    embedding_dim = int(embedding_dim)

    model = LSTMClassifier(VOCAB_SIZE, embedding_dim, hidden_dim, OUTPUT_DIM, n_layers, dropout).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_val_acc = 0
    for epoch in range(epochs):
        model.train()
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        model.eval()
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                _, predicted = torch.max(outputs.data, 1)
                val_total += batch_y.size(0)
                val_correct += (predicted == batch_y).sum().item()

        val_acc = val_correct / val_total
        if val_acc > best_val_acc:
            best_val_acc = val_acc

    return best_val_acc

print("Fitness function ready")

Fitness function ready


In [3]:
class ParticleSwarmOptimization:
    def __init__(self, n_particles=10, n_iterations=15, w=0.7, c1=1.5, c2=1.5):
        self.n_particles = n_particles
        self.n_iterations = n_iterations
        self.w = w
        self.c1 = c1
        self.c2 = c2

    def optimize(self, fitness_func, bounds):
        n_dims = len(bounds)
        lb = np.array([bounds[k][0] for k in bounds.keys()])
        ub = np.array([bounds[k][1] for k in bounds.keys()])

        particles = np.random.uniform(lb, ub, (self.n_particles, n_dims))
        velocities = np.random.uniform(-1, 1, (self.n_particles, n_dims))
        personal_best_positions = particles.copy()
        personal_best_scores = np.array([fitness_func(p) for p in particles])

        global_best_idx = np.argmax(personal_best_scores)
        global_best_position = personal_best_positions[global_best_idx].copy()
        global_best_score = personal_best_scores[global_best_idx]

        history = []
        for iteration in range(self.n_iterations):
            for i in range(self.n_particles):
                r1, r2 = np.random.rand(n_dims), np.random.rand(n_dims)
                velocities[i] = (self.w * velocities[i] + self.c1 * r1 * (personal_best_positions[i] - particles[i]) + self.c2 * r2 * (global_best_position - particles[i]))
                particles[i] += velocities[i]
                particles[i] = np.clip(particles[i], lb, ub)
                score = fitness_func(particles[i])

                if score > personal_best_scores[i]:
                    personal_best_scores[i] = score
                    personal_best_positions[i] = particles[i].copy()
                    if score > global_best_score:
                        global_best_score = score
                        global_best_position = particles[i].copy()

            history.append(global_best_score)
            print(f"PSO Iter {iteration+1}: {global_best_score:.4f}")

        return global_best_position, global_best_score, history

pso_start = time.time()
pso = ParticleSwarmOptimization(n_particles=8, n_iterations=10)
pso_best_params, pso_best_score, pso_history = pso.optimize(fitness_function, param_bounds)
pso_time = time.time() - pso_start
print(f"PSO: {pso_best_score:.4f}, Time: {pso_time:.2f}s")

NameError: name 'time' is not defined

In [None]:
class GeneticAlgorithm:
    def __init__(self, pop_size=10, n_generations=15, mutation_rate=0.2, crossover_rate=0.8):
        self.pop_size = pop_size
        self.n_generations = n_generations
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate

    def optimize(self, fitness_func, bounds):
        n_dims = len(bounds)
        lb = np.array([bounds[k][0] for k in bounds.keys()])
        ub = np.array([bounds[k][1] for k in bounds.keys()])
        population = np.random.uniform(lb, ub, (self.pop_size, n_dims))
        best_individual = None
        best_score = 0
        history = []

        for generation in range(self.n_generations):
            fitness_scores = np.array([fitness_func(ind) for ind in population])
            max_idx = np.argmax(fitness_scores)
            if fitness_scores[max_idx] > best_score:
                best_score = fitness_scores[max_idx]
                best_individual = population[max_idx].copy()
            history.append(best_score)
            print(f"GA Gen {generation+1}: {best_score:.4f}")

            selected = []
            for _ in range(self.pop_size):
                idx1, idx2 = np.random.choice(self.pop_size, 2, replace=False)
                winner = idx1 if fitness_scores[idx1] > fitness_scores[idx2] else idx2
                selected.append(population[winner].copy())
            selected = np.array(selected)

            offspring = []
            for i in range(0, self.pop_size, 2):
                parent1 = selected[i]
                parent2 = selected[(i+1) % self.pop_size]
                if np.random.rand() < self.crossover_rate:
                    crossover_point = np.random.randint(1, n_dims)
                    child1 = np.concatenate([parent1[:crossover_point], parent2[crossover_point:]])
                    child2 = np.concatenate([parent2[:crossover_point], parent1[crossover_point:]])
                else:
                    child1, child2 = parent1.copy(), parent2.copy()
                offspring.extend([child1, child2])
                        offspring = np.array(offspring[:self.pop_size])

            for i in range(self.pop_size):
                if np.random.rand() < self.mutation_rate:
                    mutation_idx = np.random.randint(n_dims)
                    offspring[i, mutation_idx] = np.random.uniform(lb[mutation_idx], ub[mutation_idx])
            population = offspring

        return best_individual, best_score, history

ga_start = time.time()
ga = GeneticAlgorithm(pop_size=8, n_generations=10)
ga_best_params, ga_best_score, ga_history = ga.optimize(fitness_function, param_bounds)
ga_time = time.time() - ga_start
print(f"GA: {ga_best_score:.4f}, Time: {ga_time:.2f}s")

In [None]:
class SimulatedAnnealing:
    def __init__(self, n_iterations=100, initial_temp=100, cooling_rate=0.95):
        self.n_iterations = n_iterations
        self.initial_temp = initial_temp
        self.cooling_rate = cooling_rate

    def optimize(self, fitness_func, bounds):
        n_dims = len(bounds)
        lb = np.array([bounds[k][0] for k in bounds.keys()])
        ub = np.array([bounds[k][1] for k in bounds.keys()])
        current_solution = np.random.uniform(lb, ub, n_dims)
        current_score = fitness_func(current_solution)
        best_solution = current_solution.copy()
        best_score = current_score
        temperature = self.initial_temp
        history = []

        for iteration in range(self.n_iterations):
            neighbor = current_solution + np.random.normal(0, 0.1, n_dims)
            neighbor = np.clip(neighbor, lb, ub)
            neighbor_score = fitness_func(neighbor)
            delta = neighbor_score - current_score
            if delta > 0 or np.random.rand() < np.exp(delta / temperature):
                current_solution = neighbor
                current_score = neighbor_score
                if current_score > best_score:
                    best_score = current_score
                    best_solution = current_solution.copy()
            temperature *= self.cooling_rate
            history.append(best_score)
            if (iteration + 1) % 20 == 0:
                print(f"SA Iter {iteration+1}: {best_score:.4f}")

        return best_solution, best_score, history

sa_start = time.time()
sa = SimulatedAnnealing(n_iterations=80, initial_temp=100, cooling_rate=0.95)
sa_best_params, sa_best_score, sa_history = sa.optimize(fitness_function, param_bounds)
sa_time = time.time() - sa_start
print(f"SA: {sa_best_score:.4f}, Time: {sa_time:.2f}s")

In [None]:
class DifferentialEvolution:
    def __init__(self, pop_size=10, n_generations=15, F=0.8, CR=0.7):
        self.pop_size = pop_size
        self.n_generations = n_generations
        self.F = F
        self.CR = CR

    def optimize(self, fitness_func, bounds):
        n_dims = len(bounds)
        lb = np.array([bounds[k][0] for k in bounds.keys()])
        ub = np.array([bounds[k][1] for k in bounds.keys()])
        population = np.random.uniform(lb, ub, (self.pop_size, n_dims))
        fitness_scores = np.array([fitness_func(ind) for ind in population])
        best_idx = np.argmax(fitness_scores)
        best_solution = population[best_idx].copy()
        best_score = fitness_scores[best_idx]
        history = []

        for generation in range(self.n_generations):
            for i in range(self.pop_size):
                indices = [idx for idx in range(self.pop_size) if idx != i]
                a, b, c = population[np.random.choice(indices, 3, replace=False)]
                mutant = a + self.F * (b - c)
                mutant = np.clip(mutant, lb, ub)
                cross_points = np.random.rand(n_dims) < self.CR
                if not np.any(cross_points):
                    cross_points[np.random.randint(n_dims)] = True
                trial = np.where(cross_points, mutant, population[i])
                trial_score = fitness_func(trial)
                if trial_score > fitness_scores[i]:
                    population[i] = trial
                    fitness_scores[i] = trial_score
                    if trial_score > best_score:
                        best_score = trial_score
                        best_solution = trial.copy()
            history.append(best_score)
            print(f"DE Gen {generation+1}: {best_score:.4f}")

        return best_solution, best_score, history

de_start = time.time()
de = DifferentialEvolution(pop_size=8, n_generations=10)
de_best_params, de_best_score, de_history = de.optimize(fitness_function, param_bounds)
de_time = time.time() - de_start
print(f"DE: {de_best_score:.4f}, Time: {de_time:.2f}s")

In [None]:
class GreyWolfOptimizer:
    def __init__(self, n_wolves=10, n_iterations=15):
        self.n_wolves = n_wolves
        self.n_iterations = n_iterations

    def optimize(self, fitness_func, bounds):
        n_dims = len(bounds)
        lb = np.array([bounds[k][0] for k in bounds.keys()])
        ub = np.array([bounds[k][1] for k in bounds.keys()])
        wolves = np.random.uniform(lb, ub, (self.n_wolves, n_dims))
        alpha_pos = np.zeros(n_dims)
        alpha_score = 0
        beta_pos = np.zeros(n_dims)
        beta_score = 0
        delta_pos = np.zeros(n_dims)
        delta_score = 0
        history = []

        for iteration in range(self.n_iterations):
            for i in range(self.n_wolves):
                fitness = fitness_func(wolves[i])
                if fitness > alpha_score:
                    delta_score = beta_score
                    delta_pos = beta_pos.copy()
                    beta_score = alpha_score
                    beta_pos = alpha_pos.copy()
                    alpha_score = fitness
                    alpha_pos = wolves[i].copy()
                elif fitness > beta_score:
                    delta_score = beta_score
                    delta_pos = beta_pos.copy()
                    beta_score = fitness
                    beta_pos = wolves[i].copy()
                elif fitness > delta_score:
                    delta_score = fitness
                    delta_pos = wolves[i].copy()

            a = 2 - iteration * (2.0 / self.n_iterations)
            for i in range(self.n_wolves):
                for j in range(n_dims):
                    r1, r2 = np.random.rand(), np.random.rand()
                    A1 = 2 * a * r1 - a
                    C1 = 2 * r2
                    D_alpha = abs(C1 * alpha_pos[j] - wolves[i, j])
                    X1 = alpha_pos[j] - A1 * D_alpha
                    r1, r2 = np.random.rand(), np.random.rand()
                    A2 = 2 * a * r1 - a
                    C2 = 2 * r2
                    D_beta = abs(C2 * beta_pos[j] - wolves[i, j])
                    X2 = beta_pos[j] - A2 * D_beta
                    r1, r2 = np.random.rand(), np.random.rand()
                    A3 = 2 * a * r1 - a
                    C3 = 2 * r2
                    D_delta = abs(C3 * delta_pos[j] - wolves[i, j])
                    X3 = delta_pos[j] - A3 * D_delta
                    wolves[i, j] = (X1 + X2 + X3) / 3
                wolves[i] = np.clip(wolves[i], lb, ub)
            history.append(alpha_score)
            print(f"GWO Iter {iteration+1}: {alpha_score:.4f}")

        return alpha_pos, alpha_score, history

gwo_start = time.time()
gwo = GreyWolfOptimizer(n_wolves=8, n_iterations=10)
gwo_best_params, gwo_best_score, gwo_history = gwo.optimize(fitness_function, param_bounds)
gwo_time = time.time() - gwo_start
print(f"GWO: {gwo_best_score:.4f}, Time: {gwo_time:.2f}s")

In [None]:
class FireflyAlgorithm:
    def __init__(self, n_fireflies=10, n_iterations=15, alpha=0.2, beta0=1.0, gamma=1.0):
        self.n_fireflies = n_fireflies
        self.n_iterations = n_iterations
        self.alpha = alpha
        self.beta0 = beta0
        self.gamma = gamma

    def optimize(self, fitness_func, bounds):
        n_dims = len(bounds)
        lb = np.array([bounds[k][0] for k in bounds.keys()])
        ub = np.array([bounds[k][1] for k in bounds.keys()])
        fireflies = np.random.uniform(lb, ub, (self.n_fireflies, n_dims))
        intensities = np.array([fitness_func(f) for f in fireflies])
        best_idx = np.argmax(intensities)
        best_firefly = fireflies[best_idx].copy()
        best_intensity = intensities[best_idx]
        history = []

        for iteration in range(self.n_iterations):
            for i in range(self.n_fireflies):
                for j in range(self.n_fireflies):
                    if intensities[j] > intensities[i]:
                        r = np.linalg.norm(fireflies[i] - fireflies[j])
                        beta = self.beta0 * np.exp(-self.gamma * r**2)
                        fireflies[i] += beta * (fireflies[j] - fireflies[i]) + self.alpha * (np.random.rand(n_dims) - 0.5)
                        fireflies[i] = np.clip(fireflies[i], lb, ub)
                        intensities[i] = fitness_func(fireflies[i])
                        if intensities[i] > best_intensity:
                            best_intensity = intensities[i]
                            best_firefly = fireflies[i].copy()
            history.append(best_intensity)
            print(f"FA Iter {iteration+1}: {best_intensity:.4f}")

        return best_firefly, best_intensity, history

fa_start = time.time()
fa = FireflyAlgorithm(n_fireflies=8, n_iterations=10)
fa_best_params, fa_best_score, fa_history = fa.optimize(fitness_function, param_bounds)
fa_time = time.time() - fa_start
print(f"FA: {fa_best_score:.4f}, Time: {fa_time:.2f}s")

## 8. Results & Visualization

In [None]:
results_df = pd.DataFrame({
    'Algorithm': ['Baseline', 'PSO', 'GA', 'SA', 'DE', 'GWO', 'FA'],
    'Best Accuracy': [baseline_acc, pso_best_score, ga_best_score, sa_best_score, de_best_score, gwo_best_score, fa_best_score],
    'Time (s)': [baseline_train_time, pso_time, ga_time, sa_time, de_time, gwo_time, fa_time]
})

print("\n" + "="*60)
print("RESULTS SUMMARY")
print("="*60)
print(results_df.to_string(index=False))
print("="*60)

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
colors = ['gray', 'steelblue', 'coral', 'green', 'gold', 'purple', 'red']

axes[0].bar(results_df['Algorithm'], results_df['Best Accuracy'] * 100, color=colors)
axes[0].set_ylabel('Accuracy (%)')
axes[0].set_title('Accuracy Comparison')
axes[0].tick_params(axis='x', rotation=45)

axes[1].bar(results_df['Algorithm'], results_df['Time (s)'], color=colors)
axes[1].set_ylabel('Time (s)')
axes[1].set_title('Computation Time')
axes[1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
plt.figure(figsize=(12, 6))
plt.plot(pso_history, marker='o', label='PSO')
plt.plot(ga_history, marker='s', label='GA')
plt.plot(sa_history, marker='^', label='SA', alpha=0.7)
plt.plot(de_history, marker='d', label='DE')
plt.plot(gwo_history, marker='*', label='GWO')
plt.plot(fa_history, marker='x', label='FA')
plt.xlabel('Iteration')
plt.ylabel('Best Fitness')
plt.title('Convergence Curves')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## 9. Train Final Model

In [None]:
best_idx = results_df['Best Accuracy'].iloc[1:].idxmax()
best_algo = results_df.loc[best_idx, 'Algorithm']
algo_params = {'PSO': pso_best_params, 'GA': ga_best_params, 'SA': sa_best_params, 'DE': de_best_params, 'GWO': gwo_best_params, 'FA': fa_best_params}
final_params = algo_params[best_algo]

lr, hidden_dim, dropout, n_layers, embedding_dim = final_params
hidden_dim = int(hidden_dim)
n_layers = int(n_layers)
embedding_dim = int(embedding_dim)

print(f"Best Algorithm: {best_algo}")
print(f"LR: {lr:.6f}, Hidden: {hidden_dim}, Dropout: {dropout:.3f}, Layers: {n_layers}, Embed: {embedding_dim}")

In [None]:
final_model = LSTMClassifier(VOCAB_SIZE, embedding_dim, hidden_dim, OUTPUT_DIM, n_layers, dropout).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(final_model.parameters(), lr=lr)

final_history = train_model(final_model, train_loader, val_loader, criterion, optimizer, epochs=10)
final_test_acc, final_preds, final_labels = evaluate_model(final_model, test_loader)

print(f"\nFinal Test Accuracy: {final_test_acc*100:.2f}%")
print(f"Improvement: {(final_test_acc - baseline_acc)*100:.2f}%")

In [None]:
cm = confusion_matrix(final_labels, final_preds)
class_names = ['World', 'Sports', 'Business', 'Sci/Tech']

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

print("\n" + classification_report(final_labels, final_preds, target_names=class_names))

## 10. Save Results

In [None]:
torch.save({'model_state_dict': final_model.state_dict(), 'hyperparameters': {'vocab_size': VOCAB_SIZE, 'embedding_dim': embedding_dim, 'hidden_dim': hidden_dim, 'output_dim': OUTPUT_DIM, 'n_layers': n_layers, 'dropout': dropout, 'learning_rate': lr}, 'test_accuracy': final_test_acc, 'best_algorithm': best_algo}, 'phase1_model.pth')

results_df.to_csv('algorithm_comparison.csv', index=False)

print('Project Complete!')
print('Model saved as phase1_model.pth')
print('Results saved as algorithm_comparison.csv')