In [None]:
# ===================================
# GWO for Feature Selection in Gene Expression Data
# ===================================

import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import random

# ------------------------------
# STEP 1: Create synthetic gene expression data
# ------------------------------
# 100 samples, 50 features (genes), 2 classes
X, y = make_classification(
    n_samples=100,
    n_features=50,
    n_informative=5,
    n_redundant=5,
    n_classes=2,
    random_state=42,
)
# Hold out 20% of the samples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ------------------------------
# STEP 2: Define GWO for feature selection
# ------------------------------
def gwo_feature_selection(X_train, y_train, X_test, y_test, num_wolves=10, max_iter=30,
                          *, random_state=None, fitness_fn=None):
    """Select a feature subset with a Grey Wolf Optimizer (GWO).

    Each wolf is a vector in [0, 1]^num_features; a feature counts as
    selected when its coordinate is >= 0.5. The three best wolves seen so
    far (alpha, beta, delta) steer the position update of the whole pack.

    Parameters
    ----------
    X_train, y_train : array-like
        Data the default fitness fits the classifier on. ``X_train`` must
        expose ``.shape`` and support ``X_train[:, mask]`` indexing.
    X_test, y_test : array-like
        Data the default fitness scores the classifier on.
        NOTE: because these samples drive the search, the returned score
        is an optimistic estimate; pass ``fitness_fn`` (e.g. one that
        cross-validates on the training data) for an unbiased protocol.
    num_wolves : int, default 10
        Number of candidate solutions in the pack.
    max_iter : int, default 30
        Number of evaluate-then-move iterations.
    random_state : int or None, keyword-only
        Seed for a private ``numpy.random.RandomState``. When ``None`` the
        global ``numpy.random`` state is used, so ``np.random.seed`` alone
        is enough to make a run repeatable.
    fitness_fn : callable or None, keyword-only
        Optional replacement for the default fitness. It is called with a
        boolean mask of length ``num_features`` that has at least one
        ``True`` entry and must return a score where larger is better.

    Returns
    -------
    alpha_pos : numpy.ndarray
        Continuous position of the best wolf; threshold at 0.5 to obtain
        the selected feature indices.
    alpha_score : float
        Fitness of ``alpha_pos`` (``-inf`` if nothing was evaluated, i.e.
        ``max_iter`` or ``num_wolves`` is 0).
    """
    num_features = X_train.shape[1]

    # One random source for both initialisation and updates; the original
    # mixed `np.random` and `random`, which defeated seeding.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    def svc_test_accuracy(mask):
        # Default fitness: linear SVC trained on the selected columns of
        # the training data, scored by accuracy on the test data.
        clf = SVC(kernel='linear')
        clf.fit(X_train[:, mask], y_train)
        y_pred = clf.predict(X_test[:, mask])
        return accuracy_score(y_test, y_pred)

    score_mask = svc_test_accuracy if fitness_fn is None else fitness_fn

    def fitness(position):
        mask = position >= 0.5
        if not mask.any():
            # An empty subset cannot be scored; give it the score 0.
            return 0
        return score_mask(mask)

    # Initialize wolves randomly (0-1 for each feature)
    wolves = rng.rand(num_wolves, num_features)

    # Placeholders until real wolves have been evaluated.
    alpha_pos = np.zeros(num_features)
    alpha_score = -np.inf
    beta_pos = np.zeros(num_features)
    beta_score = -np.inf
    delta_pos = np.zeros(num_features)
    delta_score = -np.inf

    for t in range(max_iter):
        # Exploration factor: decreases linearly from 2 toward 0.
        a = 2 - 2 * (t / max_iter)

        for wolf in wolves:
            fit = fitness(wolf)
            # Demote the displaced leaders instead of discarding them, so
            # alpha/beta/delta really are the three best solutions seen and
            # the zero placeholders are replaced after three evaluations.
            if fit > alpha_score:
                delta_score, delta_pos = beta_score, beta_pos
                beta_score, beta_pos = alpha_score, alpha_pos
                alpha_score, alpha_pos = fit, wolf.copy()
            elif fit > beta_score:
                delta_score, delta_pos = beta_score, beta_pos
                beta_score, beta_pos = fit, wolf.copy()
            elif fit > delta_score:
                delta_score, delta_pos = fit, wolf.copy()

        # Update positions: every coordinate of every wolf moves to the mean
        # of three leader-guided candidates, each with its own random draws.
        candidate_sum = np.zeros_like(wolves)
        for leader_pos in (alpha_pos, beta_pos, delta_pos):
            r1 = rng.rand(num_wolves, num_features)
            r2 = rng.rand(num_wolves, num_features)
            A = 2 * a * r1 - a
            C = 2 * r2
            distance = np.abs(C * leader_pos - wolves)
            candidate_sum += leader_pos - A * distance
        # Keep positions inside [0, 1] so the 0.5 threshold stays meaningful.
        wolves = np.clip(candidate_sum / 3, 0, 1)

    return alpha_pos, alpha_score

# ------------------------------
# STEP 3: Run GWO for feature selection
# ------------------------------
best_features, best_acc = gwo_feature_selection(X_train, y_train, X_test, y_test)
# A gene is kept when its coordinate in the best position is at least 0.5.
selected_features = np.flatnonzero(best_features >= 0.5).tolist()

print("✅ Selected Features Indices:", selected_features)
print("✅ Best Classification Accuracy:", best_acc)


✅ Selected Features Indices: [0, 1, 5, 7, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 26, 29, 30, 31, 34, 37, 39, 41, 48, 49]
✅ Best Classification Accuracy: 1.0
