# Lung Histopathology Classification: ACA / N / SCC
## Multi-CNN + Channel Attention + GA + KNN/SVM/RF + Fusion

This notebook implements a comprehensive lung histopathology classification system that combines:
- Multiple CNN backbones (DenseNet121, ResNet50, VGG16)
- Channel attention mechanism (SE blocks)
- Genetic Algorithm for feature selection
- Ensemble of classical ML classifiers (KNN, SVM, Random Forest)
- Majority voting fusion

In [1]:
!pip install --upgrade --force-reinstall numpy==1.25.2 tensorflow==2.15.0 keras==2.15.0

Collecting numpy==1.25.2
  Using cached numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting tensorflow==2.15.0
  Using cached tensorflow-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting keras==2.15.0
  Using cached keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)
Collecting absl-py>=1.0.0 (from tensorflow==2.15.0)
  Using cached absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow==2.15.0)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow==2.15.0)
  Using cached flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow==2.15.0)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow==2.15.0)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Co

In [2]:
# Import required libraries
import os, random, json
import numpy as np
import pandas as pd
import keras
import tensorflow as tf

from keras.layers import Dense
from keras.models import Sequential
from tensorflow.keras.applications import DenseNet121, ResNet50, EfficientNetB0, InceptionV3
from tensorflow.keras.applications.densenet import preprocess_input as pre_densenet
from tensorflow.keras.applications.resnet import preprocess_input as pre_resnet
from tensorflow.keras.applications.efficientnet import preprocess_input as pre_efficientnet
from tensorflow.keras.applications.inception_v3 import preprocess_input as pre_inception
from tensorflow.keras.layers import (Input, GlobalAveragePooling2D, GlobalMaxPooling2D,
                                     Concatenate, Dense, Reshape, Multiply, Lambda)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from scipy.stats import mode

# from deap import base, creator, tools  # GA removed, not needed

print("All libraries imported successfully!")

2025-09-11 08:35:08.737117: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-11 08:35:08.976308: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-09-11 08:35:08.976668: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-09-11 08:35:09.019077: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-09-11 08:35:09.120106: I tensorflow/core/platform/cpu_feature_guar

All libraries imported successfully!


In [3]:
# Configuration and Data Setup
DATA_DIR   = "/teamspace/studios/this_studio/lung_cancer/dataset/lung_image_sets"  # << set this
IMG_SIZE   = (224, 224)
BATCH_SIZE = 24
SEED       = 42

# Set random seeds for reproducibility
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

print(f"Configuration set:")
print(f"Data Directory: {DATA_DIR}")
print(f"Image Size: {IMG_SIZE}")
print(f"Batch Size: {BATCH_SIZE}")
print(f"Random Seed: {SEED}")

Configuration set:
Data Directory: /teamspace/studios/this_studio/lung_cancer/dataset/lung_image_sets
Image Size: (224, 224)
Batch Size: 24
Random Seed: 42


In [4]:
# Number of attention heads for multi-head channel attention
NUM_ATTENTION_HEADS = 8

In [5]:
train_datagen = ImageDataGenerator(
    validation_split=0.20,
    rotation_range=20,
    horizontal_flip=True,
    # IMPORTANT: no rescale here, since we feed raw to model-specific preprocessors
)

def make_gen(subset):
    return train_datagen.flow_from_directory(
        DATA_DIR,
        target_size=IMG_SIZE,
        class_mode='categorical',
        batch_size=BATCH_SIZE,
        subset=subset,
        seed=SEED,
        shuffle=True
    )

train_gen = make_gen('training')
val_gen   = make_gen('validation')
num_classes = train_gen.num_classes
class_indices = train_gen.class_indices
id2label = {v:k for k,v in class_indices.items()}

print("Classes:", class_indices)
print(f"Number of classes: {num_classes}")
print(f"Training samples: {train_gen.samples}")
print(f"Validation samples: {val_gen.samples}")

Found 12000 images belonging to 3 classes.
Found 3000 images belonging to 3 classes.
Classes: {'lung_aca': 0, 'lung_n': 1, 'lung_scc': 2}
Number of classes: 3
Training samples: 12000
Validation samples: 3000


In [6]:
# Channel Attention (Multi-Headed) Implementation

import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, Reshape, Permute, Concatenate

class MultiHeadChannelAttention(Layer):
    def __init__(self, num_heads=4, reduction=16, **kwargs):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.reduction = reduction

    def build(self, input_shape):
        self.channel = input_shape[-1]
        self.dense1 = [Dense(self.channel // self.reduction, activation='relu') for _ in range(self.num_heads)]
        self.dense2 = [Dense(self.channel) for _ in range(self.num_heads)]
        super().build(input_shape)

    def call(self, x):
        # Global pooling
        gap = tf.reduce_mean(x, axis=[1,2])  # shape: (batch, channels)
        gmp = tf.reduce_max(x, axis=[1,2])   # shape: (batch, channels)
        heads = []
        for i in range(self.num_heads):
            d1_gap = self.dense1[i](gap)
            d1_gmp = self.dense1[i](gmp)
            d2_gap = self.dense2[i](d1_gap)
            d2_gmp = self.dense2[i](d1_gmp)
            scale = tf.nn.sigmoid(d2_gap + d2_gmp)
            scale = Reshape((1,1,self.channel))(scale)
            heads.append(x * scale)
        # Concatenate heads along channel axis
        out = Concatenate(axis=-1)(heads)
        return out

# Usage in your lane function:
# from multi_head_attention import MultiHeadChannelAttention
# x = MultiHeadChannelAttention(num_heads=4, reduction=16)(x)

def multi_head_attention_block(x, reduction=16, name=None):
    """Multi-Headed Channel Attention block for CNN feature maps"""
    attn = MultiHeadChannelAttention(num_heads=NUM_ATTENTION_HEADS, reduction=reduction, name=name)(x)
    return attn

print("Multi-head attention block function defined successfully!")


Multi-head attention block function defined successfully!


In [7]:
# Preprocessing Lanes (one per backbone)
def lane(tensor, backbone="resnet", reduction=16):
    """Create a processing lane for each CNN backbone with multi-head channel attention"""
    if backbone == "resnet":
        x = Lambda(pre_resnet, name="pre_resnet")(tensor)
        x = ResNet50(include_top=False, weights='imagenet')(x)
    elif backbone == "densenet":
        x = Lambda(pre_densenet, name="pre_densenet")(tensor)
        x = DenseNet121(include_top=False, weights='imagenet')(x)
    elif backbone == "efficientnet":
        x = Lambda(pre_efficientnet, name="pre_efficientnet")(tensor)
        x = EfficientNetB0(include_top=False, weights='imagenet')(x)
    elif backbone == "inception":
        x = Lambda(pre_inception, name="pre_inception")(tensor)
        x = InceptionV3(include_top=False, weights='imagenet')(x)
    else:
        raise ValueError(f'Unknown backbone: {backbone}')
    # Add multi-head channel attention
    x = multi_head_attention_block(x, reduction=reduction, name=f"mhca_{backbone}")
    # Global Average Pooling to convert feature maps → vector
    x = GlobalAveragePooling2D(name=f"gap_{backbone}")(x)
    return x

print("Lane function updated for multi-head attention!")


Lane function updated for multi-head attention!


In [8]:
# Build Feature Extractor Model
print("Building multi-backbone feature concatenator with multi-head attention...")

# Define input tensor with image size (224x224x3 RGB)
inp = Input(shape=(224,224,3))

# Extract features from DenseNet lane (multi-head attention)
feat_d = lane(inp, "densenet", reduction=16)
# Extract features from ResNet lane (multi-head attention)
feat_r = lane(inp, "resnet", reduction=16)
# Extract features from EfficientNetB0 lane (multi-head attention)
feat_e = lane(inp, "efficientnet", reduction=16)
# Extract features from InceptionV3 lane (multi-head attention)
feat_i = lane(inp, "inception", reduction=16)

# Concatenate features from all four backbones
concat_feat = Concatenate(name="concat_feats")([feat_d, feat_r, feat_e, feat_i])

# Create feature extractor model (input → concatenated features)
feature_model = Model(inp, concat_feat)

# Get final concatenated feature dimension
feature_dim = feature_model.output_shape[-1]

print(f"Feature extractor built successfully!")
print(f"Feature dimension: {feature_dim}")

# Show model summary (layers, parameters, shapes)
feature_model.summary()


Building multi-backbone feature concatenator with multi-head attention...


2025-09-11 08:35:15.284909: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20763 MB memory:  -> device: 0, name: NVIDIA L4, pci bus id: 0000:00:04.0, compute capability: 8.9


Feature extractor built successfully!
Feature dimension: 51200
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 pre_densenet (Lambda)       (None, 224, 224, 3)          0         ['input_1[0][0]']             
                                                                                                  
 pre_resnet (Lambda)         (None, 224, 224, 3)          0         ['input_1[0][0]']             
                                                                                                  
 pre_efficientnet (Lambda)   (None, 224, 224, 3)          0         ['input_1[0][0]']             
                               

In [9]:
# Extract Deep Features
def extract_features(generator):
    """Extract features from a data generator using the feature model"""
    X, y = [], []
    steps = len(generator)
    for i in range(steps):
        imgs, labels = generator.next()
        feats = feature_model.predict(imgs, verbose=0)
        X.append(feats)
        y.append(labels)
        if (i + 1) % 10 == 0:
            print(f"Processed {i + 1}/{steps} batches")
    return np.vstack(X), np.vstack(y)

print("Feature extraction function defined!")

Feature extraction function defined!


In [10]:
# Extract Training Features
print("Extracting training features …")
X_tr, Y_tr_ohe = extract_features(train_gen)
print(f"Training features shape: {X_tr.shape}")
print(f"Training labels shape: {Y_tr_ohe.shape}")

Extracting training features …


2025-09-11 08:35:43.056397: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8900


Processed 10/500 batches
Processed 20/500 batches
Processed 30/500 batches
Processed 40/500 batches
Processed 50/500 batches
Processed 60/500 batches
Processed 70/500 batches
Processed 80/500 batches
Processed 90/500 batches
Processed 100/500 batches
Processed 110/500 batches
Processed 120/500 batches
Processed 130/500 batches
Processed 140/500 batches
Processed 150/500 batches
Processed 160/500 batches
Processed 170/500 batches
Processed 180/500 batches
Processed 190/500 batches
Processed 200/500 batches
Processed 210/500 batches
Processed 220/500 batches
Processed 230/500 batches
Processed 240/500 batches
Processed 250/500 batches
Processed 260/500 batches
Processed 270/500 batches
Processed 280/500 batches
Processed 290/500 batches
Processed 300/500 batches
Processed 310/500 batches
Processed 320/500 batches
Processed 330/500 batches
Processed 340/500 batches
Processed 350/500 batches
Processed 360/500 batches
Processed 370/500 batches
Processed 380/500 batches
Processed 390/500 bat

In [11]:
!pip install cython
!pip install pymrmr



In [17]:
## 1. mRMR Feature Ranking
try:
    import pymrmr
except ImportError:
    !pip install pymrmr
    import pymrmr

# Convert features and labels to DataFrame for pymrmr
import pandas as pd
# ...existing code...
import numpy as np, pandas as pd, gc

# ...existing code...
import numpy as np, pandas as pd

# ...existing code...
# ...existing code...
import numpy as np, time, gc
from sklearn.feature_selection import mutual_info_classif, VarianceThreshold
from sklearn.preprocessing import StandardScaler

def approximate_mrmr(X, y_ohe, n_features=200, sample_rows=1500, var_thresh=0.0,
                     mi_subset_rows=2000, redundancy_penalty=0.5, oversample_factor=2):
    """
    Fast approximate mRMR:
      1. Optional variance filter
      2. Mutual information relevance on row subset
      3. Greedy selection penalizing average absolute Pearson corr with already selected
    Returns list of original feature indices.
    """
    t0 = time.time()
    y = np.argmax(y_ohe, axis=1)
    n_samples, n_feats = X.shape
    Xf = X.astype(np.float32, copy=False)

    # Variance filter
    if var_thresh > 0:
        vt = VarianceThreshold(var_thresh)
        Xv = vt.fit_transform(Xf)
        kept_var = np.where(vt.get_support())[0]
    else:
        Xv = Xf
        kept_var = np.arange(n_feats)
    print(f"[approx-mRMR] After variance filter: {len(kept_var)} features")

    # Row subset for MI
    if mi_subset_rows and mi_subset_rows < Xv.shape[0]:
        rng = np.random.default_rng(42)
        rows = rng.choice(Xv.shape[0], size=mi_subset_rows, replace=False)
        X_mi = Xv[rows]
        y_mi = y[rows]
    else:
        X_mi = Xv
        y_mi = y
    # Compute MI
    mi = mutual_info_classif(X_mi, y_mi, discrete_features=False, n_neighbors=3, random_state=42)
    # Map back to original feature indices
    mi_global_idx = kept_var
    # Sort by MI
    order = np.argsort(mi)[::-1]
    mi_global_idx = mi_global_idx[order]
    mi_sorted = mi[order]

    # Take a pool larger than target
    pool_k = min(len(mi_global_idx), oversample_factor * n_features)
    pool_idx = mi_global_idx[:pool_k]
    pool_data = Xf[:, pool_idx]

    # Normalize pool for correlation computations
    scaler = StandardScaler(with_mean=True, with_std=True)
    pool_norm = scaler.fit_transform(pool_data)

    selected = []
    selected_set = set()
    # Precompute correlations lazily
    # We'll maintain mean |corr| with already selected features
    for _ in range(min(n_features, pool_k)):
        best_feat = None
        best_score = -1
        for j, feat in enumerate(pool_idx):
            if feat in selected_set:
                continue
            relevance = mi_sorted[np.where(mi_global_idx == feat)[0][0]]
            if not selected:
                score = relevance
            else:
                # Compute corr with already selected (on normalized matrix)
                curr_vec = pool_norm[:, np.where(pool_idx == feat)[0][0]]
                sel_cols = [np.where(pool_idx == f)[0][0] for f in selected if f in pool_idx]
                if sel_cols:
                    sel_mat = pool_norm[:, sel_cols]
                    corr = np.abs(np.dot(sel_mat.T, curr_vec) / (len(curr_vec)-1))
                    redundancy = corr.mean()
                else:
                    redundancy = 0.0
                score = relevance - redundancy_penalty * redundancy
            if score > best_score:
                best_score = score
                best_feat = feat
        selected.append(best_feat)
        selected_set.add(best_feat)
    print(f"[approx-mRMR] Selected {len(selected)} features in {time.time()-t0:.2f}s")
    return selected

print("mRMR feature selection function defined!")


## 2. Adaptive Grey Wolf Optimization (AGWO)
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# ...existing code...
# ...existing code...

import numpy as np, gc, hashlib
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

def _subset_hash(idxs):
    return hashlib.md5(np.asarray(idxs, dtype=np.int32).tobytes()).hexdigest()

def agwo_feature_selection_reduced(
    X_ranked,
    y_ohe,
    ranked_global_indices,
    n_wolves=18,
    n_iter=20,
    min_subset=200,
    max_subset=1200,
    row_sample=2500,
    knn_folds=3,
    rf_folds=2,
    rf_max_features=300,
    penalty_weight=0.05,
    patience=5,
    random_state=42,
    verbose=True,
    subset_size = None
):
    """
    Improved Adaptive Grey Wolf Optimization for feature subset selection.

    X_ranked: (n_samples, n_ranked_features) float32
    ranked_global_indices: mapping to original feature indices
    Returns: list of GLOBAL feature indices selected
    """
    rng = np.random.default_rng(random_state)
    y = np.argmax(y_ohe, axis=1)
    n_samples, n_feats = X_ranked.shape

    if subset_size is not None:
        min_subset = max_subset = int(min(subset_size, n_feats))

    # Row subsample (stratified) for fitness to speed up
    if row_sample and row_sample < n_samples:
        rows = []
        per_class = row_sample // len(np.unique(y))
        for cls in np.unique(y):
            cls_idx = np.where(y == cls)[0]
            take = min(per_class, len(cls_idx))
            rows.append(rng.choice(cls_idx, size=take, replace=False))
        rows = np.concatenate(rows)
    else:
        rows = np.arange(n_samples)

    X_fit = X_ranked[rows]
    y_fit = y[rows]

    # Precompute ranking order (identity initial)
    base_order = np.arange(n_feats)

    # Wolves: each has position vector in [0,1]^n_feats (compressed by sparse init)
    # To reduce memory, store only active indices + scalar bias
    def init_position():
        # Sparse random priorities
        vals = rng.random(n_feats)
        return vals

    wolves = [init_position() for _ in range(n_wolves)]

    # Adaptive subset schedule
    def subset_budget(iter_idx):
        # Linear growth; could switch to logarithmic if needed
        return int(min_subset + (max_subset - min_subset) * (iter_idx / max(1, n_iter - 1)))

    # Fitness cache
    fitness_cache = {}

    def eval_subset(local_idx):
        if len(local_idx) < 2:
            return 0.0
        key_hash = _subset_hash(local_idx)
        if key_hash in fitness_cache:
            return fitness_cache[key_hash]

        # Limit RF features to rf_max_features (random slice) for speed
        feat_slice = local_idx
        if len(feat_slice) > rf_max_features:
            feat_slice_rf = rng.choice(feat_slice, size=rf_max_features, replace=False)
        else:
            feat_slice_rf = feat_slice

        X_sub = X_fit[:, feat_slice]
        scaler = StandardScaler()
        X_sub = scaler.fit_transform(X_sub)

        # KNN CV
        skf_knn = StratifiedKFold(n_splits=knn_folds, shuffle=True, random_state=123)
        knn_scores = []
        knn = KNeighborsClassifier(n_neighbors=5, weights='distance')
        for tr, va in skf_knn.split(X_sub, y_fit):
            knn.fit(X_sub[tr], y_fit[tr])
            pred = knn.predict(X_sub[va])
            knn_scores.append(accuracy_score(y_fit[va], pred))
        knn_acc = np.mean(knn_scores)

        # RF (on smaller feature set) for robustness
        X_sub_rf = X_fit[:, feat_slice_rf]
        scaler_rf = StandardScaler()
        X_sub_rf = scaler_rf.fit_transform(X_sub_rf)
        skf_rf = StratifiedKFold(n_splits=rf_folds, shuffle=True, random_state=321)
        rf_scores = []
        rf = RandomForestClassifier(
            n_estimators=160,
            max_features='sqrt',
            n_jobs=-1,
            random_state=999
        )
        for tr, va in skf_rf.split(X_sub_rf, y_fit):
            rf.fit(X_sub_rf[tr], y_fit[tr])
            pred = rf.predict(X_sub_rf[va])
            rf_scores.append(accuracy_score(y_fit[va], pred))
        rf_acc = np.mean(rf_scores)

        size_penalty = penalty_weight * (len(local_idx) / max_subset)
        fitness = 0.65 * knn_acc + 0.35 * rf_acc - size_penalty
        fitness_cache[key_hash] = fitness
        return fitness

    # Convert continuous priority vector → feature index subset
    def decode(position, k):
        # Take top-k indices
        order = np.argpartition(position, -k)[-k:]
        # For stable ordering
        return order[np.argsort(-position[order])]

    # Main AGWO loop
    best_global_subset = None
    best_fitness = -1
    no_improve = 0

    for it in range(n_iter):
        k_budget = subset_budget(it)

        # Decode all wolves
        wolf_subsets = [decode(w, k_budget) for w in wolves]
        wolf_scores = [eval_subset(sub) for sub in wolf_subsets]

        # Identify alpha, beta, delta
        order = np.argsort(wolf_scores)[::-1]
        alpha, beta, delta = wolves[order[0]], wolves[order[1]], wolves[order[2]]
        alpha_subset = wolf_subsets[order[0]]
        alpha_score = wolf_scores[order[0]]

        if alpha_score > best_fitness:
            best_fitness = alpha_score
            best_global_subset = alpha_subset.copy()
            no_improve = 0
        else:
            no_improve += 1

        if verbose:
            print(f"[AGWO] iter {it+1}/{n_iter} k={k_budget} alpha_fit={alpha_score:.4f} best={best_fitness:.4f} cache={len(fitness_cache)}")

        if no_improve >= patience:
            if verbose:
                print(f"[AGWO] Early stopping (patience {patience})")
            break

        # Grey Wolf coefficient a (linear decay)
        a = 2 - 2 * (it / max(1, n_iter - 1))

        # Update each wolf (continuous adaptation)
        new_wolves = []
        for idx, w in enumerate(wolves):
            if idx in order[:3]:
                new_wolves.append(w)  # keep alpha, beta, delta
                continue
            A1 = 2 * a * rng.random(n_feats) - a
            C1 = 2 * rng.random(n_feats)

            A2 = 2 * a * rng.random(n_feats) - a
            C2 = 2 * rng.random(n_feats)

            A3 = 2 * a * rng.random(n_feats) - a
            C3 = 2 * rng.random(n_feats)

            D_alpha = np.abs(C1 * alpha - w)
            D_beta  = np.abs(C2 * beta  - w)
            D_delta = np.abs(C3 * delta - w)

            X1 = alpha - A1 * D_alpha
            X2 = beta  - A2 * D_beta
            X3 = delta - A3 * D_delta

            new_pos = (X1 + X2 + X3) / 3.0

            # Mutation / diversity
            if rng.random() < 0.15:
                mut_mask = rng.random(n_feats) < 0.002  # flip sparse
                noise = rng.normal(0, 0.25, np.sum(mut_mask))
                new_pos[mut_mask] += noise

            # Clamp
            new_pos = np.clip(new_pos, -1.0, 1.0)
            new_wolves.append(new_pos)

        # Diversity injection if stagnating
        if no_improve == patience - 1:
            inject_count = max(2, n_wolves // 5)
            for _ in range(inject_count):
                ridx = rng.integers(3, n_wolves)  # avoid top 3
                new_wolves[ridx] = init_position()

        wolves = new_wolves

    # Decode best subset using final budget (or its original size)
    final_k = len(best_global_subset)
    final_local = best_global_subset
    # Map to global feature indices
    selected_global = [ranked_global_indices[i] for i in final_local]

    if verbose:
        print(f"[AGWO] Finished: selected {len(selected_global)} features; best_fitness={best_fitness:.4f}")

    return selected_global

print("Feature selection pipeline (mRMR + AGWO) implemented.")

mRMR feature selection function defined!
Feature selection pipeline (mRMR + AGWO) implemented.


In [13]:
# Extract Validation Features
print("Extracting validation features …")
X_va, Y_va_ohe = extract_features(val_gen)
print(f"Validation features shape: {X_va.shape}")
print(f"Validation labels shape: {Y_va_ohe.shape}")

Extracting validation features …


Processed 10/125 batches
Processed 20/125 batches
Processed 30/125 batches
Processed 40/125 batches
Processed 50/125 batches
Processed 60/125 batches
Processed 70/125 batches
Processed 80/125 batches
Processed 90/125 batches
Processed 100/125 batches
Processed 110/125 batches
Processed 120/125 batches
Validation features shape: (3000, 51200)
Validation labels shape: (3000, 3)


In [14]:
# Combine Features and Convert Labels
X_full = np.vstack([X_tr, X_va])
y_full = np.argmax(np.vstack([Y_tr_ohe, Y_va_ohe]), axis=1)

print(f"Total features shape: {X_full.shape}")
print(f"Total labels shape: {y_full.shape}")
print(f"Classes present: {np.unique(y_full)}")
print(f"Class distribution: {np.bincount(y_full)}")

Total features shape: (15000, 51200)
Total labels shape: (15000,)
Classes present: [0 1 2]
Class distribution: [5000 5000 5000]


In [18]:
# --- mRMR + AGWO Feature Selection Pipeline ---
t_total = time.time()

# Parameters (shrink aggressively first)
n_mrmr = 250          # lower than 200 to speed up
sample_rows = 1200
mi_subset_rows = 1500
redundancy_penalty = 0.4
oversample_factor = 2
subset_size = 30      # AGWO subset
n_wolves = 8
n_iter = 8

# 1. Fast approximate mRMR replacement
ranked_features = approximate_mrmr(
    X_tr, Y_tr_ohe,
    n_features=n_mrmr,
    sample_rows=sample_rows,
    mi_subset_rows=mi_subset_rows,
    redundancy_penalty=redundancy_penalty,
    oversample_factor=oversample_factor
)
print(f"[pipeline] Ranked features: {len(ranked_features)}")

# 2. Slice training matrix to ranked features ONLY for AGWO
X_tr_ranked = X_tr[:, ranked_features]

selected_features = agwo_feature_selection_reduced(
    X_tr_ranked, Y_tr_ohe, ranked_features,
    n_wolves=n_wolves, n_iter=n_iter, subset_size=subset_size
)
print(f"[pipeline] AGWO selected {len(selected_features)} features.")

# 4. Apply selection to full (train+val) without building giant X_full first
X_tr_sel = X_tr[:, selected_features]
X_va_sel = X_va[:, selected_features]
y_full = np.argmax(np.vstack([Y_tr_ohe, Y_va_ohe]), axis=1)
X_full_sel = np.vstack([X_tr_sel, X_va_sel])

# 5. Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_full_sel, y_full, test_size=0.20, random_state=SEED, stratify=y_full
)

print(f"[pipeline] Train {X_train.shape}, Test {X_test.shape}, total time {time.time()-t_total:.2f}s")

# Cleanup
del X_tr_ranked, X_tr_sel, X_va_sel
gc.collect()

print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")
print(f"Training class distribution: {np.bincount(y_train)}")
print(f"Test class distribution: {np.bincount(y_test)}")

[approx-mRMR] After variance filter: 51200 features


[approx-mRMR] Selected 250 features in 686.83s
[pipeline] Ranked features: 250
[AGWO] iter 1/8 k=30 alpha_fit=0.8834 best=0.8834 cache=8
[AGWO] iter 2/8 k=30 alpha_fit=0.8834 best=0.8834 cache=13
[AGWO] iter 3/8 k=30 alpha_fit=0.8834 best=0.8834 cache=18
[AGWO] iter 4/8 k=30 alpha_fit=0.8857 best=0.8857 cache=23
[AGWO] iter 5/8 k=30 alpha_fit=0.8857 best=0.8857 cache=28
[AGWO] iter 6/8 k=30 alpha_fit=0.8857 best=0.8857 cache=33
[AGWO] iter 7/8 k=30 alpha_fit=0.8857 best=0.8857 cache=38
[AGWO] iter 8/8 k=30 alpha_fit=0.8857 best=0.8857 cache=43
[AGWO] Finished: selected 30 features; best_fitness=0.8857
[pipeline] AGWO selected 30 features.
[pipeline] Train (12000, 30), Test (3000, 30), total time 725.65s
Training set shape: (12000, 30)
Test set shape: (3000, 30)
Training class distribution: [4000 4000 4000]
Test class distribution: [1000 1000 1000]


In [19]:
# Initialize Classifiers
!pip install xgboost
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

knn = KNeighborsClassifier(n_neighbors=5, weights='distance')
svm = SVC(kernel='rbf', probability=True, C=1.0, gamma='scale', random_state=SEED)
rf  = RandomForestClassifier(n_estimators=300, random_state=SEED, n_jobs=-1)
xgb = XGBClassifier(n_estimators=200, random_state=SEED, use_label_encoder=False, eval_metric='mlogloss')
lr  = LogisticRegression(max_iter=1000, random_state=SEED, n_jobs=-1)

print("Classifiers initialized:")
print(f"  KNN: k=5, weights='distance'")
print(f"  SVM: RBF kernel, C=1.0, gamma='scale'")
print(f"  Random Forest: 300 trees")
print(f"  XGBoost: 200 estimators")
print(f"  Logistic Regression: max_iter=1000")

Classifiers initialized:
  KNN: k=5, weights='distance'
  SVM: RBF kernel, C=1.0, gamma='scale'
  Random Forest: 300 trees
  XGBoost: 200 estimators
  Logistic Regression: max_iter=1000


In [20]:
# Train Classifiers
print("Training classifiers …")

print("  Training KNN...")
knn.fit(X_train, y_train)

print("  Training SVM...")
svm.fit(X_train, y_train)

print("  Training Random Forest...")
rf.fit(X_train, y_train)

print("  Training XGBoost...")
xgb.fit(X_train, y_train)

print("  Training Logistic Regression...")
lr.fit(X_train, y_train)

print("All classifiers trained successfully!")

Training classifiers …
  Training KNN...
  Training SVM...
  Training Random Forest...
  Training XGBoost...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  Training Logistic Regression...
All classifiers trained successfully!


In [21]:
# Make Predictions
print("Making predictions...")

knn_pred = knn.predict(X_test)
svm_pred = svm.predict(X_test)
rf_pred  = rf.predict(X_test)
xgb_pred = xgb.predict(X_test)
lr_pred  = lr.predict(X_test)

# Probabilistic predictions (for ensemble if needed)
knn_proba = knn.predict_proba(X_test) if hasattr(knn, 'predict_proba') else None
svm_proba = svm.predict_proba(X_test) if hasattr(svm, 'predict_proba') else None
rf_proba  = rf.predict_proba(X_test) if hasattr(rf, 'predict_proba') else None
xgb_proba = xgb.predict_proba(X_test) if hasattr(xgb, 'predict_proba') else None
lr_proba  = lr.predict_proba(X_test) if hasattr(lr, 'predict_proba') else None

print("Predictions completed!")

Making predictions...
Predictions completed!


In [22]:
# Individual Classifier Results
print("Individual Classifier Accuracies:")
knn_acc = accuracy_score(y_test, knn_pred)
svm_acc = accuracy_score(y_test, svm_pred)
rf_acc = accuracy_score(y_test, rf_pred)
xgb_acc = accuracy_score(y_test, xgb_pred)
lr_acc = accuracy_score(y_test, lr_pred)

print(f"  KNN: {knn_acc:.4f}")
print(f"  SVM: {svm_acc:.4f}")
print(f"  RF : {rf_acc:.4f}")
print(f"  XGB: {xgb_acc:.4f}")
print(f"  LR : {lr_acc:.4f}")

# Display individual classification reports
target_names = [id2label[i] for i in range(num_classes)]

print("\n=== KNN Classification Report ===")
print(classification_report(y_test, knn_pred, target_names=target_names))

print("\n=== SVM Classification Report ===")
print(classification_report(y_test, svm_pred, target_names=target_names))

print("\n=== Random Forest Classification Report ===")
print(classification_report(y_test, rf_pred, target_names=target_names))

print("\n=== XGBoost Classification Report ===")
print(classification_report(y_test, xgb_pred, target_names=target_names))

print("\n=== Logistic Regression Classification Report ===")
print(classification_report(y_test, lr_pred, target_names=target_names))

Individual Classifier Accuracies:
  KNN: 0.9307
  SVM: 0.9447
  RF : 0.9487
  XGB: 0.9543
  LR : 0.9420

=== KNN Classification Report ===
              precision    recall  f1-score   support

    lung_aca       0.89      0.90      0.90      1000
      lung_n       0.99      0.99      0.99      1000
    lung_scc       0.91      0.90      0.90      1000

    accuracy                           0.93      3000
   macro avg       0.93      0.93      0.93      3000
weighted avg       0.93      0.93      0.93      3000


=== SVM Classification Report ===
              precision    recall  f1-score   support

    lung_aca       0.93      0.90      0.92      1000
      lung_n       0.99      0.99      0.99      1000
    lung_scc       0.91      0.94      0.93      1000

    accuracy                           0.94      3000
   macro avg       0.94      0.94      0.94      3000
weighted avg       0.94      0.94      0.94      3000


=== Random Forest Classification Report ===
              preci

In [23]:
# Ensemble Fusion (Priority-Based Strategy)
# Priority: SVM > XGBoost > RF > KNN > LR
# If SVM and XGBoost agree, use that prediction. Else, use SVM. If not, use XGBoost. Else, fallback to majority vote.
def priority_ensemble(svm_pred, xgb_pred, rf_pred, knn_pred, lr_pred):
    preds = np.stack([knn_pred, svm_pred, rf_pred, xgb_pred, lr_pred], axis=0)
    final = []
    for i in range(svm_pred.shape[0]):
        if svm_pred[i] == xgb_pred[i]:
            final.append(svm_pred[i])
        elif svm_pred[i] == rf_pred[i]:
            final.append(svm_pred[i])
        elif xgb_pred[i] == rf_pred[i]:
            final.append(xgb_pred[i])
        else:
            # fallback to majority vote
            vals, counts = np.unique(preds[:, i], return_counts=True)
            final.append(vals[np.argmax(counts)])
    return np.array(final)

ens = priority_ensemble(svm_pred, xgb_pred, rf_pred, knn_pred, lr_pred)
ens_acc = accuracy_score(y_test, ens)

print(f"Ensemble Accuracy (Priority-Based): {ens_acc:.4f}")
print(f"\nImprovement over best individual: {ens_acc - max(knn_acc, svm_acc, rf_acc, xgb_acc, lr_acc):.4f}")

print("\n=== Ensemble Classification Report ===")
print(classification_report(y_test, ens, target_names=target_names))

Ensemble Accuracy (Priority-Based): 0.9513

Improvement over best individual: -0.0030

=== Ensemble Classification Report ===
              precision    recall  f1-score   support

    lung_aca       0.94      0.91      0.93      1000
      lung_n       1.00      1.00      1.00      1000
    lung_scc       0.92      0.95      0.93      1000

    accuracy                           0.95      3000
   macro avg       0.95      0.95      0.95      3000
weighted avg       0.95      0.95      0.95      3000



In [24]:
# Weighted-Average Ensemble Method (Performance-Ranked)
import numpy as np

# 1. Gather classifier predictions and accuracies
classifier_preds = [knn_pred, svm_pred, rf_pred, xgb_pred, lr_pred]
classifier_accs = [knn_acc, svm_acc, rf_acc, xgb_acc, lr_acc]
classifier_names = ['KNN', 'SVM', 'RF', 'XGB', 'LR']

# 2. Rank classifiers by accuracy (descending)
ranked_indices = np.argsort(classifier_accs)[::-1]
ranked_accs = [classifier_accs[i] for i in ranked_indices]
ranked_preds = [classifier_preds[i] for i in ranked_indices]
ranked_names = [classifier_names[i] for i in ranked_indices]

print('Classifier ranking (best to worst):')
for i, name in enumerate(ranked_names):
    print(f'  {i+1}. {name} (acc={ranked_accs[i]:.4f})')

# 3. Calculate intermediate scores T_j
T = [1.0]
for j in range(1, len(ranked_accs)):
    T.append(T[-1] * ranked_accs[j-1])

# 4. Normalize to get weights epsilon_j
T_sum = sum(T)
weights = [t / T_sum for t in T]

print('Classifier weights (epsilon_j):')
for i, (name, w) in enumerate(zip(ranked_names, weights)):
    print(f'  {name}: {w:.4f}')

# 5. Weighted voting for each test sample
n_classes = num_classes
n_samples = len(y_test)
weighted_votes = np.zeros((n_samples, n_classes))

for clf_idx, (pred, w) in enumerate(zip(ranked_preds, weights)):
    for i in range(n_samples):
        weighted_votes[i, pred[i]] += w

weighted_ensemble_pred = np.argmax(weighted_votes, axis=1)

weighted_ens_acc = accuracy_score(y_test, weighted_ensemble_pred)

print(f'Weighted-Average Ensemble Accuracy: {weighted_ens_acc:.4f}')
print(f'\nImprovement over best individual: {weighted_ens_acc - ranked_accs[0]:.4f}')

print('\n=== Weighted-Average Ensemble Classification Report ===')
print(classification_report(y_test, weighted_ensemble_pred, target_names=target_names))

Classifier ranking (best to worst):
  1. XGB (acc=0.9543)
  2. RF (acc=0.9487)
  3. SVM (acc=0.9447)
  4. LR (acc=0.9420)
  5. KNN (acc=0.9307)
Classifier weights (epsilon_j):
  XGB: 0.2212
  RF: 0.2111
  SVM: 0.2003
  LR: 0.1892
  KNN: 0.1782
Weighted-Average Ensemble Accuracy: 0.9513

Improvement over best individual: -0.0030

=== Weighted-Average Ensemble Classification Report ===
              precision    recall  f1-score   support

    lung_aca       0.94      0.91      0.93      1000
      lung_n       0.99      1.00      1.00      1000
    lung_scc       0.92      0.94      0.93      1000

    accuracy                           0.95      3000
   macro avg       0.95      0.95      0.95      3000
weighted avg       0.95      0.95      0.95      3000



In [25]:

# filepath: /lung_cancer/code_multihead.ipynb
# ...existing code...
# Summary Results
print("\n" + "="*60)
print("FINAL RESULTS SUMMARY")
print("="*60)

print(f"Total samples processed: {len(y_full)}")

# Resolve selected features list (legacy variable fallback)
if 'selected_features' in globals():
    sel_list = selected_features
elif 'sel_idx' in globals():
    sel_list = sel_idx
elif 'feature_subset' in globals():
    sel_list = feature_subset
else:
    sel_list = []

# Try to infer original feature count
if 'X_tr_original' in globals():
    orig_feat_total = X_tr_original.shape[1]
elif 'X_tr' in globals():
    orig_feat_total = X_tr.shape[1]
elif 'X_full' in globals():
    orig_feat_total = X_full.shape[1]
else:
    # Fallback to selected count (prevents division error)
    orig_feat_total = max(len(sel_list), 1)

selected_count = len(sel_list)
pct = (selected_count / orig_feat_total) if orig_feat_total else 0.0
print(f"Features selected by AGWO: {selected_count} / {orig_feat_total} ({pct:.1%})")

print(f"Test set size: {len(y_test)}")
print("\nClassifier Accuracies:")
print(f"  KNN:               {knn_acc:.4f}")
print(f"  SVM:               {svm_acc:.4f}")
print(f"  Random Forest:     {rf_acc:.4f}")
print(f"  Ensemble (Fusion): {ens_acc:.4f} ← BEST")

print("\nClass Labels:")
for i, label in id2label.items():
    print(f"  {i}: {label}")
print("="*60)
# ...existing code...


FINAL RESULTS SUMMARY
Total samples processed: 15000
Features selected by AGWO: 30 / 51200 (0.1%)
Test set size: 3000

Classifier Accuracies:
  KNN:               0.9307
  SVM:               0.9447
  Random Forest:     0.9487
  Ensemble (Fusion): 0.9513 ← BEST

Class Labels:
  0: lung_aca
  1: lung_n
  2: lung_scc
