# Lung Histopathology Classification: ACA / N / SCC
## Multi-CNN + Channel Attention + GA + KNN/SVM/RF + Fusion

This notebook implements a comprehensive lung histopathology classification system that combines:
- Multiple CNN backbones (DenseNet121, ResNet50, VGG16)
- Channel attention mechanism (SE blocks)
- Genetic Algorithm for feature selection
- Ensemble of classical ML classifiers (KNN, SVM, Random Forest)
- Majority voting fusion

In [2]:
!pip install -r requirements.txt


Collecting numpy==1.26.4 (from -r requirements.txt (line 1))
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting pandas (from -r requirements.txt (line 2))
  Downloading pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
Collecting scipy (from -r requirements.txt (line 3))
  Downloading scipy-1.16.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (62 kB)
Collecting tensorflow==2.16.1 (from -r requirements.txt (line 4))
  Downloading tensorflow-2.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting scikit-learn (from -r requirements.txt (line 5))
  Downloading scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting deap (from -r requirements.txt (line 6))
  Downloading deap-1.4.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.met

In [3]:
# Import required libraries
import os, random
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras.layers import (Input, GlobalAveragePooling2D, GlobalMaxPooling2D,
                                     Concatenate, Dense, Reshape, Multiply, Lambda)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.applications import DenseNet121, ResNet50, EfficientNetB0, InceptionV3
from tensorflow.keras.applications.densenet import preprocess_input as pre_densenet
from tensorflow.keras.applications.resnet import preprocess_input as pre_resnet
from tensorflow.keras.applications.efficientnet import preprocess_input as pre_efficientnet
from tensorflow.keras.applications.inception_v3 import preprocess_input as pre_inception

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

print("All libraries imported successfully!")

2025-10-30 17:50:19.071181: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-30 17:50:19.154859: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


All libraries imported successfully!


In [4]:
# GPU Detection and Configuration for L4
import tensorflow as tf

print("=" * 70)
print("🔍 L4 GPU DETECTION & CONFIGURATION")
print("=" * 70)

print(f"TensorFlow version: {tf.__version__}")
print(f"Built with CUDA: {tf.test.is_built_with_cuda()}")

# Check for GPUs
gpus = tf.config.list_physical_devices('GPU')
print(f"\n📱 Physical GPUs: {gpus}")

if gpus:
    try:
        tf.config.set_visible_devices(gpus, 'GPU')
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"✅ {len(gpus)} GPU(s) configured with memory growth")
        # Test GPU matmul
        with tf.device('/GPU:0'):
            a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
            b = tf.matmul(a, a)
        print("✅ GPU test computation:")
        print(b.numpy())
    except RuntimeError as e:
        print(f"⚠️ GPU configuration warning: {e}")
else:
    print("❌ NO GPU DETECTED (running on CPU)")
    print("If you expect GPU, install CUDA-enabled TF and restart kernel:")
    print("  !pip install --upgrade tensorflow[and-cuda]==2.15.0")

# Show logical devices
logical = tf.config.list_logical_devices('GPU')
print(f"\n🎯 Logical GPU devices: {logical}")
print("=" * 70)

🔍 L4 GPU DETECTION & CONFIGURATION
TensorFlow version: 2.16.1
Built with CUDA: True

📱 Physical GPUs: []
❌ NO GPU DETECTED (running on CPU)
If you expect GPU, install CUDA-enabled TF and restart kernel:
  !pip install --upgrade tensorflow[and-cuda]==2.15.0

🎯 Logical GPU devices: []


2025-10-30 17:50:25.948343: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [5]:
# Quick diagnostic - Run this if GPU not detected
# Check NVIDIA driver and CUDA availability
import subprocess
import sys

print("🔍 NVIDIA/CUDA Diagnostic\n")

# Check nvidia-smi
try:
    result = subprocess.run(['nvidia-smi'], capture_output=True, text=True, timeout=5)
    if result.returncode == 0:
        print("✅ NVIDIA driver detected:")
        print(result.stdout)
    else:
        print("❌ nvidia-smi failed")
except FileNotFoundError:
    print("❌ nvidia-smi not found - NVIDIA driver may not be installed")
except Exception as e:
    print(f"❌ Error running nvidia-smi: {e}")

# Check CUDA libraries
print("\n🔍 Checking CUDA libraries...")
try:
    import ctypes
    ctypes.CDLL('libcudart.so')
    print("✅ libcudart.so found (CUDA runtime)")
except:
    print("❌ libcudart.so not found")
    
try:
    import ctypes
    ctypes.CDLL('libcublas.so')
    print("✅ libcublas.so found (CUDA BLAS)")
except:
    print("❌ libcublas.so not found")

print("\n💡 If CUDA libraries are missing, install tensorflow[and-cuda]:")
print("   !pip install --upgrade tensorflow[and-cuda]==2.15.0")
print("   Then RESTART THE KERNEL")

🔍 NVIDIA/CUDA Diagnostic

✅ NVIDIA driver detected:
Thu Oct 30 17:50:37 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:04.0 Off |                    0 |
| N/A   38C    P8             16W /   72W |       3MiB /  23034MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+--------------------

In [6]:
!pip install nvidia-cudnn-cu12  # If needed


Collecting nvidia-cudnn-cu12
  Downloading nvidia_cudnn_cu12-9.14.0.64-py3-none-manylinux_2_27_x86_64.whl.metadata (1.8 kB)
Collecting nvidia-cublas-cu12 (from nvidia-cudnn-cu12)
  Downloading nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_x86_64.whl.metadata (1.7 kB)
Downloading nvidia_cudnn_cu12-9.14.0.64-py3-none-manylinux_2_27_x86_64.whl (647.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.1/647.1 MB[0m [31m78.7 MB/s[0m  [33m0:00:05[0mm0:00:01[0m00:01[0m
[?25hDownloading nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_x86_64.whl (581.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m581.2/581.2 MB[0m [31m56.7 MB/s[0m  [33m0:00:06[0mm0:00:01[0m00:01[0m
[?25hInstalling collected packages: nvidia-cublas-cu12, nvidia-cudnn-cu12
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [nvidia-cudnn-cu12]nvidia-cudnn-cu12]
[1A[2KSuccessfully installed nvidia-cublas-cu12-12.9.1.4 nvidia-cudnn-cu12-9.14.0.64


In [8]:
import tensorflow as tf

# Check GPU availability
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

# Test GPU computation
with tf.device('/GPU:0'):
    a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    b = tf.constant([[1.0, 1.0], [0.0, 1.0]])
    c = tf.matmul(a, b)
    print("GPU computation successful:", c)

Num GPUs Available: 0
GPU computation successful: tf.Tensor(
[[1. 3.]
 [3. 7.]], shape=(2, 2), dtype=float32)


In [9]:
# Configuration and Data Setup - OPTIMIZED FOR L4 GPU
DATA_DIR   = "/teamspace/studios/this_studio/lung_cancer/dataset/lung_image_sets"  # << set this
IMG_SIZE   = (224, 224)
BATCH_SIZE = 64  # OPTIMIZED: Increased from 24 for L4 GPU (16GB VRAM)
SEED       = 42

# Set random seeds for reproducibility
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Check if GPU is available before enabling optimizations
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    print(f'✅ {len(gpus)} GPU(s) detected - Enabling GPU optimizations')
    
    try:
        # Enable memory growth to prevent OOM errors
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print('✅ GPU memory growth enabled')
    except RuntimeError as e:
        print(f'⚠️ GPU config warning: {e}')
    
    # OPTIMIZATION: Enable mixed precision for 2-3x speedup on L4
    from tensorflow.keras import mixed_precision
    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_global_policy(policy)
    print('✅ Mixed precision enabled (float16 compute, float32 variables)')
    
    print(f"🚀 L4 GPU Configuration:")
    print(f"   • Batch Size: {BATCH_SIZE} (optimized for L4 16GB VRAM)")
    print(f"   • Mixed Precision: Enabled")
    print(f"   • Memory Growth: Enabled")
else:
    print('⚠️ No GPU detected - Running on CPU')
    print('   If you expect GPU, please:')
    print('   1. Run the diagnostic cell above')
    print('   2. Install: !pip install tensorflow[and-cuda]==2.15.0')
    print('   3. Restart kernel')
    BATCH_SIZE = 24  # Reduce batch size for CPU
    print(f'   Batch size reduced to {BATCH_SIZE} for CPU')

print(f"\n📋 Configuration:")
print(f"   Data Directory: {DATA_DIR}")
print(f"   Image Size: {IMG_SIZE}")
print(f"   Batch Size: {BATCH_SIZE}")
print(f"   Random Seed: {SEED}")

⚠️ No GPU detected - Running on CPU
   If you expect GPU, please:
   1. Run the diagnostic cell above
   2. Install: !pip install tensorflow[and-cuda]==2.15.0
   3. Restart kernel
   Batch size reduced to 24 for CPU

📋 Configuration:
   Data Directory: /teamspace/studios/this_studio/lung_cancer/dataset/lung_image_sets
   Image Size: (224, 224)
   Batch Size: 24
   Random Seed: 42


In [10]:
# Number of attention heads for multi-head channel attention
NUM_ATTENTION_HEADS = 8

In [11]:
train_datagen = ImageDataGenerator(
    validation_split=0.20,
    rotation_range=20,
    horizontal_flip=True,
    # IMPORTANT: no rescale here, since we feed raw to model-specific preprocessors
)

def make_gen(subset):
    return train_datagen.flow_from_directory(
        DATA_DIR,
        target_size=IMG_SIZE,
        class_mode='categorical',
        batch_size=BATCH_SIZE,
        subset=subset,
        seed=SEED,
        shuffle=True
    )

train_gen = make_gen('training')
val_gen   = make_gen('validation')
num_classes = train_gen.num_classes
class_indices = train_gen.class_indices
id2label = {v:k for k,v in class_indices.items()}

print("Classes:", class_indices)
print(f"Number of classes: {num_classes}")
print(f"Training samples: {train_gen.samples}")
print(f"Validation samples: {val_gen.samples}")

Found 12000 images belonging to 3 classes.
Found 3000 images belonging to 3 classes.
Classes: {'lung_aca': 0, 'lung_n': 1, 'lung_scc': 2}
Number of classes: 3
Training samples: 12000
Validation samples: 3000


In [12]:
# Channel Attention (Multi-Headed) Implementation - GPU OPTIMIZED
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense

print("=" * 60)
print("GPU CONFIGURATION")
print("=" * 60)

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"✓ {len(gpus)} GPU(s) detected and configured")
        print(f"  Devices: {[gpu.name for gpu in gpus]}")
    except RuntimeError as e:
        print(f"✗ GPU configuration error: {e}")
else:
    print("✗ No GPU detected - will use CPU")

print(f"\nTensorFlow: {tf.__version__}")
print(f"CUDA support: {tf.test.is_built_with_cuda()}")
print("=" * 60 + "\n")

class MultiHeadChannelAttention(Layer):
    def __init__(self, num_heads=4, reduction=16, **kwargs):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.reduction = reduction

    def build(self, input_shape):
        self.channel = input_shape[-1]
        reduced_channels = max(self.channel // self.reduction, 1)
        self.dense1 = Dense(self.num_heads * reduced_channels, activation='relu', name=f'{self.name}_d1')
        self.dense2 = Dense(self.num_heads * self.channel, name=f'{self.name}_d2')
        super().build(input_shape)

    def call(self, x):
        batch_size = tf.shape(x)[0]
        gap = tf.reduce_mean(x, axis=[1,2])
        gmp = tf.reduce_max(x, axis=[1,2])
        gap_feat = self.dense1(gap)
        gmp_feat = self.dense1(gmp)
        gap_attn = self.dense2(gap_feat)
        gmp_attn = self.dense2(gmp_feat)
        combined = tf.reshape(gap_attn + gmp_attn, [batch_size, self.num_heads, self.channel])
        attention = tf.nn.sigmoid(tf.reduce_mean(combined, axis=1))
        attention = tf.reshape(attention, [batch_size, 1, 1, self.channel])
        return x * attention


def multi_head_attention_block(x, reduction=16, name=None):
    NUM_ATTENTION_HEADS = 4
    return MultiHeadChannelAttention(num_heads=NUM_ATTENTION_HEADS, reduction=reduction, name=name)(x)

print("✓ Multi-head attention block ready (GPU-optimized)!")

GPU CONFIGURATION
✗ No GPU detected - will use CPU

TensorFlow: 2.16.1
CUDA support: True

✓ Multi-head attention block ready (GPU-optimized)!


In [13]:
# Lane function with GPU-accelerated backbones
from tensorflow.keras.layers import Lambda, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50, DenseNet121, EfficientNetB0, InceptionV3

def lane(tensor, backbone="resnet", reduction=16):
    """Create a processing lane for each CNN backbone with multi-head channel attention (GPU-optimized)"""
    if backbone == "resnet":
        x = Lambda(pre_resnet, name="pre_resnet")(tensor)
        x = ResNet50(include_top=False, weights='imagenet')(x)
    elif backbone == "densenet":
        x = Lambda(pre_densenet, name="pre_densenet")(tensor)
        x = DenseNet121(include_top=False, weights='imagenet')(x)
    elif backbone == "efficientnet":
        x = Lambda(pre_efficientnet, name="pre_efficientnet")(tensor)
        x = EfficientNetB0(include_top=False, weights='imagenet')(x)
    elif backbone == "inception":
        x = Lambda(pre_inception, name="pre_inception")(tensor)
        x = InceptionV3(include_top=False, weights='imagenet')(x)
    else:
        raise ValueError(f'Unknown backbone: {backbone}')
    
    x = multi_head_attention_block(x, reduction=reduction, name=f"mhca_{backbone}")
    x = GlobalAveragePooling2D(name=f"gap_{backbone}")(x)
    return x

print("✓ Lane function ready with GPU-optimized multi-head attention!")

✓ Lane function ready with GPU-optimized multi-head attention!


In [14]:
# Build Feature Extractor Model
print("Building multi-backbone feature concatenator with multi-head attention...")

# Define input tensor with image size (224x224x3 RGB)
inp = Input(shape=(224,224,3))

# Extract features from DenseNet lane (multi-head attention)
feat_d = lane(inp, "densenet", reduction=16)
# Extract features from ResNet lane (multi-head attention)
feat_r = lane(inp, "resnet", reduction=16)
# Extract features from EfficientNetB0 lane (multi-head attention)
feat_e = lane(inp, "efficientnet", reduction=16)
# Extract features from InceptionV3 lane (multi-head attention)
feat_i = lane(inp, "inception", reduction=16)

# Concatenate features from all four backbones
concat_feat = Concatenate(name="concat_feats")([feat_d, feat_r, feat_e, feat_i])

# Create feature extractor model (input → concatenated features)
feature_model = Model(inp, concat_feat)

# Get final concatenated feature dimension
feature_dim = feature_model.output_shape[-1]

print(f"Feature extractor built successfully!")
print(f"Feature dimension: {feature_dim}")

# Show model summary (layers, parameters, shapes)
feature_model.summary()


Building multi-backbone feature concatenator with multi-head attention...


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Feature extractor built successfully!
Feature dimension: 6400


In [15]:
# Extract Deep Features with GPU Optimization - OPTIMIZED
from math import ceil

def extract_features(generator):
    """Extract features with GPU acceleration and optimized batching"""
    import time
    
    print("🚀 Starting GPU-optimized feature extraction...")
    if tf.config.list_physical_devices('GPU'):
        print("🔥 Using L4 GPU with mixed precision (if enabled)")
        with tf.device('/GPU:0'):
            return _extract_features_impl(generator)
    else:
        print("💻 Using CPU (GPU not available)")
        return _extract_features_impl(generator)

def _extract_features_impl(generator):
    import time
    X, y = [], []
    steps = len(generator)
    start_time = time.time()
    
    for i in range(steps):
        imgs, labels = next(generator)
        feats = feature_model.predict(imgs, verbose=0)
        X.append(feats)
        y.append(labels)
        
        if (i + 1) % 20 == 0:
            elapsed = time.time() - start_time
            avg = elapsed / (i + 1)
            eta = (steps - (i + 1)) * avg
            print(f"📊 [{i + 1}/{steps}] Avg batch: {avg:.2f}s | ETA: {eta/60:.1f}m")
    
    total = time.time() - start_time
    print(f"✅ Feature extraction: {total/60:.2f} min ({total/steps:.2f}s/batch)")
    return np.vstack(X), np.vstack(y)

print("✅ GPU-optimized feature extraction ready!")

✅ GPU-optimized feature extraction ready!


In [16]:
# Extract Training Features
print("Extracting training features …")
X_tr, Y_tr_ohe = extract_features(train_gen)
print(f"Training features shape: {X_tr.shape}")
print(f"Training labels shape: {Y_tr_ohe.shape}")

Extracting training features …
🚀 Starting GPU-optimized feature extraction...
💻 Using CPU (GPU not available)


📊 [20/500] Avg batch: 3.01s | ETA: 24.1m
📊 [40/500] Avg batch: 2.69s | ETA: 20.6m
📊 [60/500] Avg batch: 2.58s | ETA: 18.9m
📊 [80/500] Avg batch: 2.51s | ETA: 17.6m
📊 [100/500] Avg batch: 2.49s | ETA: 16.6m
📊 [120/500] Avg batch: 2.46s | ETA: 15.6m
📊 [140/500] Avg batch: 2.43s | ETA: 14.6m
📊 [160/500] Avg batch: 2.41s | ETA: 13.7m
📊 [180/500] Avg batch: 2.39s | ETA: 12.7m
📊 [200/500] Avg batch: 2.37s | ETA: 11.8m
📊 [220/500] Avg batch: 2.35s | ETA: 11.0m
📊 [240/500] Avg batch: 2.34s | ETA: 10.1m
📊 [260/500] Avg batch: 2.32s | ETA: 9.3m
📊 [280/500] Avg batch: 2.31s | ETA: 8.5m
📊 [300/500] Avg batch: 2.30s | ETA: 7.7m
📊 [320/500] Avg batch: 2.29s | ETA: 6.9m
📊 [340/500] Avg batch: 2.30s | ETA: 6.1m
📊 [360/500] Avg batch: 2.29s | ETA: 5.3m
📊 [380/500] Avg batch: 2.28s | ETA: 4.6m
📊 [400/500] Avg batch: 2.29s | ETA: 3.8m
📊 [420/500] Avg batch: 2.29s | ETA: 3.1m
📊 [440/500] Avg batch: 2.29s | ETA: 2.3m
📊 [460/500] Avg batch: 2.28s | ETA: 1.5m
📊 [480/500] Avg batch: 2.28s | ETA: 0.8m
📊 [500/5

In [17]:
## 1. TRUE mRMR Feature Ranking - OPTIMIZED
try:
    import pymrmr
    print("pymrmr imported successfully")
except ImportError:
    print("WARNING: pymrmr not available. Install with: conda install pymrmr -c conda-forge")
    print("Falling back to mutual information ranking only.")
    pymrmr = None

import numpy as np, pandas as pd, time, gc
from sklearn.feature_selection import mutual_info_classif, VarianceThreshold
from sklearn.preprocessing import StandardScaler

def true_mrmr_feature_selection(X, y_ohe, n_features=1000, sample_rows=1500, var_thresh=0.01):
    """
    OPTIMIZED TRUE mRMR implementation with reduced sampling
    """
    t0 = time.time()
    y = np.argmax(y_ohe, axis=1)
    n_samples, n_feats = X.shape
    
    # Variance filter to remove low-variance features
    if var_thresh > 0:
        vt = VarianceThreshold(var_thresh)
        X_filtered = vt.fit_transform(X)
        kept_indices = np.where(vt.get_support())[0]
    else:
        X_filtered = X
        kept_indices = np.arange(n_feats)
    
    print(f"[mRMR] After variance filter: {len(kept_indices)} features")
    
    # OPTIMIZATION: Reduced row sampling for speed
    if sample_rows and sample_rows < X_filtered.shape[0]:
        rng = np.random.default_rng(42)
        rows = rng.choice(X_filtered.shape[0], size=sample_rows, replace=False)
        X_sample = X_filtered[rows]
        y_sample = y[rows]
    else:
        X_sample = X_filtered
        y_sample = y
    
    # Apply TRUE mRMR if available, otherwise fall back to MI
    if pymrmr is not None:
        try:
            # Create DataFrame for pymrmr
            feature_names = [f'feature_{i}' for i in range(X_sample.shape[1])]
            df = pd.DataFrame(X_sample, columns=feature_names)
            df['target'] = y_sample
            
            selected_features = pymrmr.mRMR(df, 'MIQ', n_features)
            # Convert feature names back to indices
            selected_indices = [int(f.split('_')[1]) for f in selected_features]
            # Map back to original feature indices
            final_indices = [kept_indices[i] for i in selected_indices]
            
            print(f"[TRUE-mRMR] Selected {len(final_indices)} features in {time.time()-t0:.2f}s")
            return final_indices
            
        except Exception as e:
            print(f"[TRUE-mRMR] Error: {e}. Falling back to mutual information ranking.")
            
    # Fallback to MI-based ranking
    mi_scores = mutual_info_classif(X_sample, y_sample, discrete_features=False, random_state=42, n_jobs=-1)
    ranked_indices = np.argsort(mi_scores)[::-1]
    selected_indices = ranked_indices[:n_features]
    final_indices = [kept_indices[i] for i in selected_indices]
    
    print(f"[MI-Ranking] Selected {len(final_indices)} features in {time.time()-t0:.2f}s")
    return final_indices

print("✅ Optimized mRMR feature selection ready!")


## 2. Enhanced Adaptive Grey Wolf Optimization (AGWO) - OPTIMIZED
import numpy as np, gc, hashlib
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

def _subset_hash(idxs):
    return hashlib.md5(np.asarray(idxs, dtype=np.int32).tobytes()).hexdigest()

def enhanced_agwo_feature_selection(
    X_ranked,
    y_ohe,
    ranked_global_indices,
    n_wolves=20,  # OPTIMIZED: Reduced from 25
    n_iter=15,    # OPTIMIZED: Reduced from 30 with better convergence
    min_subset=500,
    max_subset=1500,  # OPTIMIZED: Reduced from 2000
    row_sample=2500,  # OPTIMIZED: Reduced from 3000
    knn_folds=3,      # OPTIMIZED: Reduced from 5
    rf_folds=2,       # OPTIMIZED: Kept at 2
    rf_max_features=400,  # OPTIMIZED: Reduced from 500
    penalty_weight=0.015,  # OPTIMIZED: Fine-tuned
    patience=6,           # OPTIMIZED: Reduced from 8
    random_state=42,
    verbose=True
):
    """
    OPTIMIZED Enhanced AGWO with reduced iterations and better convergence
    """
    rng = np.random.default_rng(random_state)
    y = np.argmax(y_ohe, axis=1)
    n_samples, n_feats = X_ranked.shape

    # Enhanced row subsampling (stratified)
    if row_sample and row_sample < n_samples:
        rows = []
        per_class = row_sample // len(np.unique(y))
        for cls in np.unique(y):
            cls_idx = np.where(y == cls)[0]
            take = min(per_class, len(cls_idx))
            rows.append(rng.choice(cls_idx, size=take, replace=False))
        rows = np.concatenate(rows)
    else:
        rows = np.arange(n_samples)

    X_fit = X_ranked[rows]
    y_fit = y[rows]

    # Wolves initialization with better diversity
    def init_position():
        vals = rng.random(n_feats)
        vals = vals * (1 + 0.5 * np.sin(np.arange(n_feats) * 0.1))
        return vals

    wolves = [init_position() for _ in range(n_wolves)]

    # OPTIMIZED: Logarithmic growth with steeper curve
    def subset_budget(iter_idx):
        log_factor = np.log(iter_idx + 2) / np.log(n_iter + 1)
        return int(min_subset + (max_subset - min_subset) * log_factor)

    # Enhanced fitness cache
    fitness_cache = {}

    def eval_subset(local_idx):
        if len(local_idx) < 2:
            return 0.0
        key_hash = _subset_hash(local_idx)
        if key_hash in fitness_cache:
            return fitness_cache[key_hash]

        # Enhanced feature selection for RF
        feat_slice = local_idx
        if len(feat_slice) > rf_max_features:
            feat_slice_rf = rng.choice(feat_slice, size=rf_max_features, replace=False)
        else:
            feat_slice_rf = feat_slice

        X_sub = X_fit[:, feat_slice]
        scaler = StandardScaler()
        X_sub = scaler.fit_transform(X_sub)

        # KNN CV with reduced folds
        skf_knn = StratifiedKFold(n_splits=knn_folds, shuffle=True, random_state=123)
        knn_scores = []
        knn = KNeighborsClassifier(n_neighbors=5, weights='distance', n_jobs=-1)
        for tr, va in skf_knn.split(X_sub, y_fit):
            knn.fit(X_sub[tr], y_fit[tr])
            pred = knn.predict(X_sub[va])
            knn_scores.append(accuracy_score(y_fit[va], pred))
        knn_acc = np.mean(knn_scores)

        # RF CV with reduced folds
        X_sub_rf = X_fit[:, feat_slice_rf]
        scaler_rf = StandardScaler()
        X_sub_rf = scaler_rf.fit_transform(X_sub_rf)
        skf_rf = StratifiedKFold(n_splits=rf_folds, shuffle=True, random_state=321)
        rf_scores = []
        rf = RandomForestClassifier(
            n_estimators=150,  # OPTIMIZED: Reduced from 200
            max_features='sqrt',
            n_jobs=-1,
            random_state=999
        )
        for tr, va in skf_rf.split(X_sub_rf, y_fit):
            rf.fit(X_sub_rf[tr], y_fit[tr])
            pred = rf.predict(X_sub_rf[va])
            rf_scores.append(accuracy_score(y_fit[va], pred))
        rf_acc = np.mean(rf_scores)

        # Fine-tuned penalty
        size_penalty = penalty_weight * (len(local_idx) / max_subset)
        fitness = 0.7 * knn_acc + 0.3 * rf_acc - size_penalty
        fitness_cache[key_hash] = fitness
        return fitness

    # Enhanced decoding with stability
    def decode(position, k):
        noisy_pos = position + rng.normal(0, 0.01, len(position))
        order = np.argpartition(noisy_pos, -k)[-k:]
        return order[np.argsort(-noisy_pos[order])]

    # Enhanced AGWO loop
    best_global_subset = None
    best_fitness = -1
    no_improve = 0

    for it in range(n_iter):
        k_budget = subset_budget(it)

        # Decode all wolves
        wolf_subsets = [decode(w, k_budget) for w in wolves]
        wolf_scores = [eval_subset(sub) for sub in wolf_subsets]

        # Identify alpha, beta, delta
        order = np.argsort(wolf_scores)[::-1]
        alpha, beta, delta = wolves[order[0]], wolves[order[1]], wolves[order[2]]
        alpha_subset = wolf_subsets[order[0]]
        alpha_score = wolf_scores[order[0]]

        if alpha_score > best_fitness:
            best_fitness = alpha_score
            best_global_subset = alpha_subset.copy()
            no_improve = 0
        else:
            no_improve += 1

        if verbose:
            print(f"[AGWO] iter {it+1}/{n_iter} k={k_budget} alpha={alpha_score:.4f} best={best_fitness:.4f} cache={len(fitness_cache)}")

        if no_improve >= patience:
            if verbose:
                print(f"[AGWO] Early stop (patience {patience})")
            break

        # OPTIMIZED: Steeper decay for faster convergence
        a = 2 * np.exp(-4 * (it / n_iter))

        # Enhanced wolf update
        new_wolves = []
        for idx, w in enumerate(wolves):
            if idx in order[:3]:
                new_wolves.append(w)
                continue
                
            A1 = 2 * a * rng.random(n_feats) - a
            C1 = 2 * rng.random(n_feats)
            A2 = 2 * a * rng.random(n_feats) - a
            C2 = 2 * rng.random(n_feats)
            A3 = 2 * a * rng.random(n_feats) - a
            C3 = 2 * rng.random(n_feats)

            D_alpha = np.abs(C1 * alpha - w)
            D_beta  = np.abs(C2 * beta  - w)
            D_delta = np.abs(C3 * delta - w)

            X1 = alpha - A1 * D_alpha
            X2 = beta  - A2 * D_beta
            X3 = delta - A3 * D_delta

            new_pos = (X1 + X2 + X3) / 3.0

            # Enhanced mutation
            if rng.random() < 0.15:
                mut_mask = rng.random(n_feats) < 0.005
                noise = rng.normal(0, 0.3, np.sum(mut_mask))
                new_pos[mut_mask] += noise

            new_pos = np.clip(new_pos, -2.0, 2.0)
            new_wolves.append(new_pos)

        # Diversity injection
        if no_improve == patience - 1:
            inject_count = max(2, n_wolves // 5)
            for _ in range(inject_count):
                ridx = rng.integers(3, n_wolves)
                new_wolves[ridx] = init_position()

        wolves = new_wolves

    # Map to global feature indices
    selected_global = [ranked_global_indices[i] for i in best_global_subset]

    if verbose:
        print(f"[AGWO] Complete: {len(selected_global)} features, fitness={best_fitness:.4f}")

    return selected_global

print("✅ Optimized AGWO feature selection ready!")

Falling back to mutual information ranking only.
✅ Optimized mRMR feature selection ready!
✅ Optimized AGWO feature selection ready!


In [18]:
# Extract Validation Features
print("Extracting validation features …")
X_va, Y_va_ohe = extract_features(val_gen)
print(f"Validation features shape: {X_va.shape}")
print(f"Validation labels shape: {Y_va_ohe.shape}")

Extracting validation features …
🚀 Starting GPU-optimized feature extraction...
💻 Using CPU (GPU not available)
📊 [20/125] Avg batch: 2.23s | ETA: 3.9m
📊 [40/125] Avg batch: 2.23s | ETA: 3.2m
📊 [60/125] Avg batch: 2.21s | ETA: 2.4m
📊 [80/125] Avg batch: 2.21s | ETA: 1.7m
📊 [100/125] Avg batch: 2.21s | ETA: 0.9m
📊 [120/125] Avg batch: 2.20s | ETA: 0.2m
✅ Feature extraction: 4.59 min (2.20s/batch)
Validation features shape: (3000, 6400)
Validation labels shape: (3000, 3)


In [19]:
# Combine Features and Convert Labels
# NOTE: We avoid building X_full (memory heavy); use train/val directly where possible
X_full = np.vstack([X_tr, X_va])
y_full = np.argmax(np.vstack([Y_tr_ohe, Y_va_ohe]), axis=1)

print(f"Total features shape: {X_full.shape}")
print(f"Total labels shape: {y_full.shape}")
print(f"Classes present: {np.unique(y_full)}")
print(f"Class distribution: {np.bincount(y_full)}")

Total features shape: (15000, 6400)
Total labels shape: (15000,)
Classes present: [0 1 2]
Class distribution: [5000 5000 5000]


In [22]:
# --- OPTIMIZED: TRUE mRMR + Enhanced AGWO Feature Selection Pipeline ---
import time, gc

t_total = time.time()

# OPTIMIZED Parameters (balanced for speed and accuracy)
n_mrmr = 800          # OPTIMIZED: Reduced from 1000
sample_rows = 1500    # OPTIMIZED: Reduced from 2000
subset_size = 1500    # OPTIMIZED: Reduced from 2000 for AGWO
n_wolves = 20         # OPTIMIZED: Reduced from 25
n_iter = 15           # OPTIMIZED: Reduced from 30

# Stage 1: TRUE mRMR Feature Ranking
print("Stage 1: TRUE mRMR Feature Ranking (Optimized)")
ranked_features = true_mrmr_feature_selection(
    X_tr, Y_tr_ohe,
    n_features=n_mrmr,
    sample_rows=sample_rows,
    var_thresh=0.01
)
print(f"[Pipeline] Ranked features: {len(ranked_features)}")

# Stage 2: Slice training matrix to ranked features ONLY for Enhanced AGWO
X_tr_ranked = X_tr[:, ranked_features]
print(f"[Pipeline] Ranked features shape: {X_tr_ranked.shape}")

# IMPORTANT FIX: Adjust max_subset to not exceed available features
actual_max_subset = min(subset_size, len(ranked_features))
print(f"[Pipeline] Adjusted max_subset: {subset_size} → {actual_max_subset}")

# Stage 3: Enhanced AGWO Feature Selection
print("\nStage 3: Enhanced AGWO Feature Selection (Optimized)")
selected_features = enhanced_agwo_feature_selection(
    X_tr_ranked, Y_tr_ohe, ranked_features,
    n_wolves=n_wolves,
    n_iter=n_iter,
    min_subset=500,
    max_subset=actual_max_subset,  # FIX: Use adjusted value
    row_sample=2500,   # OPTIMIZED: Reduced from 3000
    knn_folds=3,       # OPTIMIZED: Reduced from 5
    rf_folds=2,        # OPTIMIZED: Kept at 2
    rf_max_features=400,  # OPTIMIZED: Reduced from 500
    penalty_weight=0.015,
    patience=6,           # OPTIMIZED: Reduced from 8
    verbose=True
)

print(f"[Pipeline] Final selected features: {len(selected_features)}")

# Extract final feature matrices
X_tr_final = X_tr[:, selected_features]
X_va_final = X_va[:, selected_features]

# Train/test split on combined (train+val)
y_full = np.argmax(np.vstack([Y_tr_ohe, Y_va_ohe]), axis=1)
X_full_sel = np.vstack([X_tr_final, X_va_final])

X_train, X_test, y_train, y_test = train_test_split(
    X_full_sel, y_full, test_size=0.20, random_state=SEED, stratify=y_full
)

print(f"[Pipeline] Train {X_train.shape}, Test {X_test.shape}")
print(f"[Pipeline] Feature reduction: {X_tr.shape[1]} → {X_tr_final.shape[1]} ({X_tr_final.shape[1]/X_tr.shape[1]:.1%})")

# Cleanup
del X_tr_ranked
gc.collect()

print(f"\n[Pipeline] Total time: {time.time() - t_total:.2f}s")
print("✅ Optimized two-stage feature selection completed!")

Stage 1: TRUE mRMR Feature Ranking (Optimized)


[mRMR] After variance filter: 4003 features
[MI-Ranking] Selected 800 features in 8.30s
[Pipeline] Ranked features: 800
[Pipeline] Ranked features shape: (12000, 800)
[Pipeline] Adjusted max_subset: 1500 → 800

Stage 3: Enhanced AGWO Feature Selection (Optimized)
[AGWO] iter 1/15 k=575 alpha=0.9522 best=0.9522 cache=20
[AGWO] iter 2/15 k=618 alpha=0.9526 best=0.9526 cache=40
[AGWO] iter 3/15 k=650 alpha=0.9540 best=0.9540 cache=60
[AGWO] iter 4/15 k=674 alpha=0.9526 best=0.9540 cache=80
[AGWO] iter 5/15 k=693 alpha=0.9516 best=0.9540 cache=100
[AGWO] iter 6/15 k=710 alpha=0.9514 best=0.9540 cache=120
[AGWO] iter 7/15 k=725 alpha=0.9509 best=0.9540 cache=140
[AGWO] iter 8/15 k=737 alpha=0.9502 best=0.9540 cache=160
[AGWO] iter 9/15 k=749 alpha=0.9502 best=0.9540 cache=180
[AGWO] Early stop (patience 6)
[AGWO] Complete: 650 features, fitness=0.9540
[Pipeline] Final selected features: 650
[Pipeline] Train (12000, 650), Test (3000, 650)
[Pipeline] Feature reduction: 6400 → 650 (10.2%)

[Pi

In [23]:
# Initialize Classifiers - OPTIMIZED (No XGBoost to avoid extra dependency)
from sklearn.linear_model import LogisticRegression

knn = KNeighborsClassifier(n_neighbors=5, weights='distance', n_jobs=-1)
svm = SVC(kernel='rbf', probability=True, C=1.0, gamma='scale', random_state=SEED, cache_size=500)
rf  = RandomForestClassifier(n_estimators=250, random_state=SEED, n_jobs=-1, max_features='sqrt')
lr  = LogisticRegression(max_iter=500, random_state=SEED, n_jobs=-1, solver='saga')

print("✅ Classifiers initialized (optimized, no XGBoost)")

✅ Classifiers initialized (optimized, no XGBoost)


In [24]:
# Train Classifiers
print("Training classifiers …")

print("  Training KNN...")
knn.fit(X_train, y_train)

print("  Training SVM...")
svm.fit(X_train, y_train)

print("  Training Random Forest...")
rf.fit(X_train, y_train)

print("  Training Logistic Regression...")
lr.fit(X_train, y_train)

print("All classifiers trained successfully!")

Training classifiers …
  Training KNN...
  Training SVM...
  Training Random Forest...
  Training Logistic Regression...
All classifiers trained successfully!




In [25]:
# Make Predictions
print("Making predictions...")

knn_pred = knn.predict(X_test)
svm_pred = svm.predict(X_test)
rf_pred  = rf.predict(X_test)
lr_pred  = lr.predict(X_test)

# Probabilistic predictions (for ensemble if needed)
knn_proba = knn.predict_proba(X_test) if hasattr(knn, 'predict_proba') else None
svm_proba = svm.predict_proba(X_test) if hasattr(svm, 'predict_proba') else None
rf_proba  = rf.predict_proba(X_test) if hasattr(rf, 'predict_proba') else None
lr_proba  = lr.predict_proba(X_test) if hasattr(lr, 'predict_proba') else None

print("Predictions completed!")

Making predictions...
Predictions completed!


In [26]:
# Individual Classifier Results
print("Individual Classifier Accuracies:")
knn_acc = accuracy_score(y_test, knn_pred)
svm_acc = accuracy_score(y_test, svm_pred)
rf_acc = accuracy_score(y_test, rf_pred)
lr_acc = accuracy_score(y_test, lr_pred)

print(f"  KNN: {knn_acc:.4f}")
print(f"  SVM: {svm_acc:.4f}")
print(f"  RF : {rf_acc:.4f}")
print(f"  LR : {lr_acc:.4f}")

# Display individual classification reports
target_names = [id2label[i] for i in range(num_classes)]

print("\n=== KNN Classification Report ===")
print(classification_report(y_test, knn_pred, target_names=target_names))

print("\n=== SVM Classification Report ===")
print(classification_report(y_test, svm_pred, target_names=target_names))

print("\n=== Random Forest Classification Report ===")
print(classification_report(y_test, rf_pred, target_names=target_names))

print("\n=== Logistic Regression Classification Report ===")
print(classification_report(y_test, lr_pred, target_names=target_names))

Individual Classifier Accuracies:
  KNN: 0.9953
  SVM: 0.9887
  RF : 0.9733
  LR : 0.9820

=== KNN Classification Report ===
              precision    recall  f1-score   support

    lung_aca       1.00      0.99      0.99      1000
      lung_n       1.00      1.00      1.00      1000
    lung_scc       0.99      1.00      0.99      1000

    accuracy                           1.00      3000
   macro avg       1.00      1.00      1.00      3000
weighted avg       1.00      1.00      1.00      3000


=== SVM Classification Report ===
              precision    recall  f1-score   support

    lung_aca       0.98      0.98      0.98      1000
      lung_n       1.00      1.00      1.00      1000
    lung_scc       0.98      0.98      0.98      1000

    accuracy                           0.99      3000
   macro avg       0.99      0.99      0.99      3000
weighted avg       0.99      0.99      0.99      3000


=== Random Forest Classification Report ===
              precision    recall

In [27]:
# CORRECTED: Priority-Based Weighting Implementation
import numpy as np
from sklearn.metrics import accuracy_score

def calculate_priority_weights_fixed(accuracies):
    """Calculate priority weights from accuracies (descending rank)."""
    ranked_indices = np.argsort(accuracies)[::-1]
    ranked_accs = np.array([accuracies[i] for i in ranked_indices])
    T = [1.0]
    for j in range(1, len(ranked_accs)):
        T.append(np.prod(ranked_accs[:j]))
    T = np.array(T)
    weights = T / np.sum(T)
    return weights, ranked_indices

def priority_weighted_prediction_fixed(predictions, weights, ranked_indices):
    ranked_predictions = predictions[ranked_indices]
    # For label predictions, use weighted voting
    n_samples = ranked_predictions.shape[1]
    n_clf = ranked_predictions.shape[0]
    votes = {}
    for i in range(n_samples):
        counts = {}
        for j in range(n_clf):
            label = ranked_predictions[j, i]
            counts[label] = counts.get(label, 0) + weights[j]
        pred = max(counts.items(), key=lambda x: x[1])[0]
        yield pred

print("=== CORRECTED Priority-Based Ensemble Fusion (no XGBoost) ===")

all_predictions = np.array([knn_pred, svm_pred, rf_pred, lr_pred])
all_accuracies = np.array([knn_acc, svm_acc, rf_acc, lr_acc])

weights_fixed, ranked_indices = calculate_priority_weights_fixed(all_accuracies)
weighted_pred_fixed_labels = np.fromiter(priority_weighted_prediction_fixed(all_predictions, weights_fixed, ranked_indices), dtype=int, count=len(y_test))

weighted_ens_acc_fixed = accuracy_score(y_test, weighted_pred_fixed_labels)

print(f"Weighted-Average Ensemble Accuracy (labels): {weighted_ens_acc_fixed:.4f}")
print(f"Improvement over best individual: {weighted_ens_acc_fixed - max(all_accuracies):.4f}")

=== CORRECTED Priority-Based Ensemble Fusion (no XGBoost) ===
Weighted-Average Ensemble Accuracy (labels): 0.9920
Improvement over best individual: -0.0033


In [33]:
# Final Formatted Results Display
print("\n" + "="*60)
print("Individual Classifier Accuracies:")
print(f"  KNN: {knn_acc:.4f}")
print(f"  SVM: {svm_acc:.4f}")
print(f"  RF : {rf_acc:.4f}")

print("\n=== KNN Classification Report ===")
print(classification_report(y_test, knn_pred, target_names=target_names))

print("\n=== SVM Classification Report ===")
print(classification_report(y_test, svm_pred, target_names=target_names))

print("\n=== Random Forest Classification Report ===")
print(classification_report(y_test, rf_pred, target_names=target_names))

print("\n" + "/"*60)

# Best ensemble result (use corrected weighted ensemble if available)
if 'weighted_ens_acc_fixed' in globals():
    best_ens_acc = weighted_ens_acc_fixed
    best_ens_pred = weighted_pred_fixed_labels
    ens_method = "Weighted Priority"
elif 'weighted_ens_acc' in globals():
    best_ens_acc = weighted_ens_acc
    best_ens_pred = weighted_ensemble_pred
    ens_method = "Weighted Average"
else:
    best_ens_acc = ens_acc
    best_ens_pred = ens
    ens_method = "Priority-Based"

print(f"\nEnsemble Accuracy ({ens_method}): {best_ens_acc:.4f}")

best_individual = max(knn_acc, svm_acc, rf_acc, lr_acc)
improvement = best_ens_acc - best_individual
print(f"\nImprovement over best individual: {improvement:.4f}")

print("\n=== Ensemble Classification Report ===")
print(classification_report(y_test, best_ens_pred, target_names=target_names))

print("\n" + "/"*60)

print("\n" + "="*60)
print("FINAL RESULTS SUMMARY")
print("="*60)
print(f"Total samples processed: {len(y_full)}")

# Feature selection info
if 'selected_features' in globals():
    sel_count = len(selected_features)
    orig_count = X_tr.shape[1] if 'X_tr' in globals() else X_full.shape[1]
    print(f"Features selected by AGWO: {sel_count} / {orig_count} ({sel_count/orig_count:.1%})")

print(f"Test set size: {len(y_test)}")

print("\nClassifier Accuracies:")
print(f"  KNN:              {knn_acc:.4f}")
print(f"  SVM:              {svm_acc:.4f}")
print(f"  Random Forest:    {rf_acc:.4f}")
print(f"  Logistic Reg:     {lr_acc:.4f}")
print(f"  Ensemble (Fusion): {best_ens_acc:.4f} ← BEST")

print("\nClass Labels:")
for i, label in id2label.items():
    print(f"  {i}: {label}")
print("="*60)


Individual Classifier Accuracies:
  KNN: 0.9953
  SVM: 0.9887
  RF : 0.9733

=== KNN Classification Report ===
              precision    recall  f1-score   support

    lung_aca       1.00      0.99      0.99      1000
      lung_n       1.00      1.00      1.00      1000
    lung_scc       0.99      1.00      0.99      1000

    accuracy                           1.00      3000
   macro avg       1.00      1.00      1.00      3000
weighted avg       1.00      1.00      1.00      3000


=== SVM Classification Report ===
              precision    recall  f1-score   support

    lung_aca       0.98      0.98      0.98      1000
      lung_n       1.00      1.00      1.00      1000
    lung_scc       0.98      0.98      0.98      1000

    accuracy                           0.99      3000
   macro avg       0.99      0.99      0.99      3000
weighted avg       0.99      0.99      0.99      3000


=== Random Forest Classification Report ===
              precision    recall  f1-score   