In [None]:
import os
import numpy as np
import pandas as pd
import joblib
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression

import tensorflow as tf
from tensorflow.keras.applications import VGG16, VGG19, ResNet50
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg16_pre
from tensorflow.keras.applications.vgg19 import preprocess_input as vgg19_pre
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_pre
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# =======================
# GPU Check & Setup
# =======================
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("⚠️ No GPU found. Running on CPU.")
else:
    # Ask TensorFlow to grow GPU memory on demand for every visible device.
    # A RuntimeError from the call is reported and the script carries on.
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        print("❌ GPU setup error:", err)
    else:
        print("✅ GPU is available and memory growth is enabled.")

# =======================
# Step 1: Load & Preprocess Data
# =======================
# Image geometry and batching shared by the loader and the CNN extractors.
img_size = (224, 224)
batch_size = 32
data_dir = 'dataset/images'

# Load RAW pixel values: no preprocessing_function here on purpose.
# The feature-extraction loop applies each backbone's own preprocess_input
# to the loaded images; preprocessing them here as well would transform
# every image twice and feed the CNNs out-of-distribution input.
datagen = ImageDataGenerator()
data = datagen.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='sparse',  # integer class index per image
    shuffle=False         # keep directory order so images and labels stay aligned
)

# class name -> integer index, as reported by the directory iterator
label_mapping = data.class_indices
# integer index -> class name (reverse lookup of the mapping above)
inverse_label_mapping = dict(zip(label_mapping.values(), label_mapping.keys()))

# =======================
# Step 2: Extract Features using CNNs
# =======================
# The extractors must accept exactly what the loader emits, so derive the
# input shape from img_size instead of repeating the literal per model.
input_shape = (*img_size, 3)  # height, width, 3 colour channels

# model name -> (headless ImageNet-pretrained CNN, its matching preprocess_input)
models = {
    name: (
        build(weights='imagenet', include_top=False, input_shape=input_shape),
        preprocess,
    )
    for name, build, preprocess in (
        ('VGG16', VGG16, vgg16_pre),
        ('VGG19', VGG19, vgg19_pre),
        ('ResNet50', ResNet50, resnet_pre),
    )
}

# Materialise the whole dataset in memory: image batches and their labels.
X_images, y = [], []

print("\n📦 Loading images into memory...")
# Index the iterator (Sequence protocol) instead of calling data.next():
# the .next() method is not available on newer Keras iterators, while
# data[i] works across versions. With shuffle=False, batch i is the i-th
# slice in directory order, so the result matches sequential iteration.
for batch_idx in tqdm(range(len(data))):
    imgs, labels = data[batch_idx]
    X_images.append(imgs)
    y.extend(labels)

# copy=False avoids duplicating the full image array when the stacked
# batches are already float32.
X_images = np.vstack(X_images).astype('float32', copy=False)
y = np.array(y)

# One dict per (feature extractor, classifier) pair; becomes the summary table.
results = []

# =======================
# Step 3: Feature Extraction & Classification
# =======================
for model_name, (cnn_model, preprocess_fn) in models.items():
    print(f"\n📐 Extracting features using {model_name}...")

    # Keras preprocess_input may overwrite a float NumPy array in place, so
    # pass a copy to keep X_images intact for the next feature extractor.
    X_processed = preprocess_fn(X_images.copy())

    # Run the headless CNN (on GPU when one is available, otherwise CPU) and
    # flatten each image's feature map into a single vector.
    features = cnn_model.predict(X_processed, batch_size=batch_size, verbose=1)
    features = features.reshape(features.shape[0], -1)

    # The preprocessed image copy is no longer needed; release it before the
    # classifiers allocate their own working memory.
    del X_processed

    # Fixed seed: every extractor is evaluated on the same train/test partition.
    X_train, X_test, y_train, y_test = train_test_split(features, y, test_size=0.2, random_state=42)

    classifiers = {
        'KNN': KNeighborsClassifier(),
        # Seeded so the reported accuracy is reproducible, like the split above.
        'Random_Forest': RandomForestClassifier(random_state=42),
        'Naive_Bayes': GaussianNB(),
        'Logistic_Regression': LogisticRegression(max_iter=1000)
    }

    for clf_name, clf in classifiers.items():
        print(f"🔍 Training {clf_name} with {model_name} features...")
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        acc = accuracy_score(y_test, preds)

        # Persist the fitted classifier as <extractor>_<classifier>.pkl in
        # the current working directory.
        model_id = f"{model_name}_{clf_name}"
        joblib.dump(clf, f"{model_id}.pkl")

        results.append({
            'Feature_Extractor': model_name,
            'Classifier': clf_name,
            'Accuracy': acc
        })

# =======================
# Step 4: Save Results
# =======================
# Tabulate the collected accuracy records, write them to CSV without the
# index column, then echo the confirmation and the table to stdout.
df_results = pd.DataFrame(results)
df_results.to_csv("model_performance_summary.csv", index=False)
print("\n✅ Results saved to model_performance_summary.csv", df_results, sep="\n")


⚠️ No GPU found. Running on CPU.
Found 7547 images belonging to 9 classes.

📦 Loading images into memory...


100%|████████████████████████████████████████████████████████████████████████████████| 236/236 [03:13<00:00,  1.22it/s]



📐 Extracting features using VGG16...
 10/236 [>.............................] - ETA: 14:26