In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

In [None]:
# Step 1: Collect image paths and labels
def collect_image_paths(directory):
    """Recursively collect image files under ``directory`` with folder-name labels.

    Every file whose extension (case-insensitive) is ``.jpg``, ``.jpeg``,
    ``.png`` or ``.gif`` is included. The label of an image is the name of the
    directory that directly contains it.

    Directories and files are visited in sorted order so the returned lists
    are deterministic regardless of the filesystem's native listing order.

    Args:
        directory: Root directory to walk.

    Returns:
        A tuple ``(image_paths, labels)`` of two parallel lists: the path of
        each image (``directory`` joined with the relative location) and the
        corresponding label string.
    """
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif'}
    image_paths = []
    labels = []
    for root, dirs, files in os.walk(directory):
        # Sorting ``dirs`` in place controls the order os.walk descends in;
        # without it the traversal order depends on the filesystem.
        dirs.sort()
        # normpath drops a trailing separator, so images directly under
        # 'data/' are labelled 'data' instead of the empty string.
        label = os.path.basename(os.path.normpath(root))
        for file in sorted(files):
            _, ext = os.path.splitext(file)
            if ext.lower() in image_extensions:
                image_paths.append(os.path.join(root, file))
                labels.append(label)
    return image_paths, labels


In [None]:
# Step 2: Extract features in batches
def extract_features_batch(image_paths, model, batch_size=32):
    """Compute flattened model features for a list of image files, batch by batch.

    Each image is loaded at 224x224, converted to an array, run through
    ``preprocess_input`` and passed to ``model.predict``. The prediction for
    every image is flattened to a single row.

    Args:
        image_paths: Sequence of image file paths.
        model: Object exposing ``predict(batch)`` that returns an array whose
            first axis indexes the images in the batch.
        batch_size: Number of images loaded and predicted at once. Must be >= 1.

    Returns:
        A 2-D array with one row per input image. For empty ``image_paths`` an
        empty ``(0, 0)`` float32 array is returned.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    if batch_size < 1:
        raise ValueError(
            "batch_size must be a positive integer, got {}".format(batch_size)
        )
    if len(image_paths) == 0:
        # np.vstack([]) raises an unhelpful error; return an empty 2-D array.
        # The feature width is unknown without running the model, hence 0.
        return np.empty((0, 0), dtype=np.float32)

    features = []
    # Slice batches lazily rather than materialising a list of all batches.
    for start in range(0, len(image_paths), batch_size):
        batch = image_paths[start:start + batch_size]
        imgs = np.stack(
            [img_to_array(load_img(path, target_size=(224, 224))) for path in batch],
            axis=0,
        )
        imgs = preprocess_input(imgs)
        batch_features = model.predict(imgs)
        # Collapse everything after the batch axis into one feature vector.
        features.append(batch_features.reshape(batch_features.shape[0], -1))
    return np.vstack(features)

In [None]:
# Step 3: Find similar images
def find_similar_images(test_feature, train_features, top_k=5):
    """Rank training features by cosine similarity to a single query feature.

    Args:
        test_feature: Feature vector of the query image.
        train_features: Collection of feature vectors to search, one per row.
        top_k: Maximum number of indices to return.

    Returns:
        Indices into ``train_features`` ordered from most to least similar,
        truncated to the first ``top_k`` entries.
    """
    # The query is wrapped in a list so it is treated as a one-row matrix;
    # row 0 of the result holds its similarity to every training row.
    scores = cosine_similarity([test_feature], train_features)[0]
    ascending = np.argsort(scores)
    descending = ascending[::-1]
    return descending[:top_k]

In [None]:
# Step 4: Display images
def display_images(image_paths, figsize=(15, 5)):
    """Show the given images side by side in a single row.

    Each image is loaded at 224x224 and drawn on its own axes with the axis
    decorations turned off.

    Args:
        image_paths: Sequence of image file paths to display.
        figsize: Figure size passed to ``plt.subplots``.

    Returns:
        None. Does nothing when ``image_paths`` is empty.
    """
    num_images = len(image_paths)
    if num_images == 0:
        # A grid with zero columns is rejected by plt.subplots; nothing to draw.
        return
    # squeeze=False always yields a 2-D array of axes; without it a single
    # image returns a bare Axes object that cannot be indexed.
    _, axes = plt.subplots(1, num_images, figsize=figsize, squeeze=False)
    for ax, path in zip(axes[0], image_paths):
        ax.imshow(load_img(path, target_size=(224, 224)))
        ax.axis('off')
    plt.show()

# Main function
def main(dataset_dir='data/', top_k=5):
    """Run the image-recommendation pipeline end to end.

    Steps:
      1. Collect image paths and folder labels under ``dataset_dir``.
      2. Split them 80/20 into train and test sets, stratified by label,
         with a fixed ``random_state`` of 42.
      3. Extract VGG16 (ImageNet weights, no top layers) features for both sets
         and save them to ``X_train_features.npy`` / ``X_test_features.npy``.
      4. Prompt for a test image index and display the ``top_k`` most similar
         training images.

    Args:
        dataset_dir: Root directory of the image dataset.
        top_k: Number of similar training images to display.

    Raises:
        ValueError: If no images are found, or if the entered index is not an
            integer in ``[0, number_of_test_images)``.
    """
    # Collect image paths and labels
    image_paths, labels = collect_image_paths(dataset_dir)
    if not image_paths:
        # Fail here with a clear message instead of deep inside the split.
        raise ValueError("No images found under {!r}".format(dataset_dir))

    # Split data into training and testing sets (label splits are not used)
    X_train, X_test, _, _ = train_test_split(
        image_paths, labels, test_size=0.2, stratify=labels, random_state=42
    )

    # Load pre-trained VGG16 model as a convolutional feature extractor
    model = VGG16(weights='imagenet', include_top=False)

    # Extract features
    X_train_features = extract_features_batch(X_train, model)
    X_test_features = extract_features_batch(X_test, model)

    # Save features
    np.save('X_train_features.npy', X_train_features)
    np.save('X_test_features.npy', X_test_features)

    # User selects a test image
    print("Number of test images:", len(X_test))
    selection = int(input("Select a test image index (0 to {}): ".format(len(X_test)-1)))
    # Reject out-of-range values explicitly: a negative number would otherwise
    # be accepted silently through Python's negative indexing.
    if not 0 <= selection < len(X_test):
        raise ValueError(
            "Index {} is out of range; expected 0 to {}".format(selection, len(X_test) - 1)
        )
    selected_feature = X_test_features[selection]

    # Find similar images
    similar_indices = find_similar_images(selected_feature, X_train_features, top_k=top_k)
    similar_image_paths = [X_train[i] for i in similar_indices]

    # Display recommendations
    display_images(similar_image_paths)
# Run the pipeline only when this code is executed directly, not when it is
# imported as a module.
if __name__ == '__main__':
    main()