In [1]:
import os
import numpy as np
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16, preprocess_input
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Shared VGG16 network used by extract_features: ImageNet weights,
# built with include_top=False.
model = VGG16(
    include_top=False,
    weights="imagenet",
)


# Turn one image file into a flat feature vector using the shared model
def extract_features(img_path):
    """Return the flattened output of ``model`` for the image at ``img_path``.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, run through ``preprocess_input`` and then ``model.predict``.
    The prediction is flattened to one dimension before being returned.
    """
    pixels = image.img_to_array(image.load_img(img_path, target_size=(224, 224)))
    # model.predict expects a batch, so wrap the single image in one.
    batch = preprocess_input(np.expand_dims(pixels, axis=0))
    return model.predict(batch).flatten()


# Remove near-duplicate images from a directory based on feature similarity
def compare_images(directory, threshold=0.95):
    """Delete images in ``directory`` that closely resemble an earlier image.

    Files are visited in sorted name order. Each file's feature vector (from
    ``extract_features``) is compared against the vectors of every image kept
    so far; if the cosine similarity to any of them exceeds ``threshold`` the
    file is deleted from disk, otherwise its vector joins the kept set.

    Args:
        directory: Path of the folder to scan. Only regular files directly
            inside it are considered (no recursion).
        threshold: Cosine-similarity value above which an image counts as a
            duplicate. Defaults to 0.95.

    Returns:
        None. Each removed file is reported with ``print``.

    Warning:
        Files are deleted with ``os.remove``; run this on a copy of the data
        if the originals must be preserved.
    """
    # Sorted so that which copy of a duplicate pair survives does not depend
    # on the arbitrary order returned by os.listdir.
    image_files = sorted(
        f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))
    )

    features_dict = {}
    for image_file in image_files:
        image_path = os.path.join(directory, image_file)
        features = extract_features(image_path)

        # Iterate over the stored vectors only; .items() would hand
        # (filename, vector) tuples to cosine_similarity.
        for existing_features in features_dict.values():
            # cosine_similarity returns a 1x1 matrix here; take the scalar.
            similarity = cosine_similarity([features], [existing_features])[0][0]
            if similarity > threshold:
                os.remove(image_path)
                print(f"Removed duplicate image: {image_file}")
                break
        else:
            # No break: nothing kept so far is similar enough. This also
            # covers the very first image, when features_dict is still empty.
            features_dict[image_file] = features


# Deduplicate the "glass" image folder. compare_images removes files from
# this directory on disk, so re-running the cell acts on the already-pruned set.
GLASS_DIR = "../Kaggle Dataset/archive (5)/glass"
compare_images(GLASS_DIR)

Removed duplicate image: glass_1491.jpg
Removed duplicate image: glass_1817.jpg
Removed duplicate image: glass_1846.jpg
Removed duplicate image: glass_1865.jpg
Removed duplicate image: glass_1874.jpg
Removed duplicate image: glass_1917.jpg
Removed duplicate image: glass_1918.jpg
Removed duplicate image: glass_2117.jpg
Removed duplicate image: glass_2128.jpg
Removed duplicate image: glass_2155.jpg
Removed duplicate image: glass_2207.jpg
Removed duplicate image: glass_2280.jpg
Removed duplicate image: glass_2327.jpg
Removed duplicate image: glass_2406.jpg
Removed duplicate image: glass_2431.jpg
Removed duplicate image: glass_2483.jpg
Removed duplicate image: glass_2485.jpg
Removed duplicate image: glass_2529.jpg
Removed duplicate image: glass_2566.jpg
Removed duplicate image: glass_2572.jpg
Removed duplicate image: glass_2651.jpg
Removed duplicate image: glass_2672.jpg
Removed duplicate image: glass_2675.jpg
Removed duplicate image: glass_2724.jpg
Removed duplicate image: glass_2729.jpg
