In [1]:
#importing libraries

In [2]:
import numpy as np
import pickle as pkl
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPool2D

from sklearn.neighbors import NearestNeighbors
import os
from numpy.linalg import norm

In [3]:
#Extract filenames from folder

In [4]:
filenames = []
for file in os.listdir('images'):
    filenames.append(os.path.join('images',file))

In [5]:
len(filenames)

44441

In [6]:
model = ResNet50(weights='imagenet', include_top=False, input_shape=(224,224,3))
model.trainable = False

model = tf.keras.models.Sequential([model,
                                   GlobalMaxPool2D()
                                   ])
model.summary()

In [7]:
img = image.load_img('16871.jpg', target_size=(224,224))
img_array = image.img_to_array(img)
img_expand_dim = np.expand_dims(img_array, axis=0)
img_preprocess = preprocess_input(img_expand_dim)
result = model.predict(img_preprocess).flatten()
norm_result = result/norm(result)
norm_result

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


array([0.        , 0.00295298, 0.        , ..., 0.00752506, 0.02113524,
       0.0040174 ], dtype=float32)

In [8]:
def extract_features_from_images(image_path, model):
    img = image.load_img(image_path, target_size=(224,224))
    img_array = image.img_to_array(img)
    img_expand_dim = np.expand_dims(img_array, axis=0)
    img_preprocess = preprocess_input(img_expand_dim)
    result = model.predict(img_preprocess).flatten()
    norm_result = result/norm(result)
    return norm_result

In [9]:
extract_features_from_images(filenames[0], model)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step


array([0.        , 0.01761618, 0.00171608, ..., 0.01247239, 0.02726405,
       0.06899218], dtype=float32)

In [None]:
from tqdm import tqdm

# Batch feature extraction to improve efficiency by processing multiple images at once
def batch_extract_features(filenames, model, batch_size=32):
    image_features = []
    for i in tqdm(range(0, len(filenames), batch_size)):
        batch_files = filenames[i:i+batch_size]
        batch_images = []
        for file in batch_files:
            img = image.load_img(file, target_size=(224,224))
            img_array = image.img_to_array(img)
            batch_images.append(img_array)

        batch_images = preprocess_input(np.array(batch_images))
        batch_features = model.predict(batch_images)
        batch_features = batch_features / np.linalg.norm(batch_features, axis=1, keepdims=True)

        image_features.extend(batch_features)
    return image_features

# Use only the first half of the dataset
half_filenames = filenames[:len(filenames)//2]

image_features = batch_extract_features(half_filenames, model)

  0%|                                                                                          | 0/695 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step


  0%|                                                                                  | 1/695 [00:03<46:14,  4.00s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  0%|▏                                                                                 | 2/695 [00:05<32:06,  2.78s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  0%|▎                                                                                 | 3/695 [00:07<27:39,  2.40s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▍                                                                                 | 4/695 [00:09<25:31,  2.22s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▌                                                                                 | 5/695 [00:11<24:21,  2.12s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▋                                                                                 | 6/695 [00:13<24:34,  2.14s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▊                                                                                 | 7/695 [00:15<24:10,  2.11s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▉                                                                                 | 8/695 [00:18<24:24,  2.13s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|█                                                                                 | 9/695 [00:20<26:00,  2.27s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|█▏                                                                               | 10/695 [00:23<26:51,  2.35s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▎                                                                               | 11/695 [00:25<27:14,  2.39s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▍                                                                               | 12/695 [00:28<27:09,  2.39s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▌                                                                               | 13/695 [00:30<26:56,  2.37s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▋                                                                               | 14/695 [00:33<27:41,  2.44s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▋                                                                               | 15/695 [00:35<28:13,  2.49s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▊                                                                               | 16/695 [00:38<28:07,  2.49s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▉                                                                               | 17/695 [00:40<28:35,  2.53s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██                                                                               | 18/695 [00:43<28:24,  2.52s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▏                                                                              | 19/695 [00:45<28:43,  2.55s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▎                                                                              | 20/695 [00:48<29:32,  2.63s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▍                                                                              | 21/695 [00:51<30:21,  2.70s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▌                                                                              | 22/695 [00:54<31:47,  2.83s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▋                                                                              | 23/695 [00:57<31:15,  2.79s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▊                                                                              | 24/695 [01:00<30:40,  2.74s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|██▉                                                                              | 25/695 [01:02<30:55,  2.77s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███                                                                              | 26/695 [01:05<30:44,  2.76s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▏                                                                             | 27/695 [01:08<30:54,  2.78s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▎                                                                             | 28/695 [01:11<31:14,  2.81s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  4%|███▍                                                                             | 29/695 [01:14<31:47,  2.86s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▍                                                                             | 30/695 [01:16<30:39,  2.77s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▌                                                                             | 31/695 [01:19<29:58,  2.71s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▋                                                                             | 32/695 [01:21<29:12,  2.64s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▊                                                                             | 33/695 [01:24<28:44,  2.61s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▉                                                                             | 34/695 [01:27<28:51,  2.62s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|████                                                                             | 35/695 [01:29<28:47,  2.62s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|████▏                                                                            | 36/695 [01:32<29:13,  2.66s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|████▎                                                                            | 37/695 [01:35<29:17,  2.67s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|████▍                                                                            | 38/695 [01:38<30:59,  2.83s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▌                                                                            | 39/695 [01:40<30:14,  2.77s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▋                                                                            | 40/695 [01:43<29:52,  2.74s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▊                                                                            | 41/695 [01:46<29:34,  2.71s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▉                                                                            | 42/695 [01:49<31:25,  2.89s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  6%|█████                                                                            | 43/695 [01:53<34:33,  3.18s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  6%|█████▏                                                                           | 44/695 [01:57<38:38,  3.56s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|█████▏                                                                           | 45/695 [02:02<42:05,  3.88s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  7%|█████▎                                                                           | 46/695 [02:07<44:08,  4.08s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▍                                                                           | 47/695 [02:10<41:59,  3.89s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▌                                                                           | 48/695 [02:14<41:04,  3.81s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▋                                                                           | 49/695 [02:18<44:04,  4.09s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  7%|█████▊                                                                           | 50/695 [02:23<44:11,  4.11s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▉                                                                           | 51/695 [02:27<44:54,  4.18s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  7%|██████                                                                           | 52/695 [02:32<46:33,  4.34s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  8%|██████▏                                                                          | 53/695 [02:36<45:51,  4.29s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  8%|██████▎                                                                          | 54/695 [02:40<45:38,  4.27s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  8%|██████▍                                                                          | 55/695 [02:44<44:52,  4.21s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  8%|██████▌                                                                          | 56/695 [02:49<46:56,  4.41s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▋                                                                          | 57/695 [02:53<45:33,  4.28s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▊                                                                          | 58/695 [02:58<47:17,  4.46s/it]

In [None]:
Image_features = pkl.dump(image_features, open('Images_features.pkl','wb'))

In [None]:
filenames = pkl.dump(filenames, open('filenames.pkl','wb'))

In [None]:
#Loading Pickle Files

In [None]:
Image_features = pkl.load(open('Images_features.pkl','rb'))

In [None]:
filenames = pkl.load(open('filenames.pkl','rb'))

In [None]:
np.array(Image_features).shape

In [None]:
neighbors = NearestNeighbors(n_neighbors=6, algorithm='brute', metric='euclidean')

In [None]:
neighbors.fit(Image_features)

In [None]:
input_image = extract_features_from_images('16871.jpg',model)

In [None]:
distance,indices = neighbors.kneighbors([input_image])

In [None]:
indices[0]

In [None]:
from IPython.display import Image

In [None]:
Image('16871.jpg')

In [None]:
Image(filenames[indices[0][1]])

In [None]:
Image(filenames[indices[0][2]])

In [None]:
Image(filenames[indices[0][3]])

In [None]:
Image(filenames[indices[0][4]])

In [None]:
Image(filenames[indices[0][5]])