In [9]:
#Importing libraries

In [11]:
import numpy as np
import os
import pickle as pkl
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPool2D, Dense, Input, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import faiss 
from sklearn.model_selection import train_test_split

In [12]:
#Extract filenames from folder

In [15]:
image_dir = 'data/images'
filenames = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))]
print(f"Total images found: {len(filenames)}")

Total images found: 44441


In [17]:
#Define feature extractor model with fine-tuning capability

In [19]:
def build_feature_extractor(finetune=False):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224,224,3))
    
    if not finetune:
        base_model.trainable = False
    else:
        for layer in base_model.layers[:-20]:
            layer.trainable = False
        for layer in base_model.layers[-20:]:
            layer.trainable = True
    
    x = base_model.output
    x = GlobalMaxPool2D()(x)  
    x = tf.keras.layers.Lambda(lambda t: tf.math.l2_normalize(t, axis=1))(x)  # L2 normalize embeddings
    
    model = Model(inputs=base_model.input, outputs=x)
    return model

feature_extractor = build_feature_extractor(finetune=False)
feature_extractor.summary()





In [21]:
#Extract features function

In [23]:
def extract_features(image_paths, model, batch_size=32):
    features = []
    for i in tqdm(range(0, len(image_paths), batch_size)):
        batch_paths = image_paths[i:i+batch_size]
        batch_images = []
        for path in batch_paths:
            img = image.load_img(path, target_size=(224,224))
            img_array = image.img_to_array(img)
            batch_images.append(img_array)
        batch_images = preprocess_input(np.array(batch_images))
        batch_features = model.predict(batch_images)
        features.append(batch_features)
    features = np.vstack(features)
    return features

In [None]:
features = extract_features(filenames, feature_extractor)
np.save('features/image_features.npy', features)
with open('features/filenames.pkl', 'wb') as f:
    pkl.dump(filenames, f)

  0%|                                                                                         | 0/1389 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9s/step


  0%|                                                                               | 1/1389 [00:11<4:34:39, 11.87s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  0%|                                                                               | 2/1389 [00:14<2:33:49,  6.65s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  0%|▏                                                                              | 3/1389 [00:17<1:54:16,  4.95s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  0%|▏                                                                              | 4/1389 [00:20<1:33:01,  4.03s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  0%|▎                                                                              | 5/1389 [00:22<1:20:02,  3.47s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  0%|▎                                                                              | 6/1389 [00:25<1:14:52,  3.25s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▍                                                                              | 7/1389 [00:28<1:10:38,  3.07s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▍                                                                              | 8/1389 [00:30<1:06:17,  2.88s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▌                                                                              | 9/1389 [00:33<1:02:09,  2.70s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▌                                                                             | 10/1389 [00:35<1:00:02,  2.61s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▋                                                                               | 11/1389 [00:37<58:23,  2.54s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▋                                                                               | 12/1389 [00:40<57:40,  2.51s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▋                                                                               | 13/1389 [00:42<57:19,  2.50s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▊                                                                               | 14/1389 [00:45<59:46,  2.61s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▊                                                                             | 15/1389 [00:48<1:01:57,  2.71s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▉                                                                             | 16/1389 [00:51<1:00:58,  2.66s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  1%|▉                                                                               | 17/1389 [00:53<59:23,  2.60s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step


  1%|█                                                                             | 18/1389 [00:59<1:21:17,  3.56s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  1%|█                                                                             | 19/1389 [01:03<1:24:40,  3.71s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  1%|█                                                                             | 20/1389 [01:07<1:23:31,  3.66s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  2%|█▏                                                                            | 21/1389 [01:10<1:22:54,  3.64s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  2%|█▏                                                                            | 22/1389 [01:14<1:23:16,  3.66s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▎                                                                            | 23/1389 [01:17<1:17:18,  3.40s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▎                                                                            | 24/1389 [01:23<1:36:09,  4.23s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▍                                                                            | 25/1389 [01:26<1:25:36,  3.77s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▍                                                                            | 26/1389 [01:28<1:18:14,  3.44s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▌                                                                            | 27/1389 [01:31<1:15:03,  3.31s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▌                                                                            | 28/1389 [01:34<1:09:43,  3.07s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▋                                                                            | 29/1389 [01:36<1:07:11,  2.96s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▋                                                                            | 30/1389 [01:39<1:07:27,  2.98s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▋                                                                            | 31/1389 [01:42<1:05:35,  2.90s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▊                                                                            | 32/1389 [01:45<1:02:44,  2.77s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▊                                                                            | 33/1389 [01:47<1:02:57,  2.79s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  2%|█▉                                                                            | 34/1389 [01:50<1:02:24,  2.76s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|█▉                                                                            | 35/1389 [01:53<1:01:01,  2.70s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██                                                                              | 36/1389 [01:55<59:41,  2.65s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▏                                                                             | 37/1389 [01:58<58:35,  2.60s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▏                                                                             | 38/1389 [02:00<57:21,  2.55s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▏                                                                             | 39/1389 [02:03<56:38,  2.52s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▎                                                                             | 40/1389 [02:05<56:34,  2.52s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▎                                                                             | 41/1389 [02:08<56:18,  2.51s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▍                                                                             | 42/1389 [02:10<56:01,  2.50s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▍                                                                             | 43/1389 [02:13<56:00,  2.50s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▌                                                                             | 44/1389 [02:15<55:33,  2.48s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▌                                                                             | 45/1389 [02:18<56:39,  2.53s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▋                                                                             | 46/1389 [02:20<56:03,  2.50s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▋                                                                             | 47/1389 [02:23<58:00,  2.59s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  3%|██▋                                                                           | 48/1389 [02:29<1:19:09,  3.54s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|██▊                                                                           | 49/1389 [02:31<1:11:57,  3.22s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step


  4%|██▊                                                                           | 50/1389 [02:38<1:36:11,  4.31s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  4%|██▊                                                                           | 51/1389 [02:41<1:30:39,  4.07s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  4%|██▉                                                                           | 52/1389 [02:45<1:26:15,  3.87s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|██▉                                                                           | 53/1389 [02:48<1:21:59,  3.68s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


  4%|███                                                                           | 54/1389 [02:52<1:19:53,  3.59s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███                                                                           | 55/1389 [02:54<1:15:33,  3.40s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▏                                                                          | 56/1389 [02:57<1:09:35,  3.13s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▏                                                                          | 57/1389 [03:00<1:05:48,  2.96s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▎                                                                          | 58/1389 [03:02<1:02:30,  2.82s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▎                                                                          | 59/1389 [03:05<1:00:24,  2.73s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▍                                                                            | 60/1389 [03:07<58:16,  2.63s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▌                                                                            | 61/1389 [03:10<57:50,  2.61s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  4%|███▌                                                                            | 62/1389 [03:12<59:34,  2.69s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▌                                                                          | 63/1389 [03:15<1:00:54,  2.76s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▋                                                                            | 64/1389 [03:18<58:43,  2.66s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▋                                                                          | 65/1389 [03:21<1:00:14,  2.73s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▊                                                                            | 66/1389 [03:23<59:28,  2.70s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▊                                                                            | 67/1389 [03:26<58:56,  2.67s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▉                                                                            | 68/1389 [03:29<58:51,  2.67s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▉                                                                            | 69/1389 [03:31<59:40,  2.71s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|████                                                                            | 70/1389 [03:34<57:45,  2.63s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|███▉                                                                          | 71/1389 [03:38<1:05:44,  2.99s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|████                                                                          | 72/1389 [03:40<1:02:21,  2.84s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|████                                                                          | 73/1389 [03:43<1:00:21,  2.75s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|████▏                                                                         | 74/1389 [03:46<1:02:32,  2.85s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|████▏                                                                         | 75/1389 [03:48<1:00:00,  2.74s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  5%|████▍                                                                           | 76/1389 [03:51<58:14,  2.66s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▍                                                                           | 77/1389 [03:53<58:51,  2.69s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▍                                                                           | 78/1389 [03:56<59:14,  2.71s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▌                                                                           | 79/1389 [03:59<58:30,  2.68s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▌                                                                           | 80/1389 [04:02<59:25,  2.72s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▋                                                                           | 81/1389 [04:04<58:16,  2.67s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▋                                                                           | 82/1389 [04:07<58:09,  2.67s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▊                                                                           | 83/1389 [04:09<57:44,  2.65s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▊                                                                           | 84/1389 [04:12<58:39,  2.70s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▉                                                                           | 85/1389 [04:15<58:16,  2.68s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|████▉                                                                           | 86/1389 [04:18<57:34,  2.65s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|█████                                                                           | 87/1389 [04:20<57:55,  2.67s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|█████                                                                           | 88/1389 [04:23<58:20,  2.69s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|█████▏                                                                          | 89/1389 [04:26<58:25,  2.70s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  6%|█████▏                                                                          | 90/1389 [04:28<59:07,  2.73s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▏                                                                          | 91/1389 [04:31<59:54,  2.77s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▏                                                                        | 92/1389 [04:34<1:00:47,  2.81s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▏                                                                        | 93/1389 [04:37<1:00:14,  2.79s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▎                                                                        | 94/1389 [04:40<1:00:58,  2.83s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▍                                                                          | 95/1389 [04:43<59:32,  2.76s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▌                                                                          | 96/1389 [04:45<59:52,  2.78s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▌                                                                          | 97/1389 [04:48<59:01,  2.74s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▋                                                                          | 98/1389 [04:51<59:33,  2.77s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▋                                                                          | 99/1389 [04:54<59:15,  2.76s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▋                                                                         | 100/1389 [04:56<59:47,  2.78s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▋                                                                         | 101/1389 [04:59<58:30,  2.73s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▊                                                                         | 102/1389 [05:02<58:30,  2.73s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▋                                                                       | 103/1389 [05:05<1:00:52,  2.84s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  7%|█████▊                                                                       | 104/1389 [05:08<1:01:11,  2.86s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|█████▉                                                                         | 105/1389 [05:10<58:23,  2.73s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████                                                                         | 106/1389 [05:13<57:33,  2.69s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████                                                                         | 107/1389 [05:15<56:27,  2.64s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▏                                                                        | 108/1389 [05:18<55:39,  2.61s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▏                                                                        | 109/1389 [05:20<55:26,  2.60s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▎                                                                        | 110/1389 [05:23<55:42,  2.61s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▎                                                                        | 111/1389 [05:26<55:54,  2.62s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▎                                                                        | 112/1389 [05:28<56:02,  2.63s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▍                                                                        | 113/1389 [05:31<56:21,  2.65s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▍                                                                        | 114/1389 [05:34<57:21,  2.70s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▌                                                                        | 115/1389 [05:37<58:54,  2.77s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▌                                                                        | 116/1389 [05:40<59:04,  2.78s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▋                                                                        | 117/1389 [05:42<59:13,  2.79s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  8%|██████▋                                                                        | 118/1389 [05:45<58:18,  2.75s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|██████▊                                                                        | 119/1389 [05:48<57:58,  2.74s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|██████▊                                                                        | 120/1389 [05:51<59:30,  2.81s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|██████▉                                                                        | 121/1389 [05:53<58:33,  2.77s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|██████▉                                                                        | 122/1389 [05:56<56:34,  2.68s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|██████▉                                                                        | 123/1389 [05:59<57:29,  2.73s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|███████                                                                        | 124/1389 [06:01<57:23,  2.72s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|██████▉                                                                      | 125/1389 [06:05<1:00:05,  2.85s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|███████▏                                                                       | 126/1389 [06:07<59:34,  2.83s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|███████                                                                      | 127/1389 [06:10<1:01:19,  2.92s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|███████                                                                      | 128/1389 [06:13<1:00:19,  2.87s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|███████▎                                                                       | 129/1389 [06:16<58:50,  2.80s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|███████▍                                                                       | 130/1389 [06:19<57:47,  2.75s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


  9%|███████▎                                                                     | 131/1389 [06:22<1:00:20,  2.88s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


 10%|███████▎                                                                     | 132/1389 [06:25<1:00:39,  2.90s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


 10%|███████▌                                                                       | 133/1389 [06:27<58:46,  2.81s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


 10%|███████▌                                                                       | 134/1389 [06:30<57:09,  2.73s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


 10%|███████▋                                                                       | 135/1389 [06:32<55:27,  2.65s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


 10%|███████▋                                                                       | 136/1389 [06:35<54:55,  2.63s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


 10%|███████▊                                                                       | 137/1389 [06:38<55:22,  2.65s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


 10%|███████▊                                                                       | 138/1389 [06:40<56:01,  2.69s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


 10%|███████▋                                                                     | 139/1389 [06:47<1:20:14,  3.85s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step


 10%|███████▊                                                                     | 140/1389 [06:54<1:43:17,  4.96s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


 10%|███████▊                                                                     | 141/1389 [06:58<1:34:20,  4.54s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


 10%|███████▊                                                                     | 142/1389 [07:02<1:32:13,  4.44s/it]

In [None]:
features = np.load('features/image_features.npy')
with open('features/filenames.pkl', 'rb') as f:
    filenames = pkl.load(f)

print(f"Feature shape: {features.shape}")

In [None]:
#Build FAISS index for fast nearest neighbor search

In [None]:
embedding_dim = features.shape[1]

In [None]:
index = faiss.IndexFlatL2(embedding_dim)
index.add(features)  # Add all feature vectors to index

print(f"Total indexed vectors: {index.ntotal}")

In [None]:
def triplet_loss(y_true, y_pred, margin=0.3):
    # y_pred shape: (batch_size, 3*embedding_dim) concatenated anchor, positive, negative
    anchor, positive, negative = y_pred[:, :embedding_dim], y_pred[:, embedding_dim:2*embedding_dim], y_pred[:, 2*embedding_dim:]
    
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    
    loss = tf.maximum(pos_dist - neg_dist + margin, 0.0)
    return tf.reduce_mean(loss)

# Example input: 3 images per sample (anchor, positive, negative)
def build_triplet_model(feature_extractor):
    input_anchor = Input((224,224,3))
    input_positive = Input((224,224,3))
    input_negative = Input((224,224,3))
    
    feat_anchor = feature_extractor(input_anchor)
    feat_positive = feature_extractor(input_positive)
    feat_negative = feature_extractor(input_negative)
    
    merged = tf.keras.layers.Concatenate(axis=1)([feat_anchor, feat_positive, feat_negative])
    model = Model(inputs=[input_anchor, input_positive, input_negative], outputs=merged)
    
    model.compile(optimizer=Adam(1e-4), loss=triplet_loss)
    return model

In [None]:
#Search function example

In [None]:
img = image.load_img(image_path, target_size=(224,224))
    img_array = image.img_to_array(img)
    img_array = preprocess_input(np.expand_dims(img_array, axis=0))
    
    query_feature = model.predict(img_array)
    query_feature = query_feature / np.linalg.norm(query_feature)
    
    distances, indices = faiss_index.search(query_feature, top_k)
    return distances[0], [filenames[i] for i in indices[0]]

In [None]:
#Example

In [None]:
query_img = filenames[0]  # Just for demo
distances, recommendations = recommend(query_img, feature_extractor, index, filenames)

print("Recommended images:")
for i, img_path in enumerate(recommendations[1:], start=1):
    print(f"{i}. {img_path} (Distance: {distances[i]:.4f})")