In [2]:

import torch
from torchvision import transforms
from torchvision.models import vgg16
import faiss
import os
import numpy as np
from PIL import Image
import pickle



In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
# Preprocessing applied to every PIL image before it is fed to the network.
transform = transforms.Compose([
    # Scale the shorter side to 256 px (an int size preserves the aspect ratio;
    # it does NOT force a 256x256 square).
    transforms.Resize(size=256),
    # Keep only the central 224x224 window.
    transforms.CenterCrop(size=224),
    # PIL image -> PyTorch tensor.
    transforms.ToTensor(),
    # Standardise each channel with the ImageNet mean / std.
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Pre-trained VGG16, keeping only its `.features` sub-module (the conv / ReLU /
# max-pool stack); the whole classifier head is discarded.
model = vgg16(pretrained=True).features
model = model.to(device)
# Evaluation mode. Kept as the cell's last expression so the layer summary is displayed.
model.eval()




Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [5]:
def get_image_embedding(img_path):
  """Encode one image file as a flat feature vector.

  Relies on the notebook-level `transform`, `model` and `device` objects.

  Args:
    img_path: Path of the image file to encode.

  Returns:
    A 1-D numpy array holding the flattened output of `model` for this image
    (with the notebook's VGG16 `features` model and 224x224 crop that is
    512 * 7 * 7 = 25088 values).

  Raises:
    Whatever `PIL.Image.open` raises for a missing or unreadable file.
  """
  # Open inside a context manager so the underlying file handle is released
  # as soon as the pixels have been read; Image.open is lazy and would
  # otherwise keep the file open until garbage collection.
  with Image.open(img_path) as raw_img:
    # Force 3-channel RGB. Grayscale, palette or RGBA files would otherwise
    # produce 1 or 4 channels, which breaks the 3-channel Normalize step and
    # the model's first convolution (3 input channels).
    img = transform(raw_img.convert("RGB"))
  img = img.unsqueeze(0).to(device)  # Add batch dimension and move to device
  with torch.no_grad():  # Inference only: no autograd bookkeeping needed
    embedding = model(img)
  # The batch holds a single image, so flattening from dim 0 yields one vector.
  embedding = embedding.flatten(start_dim=0)
  return embedding.cpu().detach().numpy()



In [6]:
def create_faiss_index(embeddings):
  """Build an exact (flat) inner-product FAISS index over `embeddings`.

  Args:
    embeddings: Array-like of shape (n_vectors, dim), one embedding per row.

  Returns:
    A `faiss.IndexFlatIP` containing every row, added in row order.

  Raises:
    ValueError: If `embeddings` is not 2-D (e.g. the empty 1-D array produced
      when no images were found).

  Note:
    Vectors are stored exactly as given. Inner product only equals cosine
    similarity when the vectors are L2-normalised, and this function does not
    normalise them.
  """
  # FAISS expects C-contiguous float32 matrices; coerce so float64 or sliced
  # (non-contiguous) input also works. No copy is made when the array
  # already has that layout.
  vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
  if vectors.ndim != 2:
    raise ValueError(
        "embeddings must be a 2-D array of shape (n_vectors, dim); "
        f"got shape {vectors.shape}"
    )
  d = vectors.shape[1]  # Embedding dimension
  index = faiss.IndexFlatIP(d)
  index.add(vectors)
  return index

In [7]:
def get_image_paths(folder_path):
  """List the image files located directly inside `folder_path`.

  Only the top level of the folder is inspected (no recursion). A file counts
  as an image when its name ends, case-insensitively, in .jpg, .jpeg or .png.

  Args:
    folder_path: Directory to scan.

  Returns:
    A list of paths, each `folder_path` joined with a file name (so they are
    absolute only if `folder_path` is), sorted by file name.
  """
  image_extensions = (".jpg", ".jpeg", ".png")
  image_paths = []
  # os.listdir returns entries in arbitrary order; sort so that the order of
  # the paths (and of anything indexed from them) is reproducible.
  for filename in sorted(os.listdir(folder_path)):
    if not filename.lower().endswith(image_extensions):
      continue
    image_path = os.path.join(folder_path, filename)
    # Skip directories whose name merely looks like an image file.
    if os.path.isfile(image_path):
      image_paths.append(image_path)
  return image_paths

In [8]:
def get_all_filenames(folder_path, extensions=None):
  """Recursively list the files under `folder_path`.

  Args:
    folder_path: Root directory to walk.
    extensions: Optional suffix or collection of suffixes (e.g.
      (".jpg", ".png")). When given, only files whose name ends with one of
      them (case-insensitive) are returned. The default, None, returns every
      file.

  Returns:
    A list of paths using forward slashes, each built by joining the walked
    directory with the file name (so they are absolute only if `folder_path`
    is). The order is deterministic: a directory's own files sorted by name,
    then its sub-directories in sorted order.
  """
  if extensions is not None:
    if isinstance(extensions, str):
      extensions = (extensions,)
    # str.endswith needs a tuple; lower-case for a case-insensitive match.
    extensions = tuple(ext.lower() for ext in extensions)

  filenames = []
  for root, dirs, files in os.walk(folder_path):
    # Sorting `dirs` in place fixes the order in which os.walk descends
    # (top-down walk), making the result reproducible.
    dirs.sort()
    for filename in sorted(files):
      if extensions is not None and not filename.lower().endswith(extensions):
        continue
      # Normalise Windows separators so stored paths look the same on every OS.
      filepath = os.path.join(root, filename).replace("\\", "/")
      filenames.append(filepath)
  return filenames

In [9]:
# Collect every file under the dataset folder (searched recursively).
# Flat-folder, images-only alternative: get_image_paths("data/images/")
image_dir = "data/animal/"
image_paths = get_all_filenames(image_dir)

# Fail fast: a missing or empty folder yields an empty list without any error,
# and the notebook would only break later, at index creation, with a far less
# helpful message.
if not image_paths:
    raise FileNotFoundError(f"No files found under {image_dir!r}")

In [10]:
# Encode every collected path; row i of the matrix belongs to image_paths[i].
embeddings = np.array(list(map(get_image_embedding, image_paths)))

# Quick sanity check of the first few rows.
print(embeddings[:10])

[[10.494837    7.2048087   3.141008   ...  0.          0.
   1.0820426 ]
 [ 0.10750625  4.06217     3.910373   ...  0.          0.
   0.        ]
 [ 0.          2.164272    3.0900533  ...  0.          0.
   0.        ]
 ...
 [ 2.459496    0.3876895   0.         ...  4.72737     4.177019
   4.9082494 ]
 [ 0.95982873  0.          0.         ...  0.9100795   0.9312251
   3.060404  ]
 [ 2.6453047   0.          0.         ...  0.          0.
   0.        ]]


In [11]:
# Build the inner-product index over all embeddings computed above.
faiss_index = create_faiss_index(embeddings)

# Persist the raw embedding matrix and the index (optional, for deployment).
np.save("embeddings.npy", embeddings)
faiss.write_index(faiss_index, "faiss_index.bin")

# Persist the paths too: embeddings were built in image_paths order, so this
# list is what maps a row position back to its image file.
# NOTE: despite the "img_dict" file name, the pickled object is a plain list.
with open("img_dict.pkl", mode="wb") as paths_file:
    pickle.dump(image_paths, paths_file)