In [27]:
# FAISS (CPU build) backs the vector index created later in this notebook.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q faiss-cpu



## Download Data from Kaggle

In [28]:
# ! pip install -q kaggle
from google.colab import files

# Upload kaggle.json (the Kaggle API token) from the local machine.
# The result is bound to a name on purpose: files.upload() returns a dict of
# {filename: file bytes}, and leaving the call as the cell's last expression
# would echo the token's contents into the saved notebook output.
uploaded_files = files.upload()

Saving kaggle.json to kaggle (1).json


{'kaggle (1).json': b'{"username":"<REDACTED>","key":"<REDACTED>"}'}

In [29]:
# Put the uploaded API token in ~/.kaggle, the directory this notebook uses for the kaggle CLI.
# -p keeps the cell re-runnable: plain mkdir errors out once the directory exists.
! mkdir -p ~/.kaggle
! cp kaggle.json ~/.kaggle/
# Make the token readable/writable by the current user only.
! chmod 600 ~/.kaggle/kaggle.json
# Smoke test: listing datasets only succeeds when the credentials are accepted.
! kaggle datasets list

mkdir: cannot create directory ‘/root/.kaggle’: File exists
ref                                                           title                                              size  lastUpdated          downloadCount  voteCount  usabilityRating  
------------------------------------------------------------  ------------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
asinow/car-price-dataset                                      Car Price Dataset                                 135KB  2025-01-26 19:53:28           4896         61  1.0              
anandshaw2001/netflix-movies-and-tv-shows                     Netflix Movies and TV Shows                         1MB  2025-01-03 10:33:01          15292        399  1.0              
ashaychoudhary/diabetes-prediction-in-america-dataset         Diabetes Prediction in America Dataset              2MB  2025-02-04 10:35:49           1238         22  1.0              
ashaychoudhary/adver

In [30]:
# Dependencies for downloading the dataset.
# %pip (rather than !pip) installs into the environment of the running kernel;
# -q keeps the installer log out of the notebook output.
%pip install -q opendatasets pandas



In [31]:
import opendatasets as od
import pandas

# Download the Kaggle "images-dataset" with opendatasets.
od.download("https://www.kaggle.com/datasets/pavansanagapati/images-dataset")


Skipping, found downloaded files in "./images-dataset" (use force=True to force download)


## Run model demo

In [32]:
from PIL import Image
import requests
from transformers import AutoProcessor, AutoModel
import torch

# Load the SigLIP checkpoint and its matching processor. Later cells reuse
# `model` and `processor`, so this cell must run before them.
model = AutoModel.from_pretrained("google/siglip-base-patch16-224")
processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224")

# Fetch one sample image over HTTP to sanity-check the model.
# NOTE(review): requests.get() has no timeout and the status code is not checked.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Candidate captions to score against the image.
texts = ["a photo of 2 cats", "a photo of 2 dogs"]
inputs = processor(text=texts, images=image, padding="max_length", return_tensors="pt")

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)

# The logits are passed through a sigmoid (not a softmax), so each caption is
# scored on its own rather than relative to the other captions.
logits_per_image = outputs.logits_per_image
probs = torch.sigmoid(logits_per_image) # these are the probabilities
print(f"{probs[0][0]:.1%} that image 0 is '{texts[0]}'")


31.9% that image 0 is 'a photo of 2 cats'


In [33]:
# Image-only preprocessing of the sample image loaded above.
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    # Raw image features, L2-normalised along the feature axis in one step.
    embeddings = torch.nn.functional.normalize(
        model.get_image_features(**inputs), dim=-1
    )

# Hand the result over to NumPy for inspection.
image_embedding = embeddings.cpu().numpy()

print("Image Embedding Shape:", image_embedding.shape)

Image Embedding Shape: (1, 768)


## Embed

In [34]:
# Run on CUDA when it is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

In [35]:
import os
from glob import glob
from tqdm import tqdm

def retrieve_images(image_dir, extensions=(".bmp",)):
    """Recursively collect image file paths below ``image_dir``.

    Args:
        image_dir: Root directory to search; sub-directories are included.
        extensions: File extension(s) to match, with or without the leading
            dot. A single string is accepted as well. Defaults to BMP only.

    Returns:
        A sorted list of unique matching paths. Sorting makes the order (and
        therefore the position-based IDs assigned when indexing) reproducible
        across runs, since glob itself returns results in arbitrary order.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)

    matches = set()
    for extension in extensions:
        # Always match on ".ext": a bare "*bmp" would also pick up any file
        # whose name merely ends in the letters "bmp".
        suffix = extension if extension.startswith(".") else "." + extension
        pattern = os.path.join(image_dir, "**", "*" + suffix)
        matches.update(glob(pattern, recursive=True))

    return sorted(matches)

def generate_embeddings_image(model, processor, image_paths):
    """Compute one L2-normalised image embedding per file in ``image_paths``.

    Args:
        model: Model exposing ``get_image_features(**inputs)``.
        processor: Callable that turns a PIL image into model inputs
            (called as ``processor(images=..., return_tensors="pt")``).
        image_paths: Iterable of image file paths.

    Returns:
        A list with one embedding tensor per path, in input order. Each entry
        is the first row of the model's feature output and lives on the
        model's device.
    """
    # Follow the model's own device instead of relying on a notebook global.
    target_device = next(model.parameters()).device
    embeddings = []

    for image_path in tqdm(image_paths, desc="Processing Images"):
        # The context manager releases the file handle; convert("RGB") loads
        # the pixel data and guarantees a 3-channel image for the processor.
        with Image.open(image_path) as image:
            rgb_image = image.convert("RGB")

        inputs = processor(images=rgb_image, return_tensors="pt").to(target_device)

        with torch.no_grad():
            image_embedding = model.get_image_features(**inputs)
            # normalize() returns a new tensor; the result must be kept,
            # otherwise the stored embedding stays un-normalised.
            image_embedding = torch.nn.functional.normalize(image_embedding, dim=-1)

        embeddings.append(image_embedding[0])

    return embeddings

def generate_embeddings_text(model, processor, texts):
    """Compute one L2-normalised text embedding per query in ``texts``.

    Args:
        model: Model exposing ``get_text_features(**inputs)``.
        processor: Callable that tokenises a text query into model inputs.
        texts: Iterable of query strings. A single string is treated as one
            query rather than being iterated character by character.

    Returns:
        A list with one embedding tensor per query, in input order. Each entry
        is the first row of the model's feature output and lives on the
        model's device.
    """
    if isinstance(texts, str):
        texts = [texts]

    # Follow the model's own device instead of relying on a notebook global.
    target_device = next(model.parameters()).device
    embeddings = []

    for query in tqdm(texts, desc="Processing Texts"):
        # Same padding="max_length" setting as the demo cell's text call.
        inputs = processor(
            text=query, padding="max_length", return_tensors="pt"
        ).to(target_device)

        with torch.no_grad():
            text_embedding = model.get_text_features(**inputs)
            # normalize() returns a new tensor; the result must be kept,
            # otherwise the stored embedding stays un-normalised.
            text_embedding = torch.nn.functional.normalize(text_embedding, dim=-1)

        embeddings.append(text_embedding[0])

    return embeddings

In [36]:
# Root folder of the downloaded dataset (absolute path on this runtime).
IMAGES_DIR = "/content/images-dataset/data"
# Collect the image files below IMAGES_DIR, then embed each of them.
image_paths = retrieve_images(IMAGES_DIR)
# Note: this rebinds `embeddings`, which the single-image demo cell also used.
embeddings = generate_embeddings_image(model, processor, image_paths)

Processing Images: 100%|██████████| 1570/1570 [00:55<00:00, 28.07it/s]


## Store FAISS Vector DB

In [37]:
import faiss
import numpy as np

def create_faiss_index(embeddings, image_paths, output_path):
  """Build an inner-product FAISS index over ``embeddings`` and save it.

  Vector ``i`` is stored under ID ``i``, and ``image_paths[i]`` is written as
  line ``i`` of ``output_path + '.paths'``, so a search hit's ID can be mapped
  back to its image file.

  Inner-product scores equal cosine similarity only when the vectors are
  L2-normalised; this function stores the vectors exactly as given.

  Args:
    embeddings: Sequence of embedding vectors (torch tensors or array-likes),
      one per image.
    image_paths: Sequence of image paths, aligned with ``embeddings``.
    output_path: Destination file for the serialised index.

  Returns:
    The populated FAISS index.

  Raises:
    ValueError: If ``embeddings`` is empty or its length differs from
      ``image_paths``.
  """
  if len(embeddings) == 0:
    raise ValueError("embeddings is empty; nothing to index")
  if len(embeddings) != len(image_paths):
    raise ValueError(
        f"Got {len(embeddings)} embeddings but {len(image_paths)} image paths; "
        "they must be aligned one-to-one"
    )

  # Accept torch tensors (detached and moved to the CPU) as well as anything
  # NumPy can already interpret as an array.
  rows = [
      t.detach().cpu().numpy() if hasattr(t, "detach") else np.asarray(t)
      for t in embeddings
  ]

  # One flat float32 row per embedding, C-contiguous as FAISS requires.
  # The reshape also flattens embeddings that carry a leading batch axis.
  vectors = np.ascontiguousarray(
      np.stack(rows).reshape(len(rows), -1), dtype=np.float32
  )

  dimension = vectors.shape[1]
  index = faiss.IndexFlatIP(dimension)
  index = faiss.IndexIDMap(index)
  print("Dimension: ", dimension)
  print("Dimension Shape: ", vectors[0].shape)

  # Add vectors to index with explicit 64-bit IDs (the default integer dtype
  # of NumPy is platform-dependent).
  ids = np.arange(len(vectors), dtype=np.int64)
  index.add_with_ids(vectors, ids)

  # Save index to disk
  faiss.write_index(index, output_path)
  print(f"Index saved to {output_path}")

  # Save image paths, one per line, in ID order
  with open(output_path + '.paths', 'w') as f:
      for path in image_paths:
          f.write(path + '\n')
  return index

In [38]:
# Destination of the FAISS index; the image paths are written next to it as
# OUTPUT_PATH + ".paths".
OUTPUT_PATH = "/content/index.faiss"
index = create_faiss_index(embeddings, image_paths, OUTPUT_PATH)

Dimension:  768
Dimension Shape:  (768,)
Index saved to /content/index.faiss


# Inference

## Load Vector DB

In [38]:
import matplotlib.pyplot as plt

def load_faiss_index(index_path):
  """
  Read a FAISS index from the file at index_path and return it
  """
  return faiss.read_index(index_path)

def load_faiss_image_paths(index_path):
  """
  Read the image paths stored next to a FAISS index.

  The paths are read from the file named index_path + '.paths' and returned
  as a list with one entry per line, without line terminators.
  """
  paths_file = index_path + '.paths'
  with open(paths_file, 'r') as handle:
      contents = handle.read()
  return contents.splitlines()