### Libraries

In [25]:
import torch
import faiss
from torchvision import transforms
from transformers import AutoImageProcessor, AutoModel
from transformers import AutoProcessor, AutoTokenizer, CLIPModel
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import numpy as np
import os
import time
import json

In [2]:
# Root folder holding one sub-folder of training images per category
images_folder_path = "../../1) Data_Collection/OID/Dataset/train"
# Directory that receives the generated artifacts
output = "clip_embeddings"
# File name of the serialized FAISS index inside `output`
bin_file = "clipIndex.bin"

In [3]:
# Width of the vectors stored in the index
# (presumably the image-feature size of the CLIP checkpoint used below -- confirm against the model config)
EMBEDDING_DIM = 512
# Flat index compared with L2 distance; every image embedding is appended to it
index = faiss.IndexFlatL2(EMBEDDING_DIM)

In [4]:
# Load the CLIP model and its matching processor
def load_clip_model():
    """Return the ``(processor, model)`` pair for ``openai/clip-vit-base-patch32``.

    The pair is built on the first call and memoized on the function object,
    so later calls return the same objects instead of rebuilding the model
    from the checkpoint every time. Re-running this cell creates a new
    function object and therefore starts again with an empty cache.

    Returns:
        tuple: ``(processor_clip, model_clip)`` as produced by
        ``AutoProcessor.from_pretrained`` and ``CLIPModel.from_pretrained``.
    """
    cached_pair = getattr(load_clip_model, "_cached_pair", None)
    if cached_pair is None:
        model_clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        processor_clip = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
        cached_pair = (processor_clip, model_clip)
        # Stored on the function itself so no extra module-level name is needed
        load_clip_model._cached_pair = cached_pair
    return cached_pair

# Preprocess the image using the CLIP processor
def preprocess_image(processor_clip, image_path):
    """Open an image file and run it through the given processor.

    Args:
        processor_clip: Callable processor; it is invoked as
            ``processor_clip(images=image, return_tensors="pt")``.
        image_path: Path of the image file to open.

    Returns:
        Whatever ``processor_clip`` returns for the RGB-converted image.
    """
    # The context manager releases the file handle as soon as the pixels have
    # been converted, rather than leaving the close to garbage collection.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")  # Ensure image is in RGB mode
    return processor_clip(images=image, return_tensors="pt")

# Generate image embeddings with the supplied model
def generate_embeddings(model, image_tensor):
    """Compute image features for preprocessed inputs and return them as NumPy.

    Args:
        model: Object exposing ``get_image_features(**kwargs)``.
        image_tensor: Mapping of keyword arguments that is unpacked into
            ``model.get_image_features``.

    Returns:
        numpy.ndarray: The features moved to the CPU and converted with
        ``.numpy()``.
    """
    # Gradients are not needed for inference, so tracking is switched off
    with torch.no_grad():
        image_features = model.get_image_features(**image_tensor)
    host_features = image_features.cpu()
    return host_features.numpy()

# Save the FAISS index to a .bin file
def save_embeddings_to_bin(file_path):
    """Write the module-level FAISS ``index`` to ``file_path``.

    The parent directory of ``file_path`` is created first when it is
    missing, so the write does not fail on a fresh checkout where the
    output folder has not been created yet.

    Args:
        file_path: Destination path of the serialized index.
    """
    parent_dir = os.path.dirname(file_path)
    if parent_dir:  # empty when file_path is a bare file name
        os.makedirs(parent_dir, exist_ok=True)
    faiss.write_index(index, file_path)
    

In [5]:
# Main function
def main(image_path, output_path):
    """Embed one image and append the vector to the module-level ``index``.

    Args:
        image_path: Path of the image to embed.
        output_path: Not used by this function; kept so existing calls that
            pass it continue to work.

    Side effects:
        Adds the elapsed preprocessing + embedding time to the global
        ``total_time``, increments the global ``total_images`` and adds the
        embedding to ``index``. Obtaining the model via ``load_clip_model``
        happens before the timer starts and is therefore not counted.
    """
    global total_time, total_images

    feature_extractor_clip, model = load_clip_model()

    # perf_counter is monotonic and high resolution, so the measured interval
    # cannot be distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    image_tensor = preprocess_image(feature_extractor_clip, image_path)
    embeddings = generate_embeddings(model, image_tensor)
    elapsed_time = time.perf_counter() - start_time

    total_time += elapsed_time
    total_images += 1

    # FAISS consumes C-contiguous float32 rows; this is a no-op when the
    # embeddings already have that layout.
    index.add(np.ascontiguousarray(embeddings, dtype=np.float32))

In [6]:
total_time = 0
total_images = 0

# Maps the position of each vector in `index` to the image it came from
indexes = {}
idx = 0

# Resolved once up front so it is defined even when no image is found
output_path = os.path.join(output, bin_file)
if output:
    os.makedirs(output, exist_ok=True)

# Start from an empty index: `indexes` restarts at 0 above, so vectors left
# over from a previous run of this cell would break the id -> path mapping.
index.reset()

# Sorted traversal makes the id assignment reproducible across runs/machines
for one_category in sorted(os.listdir(images_folder_path)):
    one_category_path = os.path.join(images_folder_path, one_category)

    # Skip stray files (e.g. .DS_Store) sitting next to the category folders
    if not os.path.isdir(one_category_path):
        continue

    all_images_in_category_folder = sorted(
        x for x in os.listdir(one_category_path) if x.endswith("jpg")
    )

    for img in all_images_in_category_folder:
        img_path = os.path.join(one_category_path, img)

        main(img_path, output_path)

        indexes[idx] = img_path
        idx += 1

save_embeddings_to_bin(output_path)

In [7]:
# Mean time per processed image; falls back to 0 when nothing was processed
if total_images > 0:
    average_time = total_time / total_images
else:
    average_time = 0

print(f"Total Time: {total_time:.2f} seconds")
print(f"Average Time per Image: {average_time:.2f} seconds")

Total Time: 8.95 seconds
Average Time per Image: 0.16 seconds


### Saving indices

In [26]:
# Persist the vector-position -> image-path mapping as JSON in the output folder
indices_path = f'{output}/indices.json'
with open(indices_path, 'w') as indices_file:
    json.dump(indexes, indices_file, indent=4)