In [1]:
import numpy as np
import pandas as pd
import os

from tqdm import tqdm
from PIL import Image, ImageFile

from sentence_transformers import SentenceTransformer
import torch

def load_image_paths(directory, extensions=(".jpg",)):
    """Yield the paths of image files found directly inside ``directory``.

    Entries are visited in sorted name order so repeated runs produce the
    same sequence (``os.listdir`` itself returns entries in arbitrary order).
    The extension check ignores case, so ``photo.JPG`` is matched as well as
    ``photo.jpg``. Sub-directories are not descended into.

    Args:
        directory: Directory to scan.
        extensions: A file suffix or an iterable of suffixes to accept.
            Defaults to ``(".jpg",)``.

    Yields:
        ``os.path.join(directory, file_name)`` for every matching entry.
    """
    # str.endswith accepts a tuple of suffixes; wrap a lone string so callers
    # may pass either form.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    for entry in sorted(os.listdir(directory)):
        file_name = os.fsdecode(entry)

        if file_name.lower().endswith(suffixes):
            yield os.path.join(directory, file_name)

def process_images(image_paths, model):
    """Encode each readable image and return file names alongside embeddings.

    Every path is opened with PIL, converted to RGB when it is in another
    mode, and passed to ``model.encode``. Images that fail at any step are
    reported on stdout and skipped, so the two returned containers always
    have the same length and matching order.

    Args:
        image_paths: Iterable of image file paths (``str`` or path-like).
        model: Object exposing ``encode(image)``; it is called once per image.

    Returns:
        A tuple ``(names, embeddings)`` where ``names`` is a ``pd.Series`` of
        the final path component of each processed image and ``embeddings``
        is ``np.array`` built from the per-image ``model.encode`` results.
    """
    embeddings = []
    embeddings_map = []

    for image_path in tqdm(image_paths, desc="Processing images"):
        try:
            # Resolve the file name before encoding: if this step raised after
            # the embedding had been stored, the two lists would drift apart.
            # Both "\\" and "/" are treated as separators so the result is the
            # bare file name regardless of platform.
            file_name = os.fspath(image_path).replace("\\", "/").rsplit("/", 1)[-1]

            with Image.open(image_path) as img:
                rgb_img = img if img.mode == "RGB" else img.convert("RGB")
                embedding = model.encode(rgb_img)
        except Exception as e:
            # Best-effort batch: report the failure and carry on with the rest.
            print(f"Error processing {image_path}: {e}")
            continue

        # Append both values together so index i in one matches i in the other.
        embeddings.append(embedding)
        embeddings_map.append(file_name)

    return pd.Series(embeddings_map), np.array(embeddings)

  from tqdm.autonotebook import tqdm, trange





In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
img_model = SentenceTransformer('clip-ViT-B-32', device=device)

# Build the path from its components instead of a backslash literal:
# "\d" and "\i" are invalid escape sequences in a normal string, and a
# hard-coded "\" separator only resolves on Windows.
directory = os.path.join("..", "data", "images", "dev-images")
image_paths = list(load_image_paths(directory))
embeddings_map, img_embeddings = process_images(image_paths, img_model)

print(f"Number of images processed: {len(img_embeddings)}")
print(f"Shape of embeddings: {img_embeddings.shape}")

  attn_output = torch.nn.functional.scaled_dot_product_attention(
Processing images: 100%|██████████| 1413/1413 [00:29<00:00, 48.31it/s]

Number of images processed: 1413
Shape of embeddings: (1413, 512)





In [3]:
# Build output paths from components: the backslash literals relied on invalid
# escape sequences ("\d", "\e") and only resolved on Windows.
# NOTE(review): images are read from "dev-images" but written under
# "embeddings-public-test-clip" — confirm this is the intended destination.
output_dir = os.path.join("..", "data", "embeddings-public-test-clip")
os.makedirs(output_dir, exist_ok=True)  # np.save / to_csv do not create missing folders

# One file name per row, no header and no index column, so row i of the CSV
# corresponds to row i of the saved embeddings array.
embeddings_map.to_csv(os.path.join(output_dir, "embeddings-map.csv"), header=False, index=False)
output_embeddings = os.path.join(output_dir, "embeddings.npy")
np.save(output_embeddings, img_embeddings)
print(f"Embeddings saved to {output_embeddings}")

Embeddings saved to ..\data\embeddings-public-test-clip\embeddings.npy
