In [2]:
from PIL import Image
import torch
import time 
import clip
from transformers import AutoProcessor, CLIPModel, CLIPProcessor
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
import json


def get_image_embedding1(image_path, model, processor,device):
    """Embed one image via ``model.get_image_features``, moving inputs tensor by tensor.

    Args:
        image_path: Path of the image file to embed.
        model: Object exposing ``to(device)`` and ``get_image_features(**inputs)``.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``;
            its result must support ``.items()`` yielding ``(name, tensor)`` pairs.
        device: Torch device the model and every input tensor are moved to.

    Returns:
        The first row of the model output as a plain Python list.

    Side effects:
        Overwrites ``embeddings1.json`` in the current working directory with the
        returned list and prints its length.
    """
    with torch.no_grad():
        # Open the image in a context manager so the file handle is released as
        # soon as preprocessing has produced the tensors.
        with Image.open(image_path) as image:
            # Only an image is passed here, so the tokenizer-style options
            # (padding/truncation) are not supplied.
            image_inputs = processor(images=image, return_tensors="pt")
        image_inputs = {name: tensor.to(device) for name, tensor in image_inputs.items()}
        image_embeddings = model.to(device).get_image_features(**image_inputs)

    # Bring the result back to host memory before converting to Python floats.
    image_embeddings_list = image_embeddings.cpu().tolist()
    with open("embeddings1.json", "w") as outfile:
        json.dump(image_embeddings_list[0], outfile)
    print(len(image_embeddings_list[0]))
    return image_embeddings_list[0]

def get_image_embedding2(image_path, model, processor,device):
    """Embed one image via ``model.get_image_features``, moving the whole batch at once.

    Args:
        image_path: Path of the image file to embed.
        model: Object exposing ``get_image_features(**inputs)``. It is used as
            passed; this function does not move it to ``device``.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``;
            its result must support ``.to(device)`` and ``**`` unpacking.
        device: Torch device the processed inputs are moved to.

    Returns:
        The first row of the model output as a plain Python list.

    Side effects:
        Overwrites ``embeddings2.json`` in the current working directory with the
        returned list and prints its length.
    """
    with torch.no_grad():
        # Open the image in a context manager so the file handle is released as
        # soon as preprocessing has produced the tensors.
        with Image.open(image_path) as image:
            image_inputs = processor(images=image, return_tensors="pt").to(device)
        image_embeddings = model.get_image_features(**image_inputs)

    # Bring the result back to host memory before converting to Python floats.
    image_embeddings_list = image_embeddings.cpu().tolist()
    with open("embeddings2.json", "w") as outfile:
        json.dump(image_embeddings_list[0], outfile)
    print(len(image_embeddings_list[0]))
    return image_embeddings_list[0]

def get_image_embedding3(image_path, model, processor,device):
    """Embed one image via ``model.encode_image`` after calling ``processor`` on it.

    Args:
        image_path: Path of the image file to embed.
        model: Object exposing ``encode_image(batch)``.
        processor: Callable that takes a PIL image and returns a tensor; a batch
            axis is added to its result with ``unsqueeze(0)``.
        device: Torch device the preprocessed batch is moved to.

    Returns:
        The first row of the model output as a plain Python list.

    Side effects:
        Overwrites ``embeddings3.json`` in the current working directory with the
        returned list and prints its length.
    """
    with torch.no_grad():
        # Open the image in a context manager so the file handle is released as
        # soon as preprocessing has produced the tensor.
        with Image.open(image_path) as image:
            image_preprocess = processor(image).unsqueeze(0).to(device)
        image_embeddings = model.encode_image(image_preprocess)

    # Bring the result back to host memory before converting to Python floats.
    image_embeddings_list = image_embeddings.cpu().tolist()
    with open("embeddings3.json", "w") as outfile:
        json.dump(image_embeddings_list[0], outfile)
    print(len(image_embeddings_list[0]))
    return image_embeddings_list[0]



# Input image: the single file every embedding function is run on.
source='./unsplash-main/ZZw-8XuYs0s.jpg'

# Two processor objects and one model, all loaded from the same
# "openai/clip-vit-base-patch32" checkpoint name.
processor_auto = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
processor_clip1 = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
model_clip1 = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
# `clip.load` returns a (model, image preprocessing callable) pair.
model_clip2, processor_clip2 = clip.load("ViT-B/32", device=device)


# Time each embedding variant once on the same input image.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock meant for measuring short elapsed intervals.
# NOTE(review): there is no warm-up run, so the first measurement may include
# one-time start-up costs — confirm before comparing the printed numbers.
for embed_fn, embed_model, embed_processor in (
    (get_image_embedding1, model_clip1, processor_clip1),
    (get_image_embedding2, model_clip1, processor_auto),
    (get_image_embedding3, model_clip2, processor_clip2),
):
    start = time.perf_counter()
    embed_fn(source, embed_model, embed_processor, device)
    end = time.perf_counter()
    print(end - start)


512
0.596951961517334
512
0.026210546493530273
512
0.251232385635376


In [None]:
import os
import clip
import torch
import json
import datetime
import numpy as np
from PIL import Image
from opensearchpy import helpers
from sentence_transformers import SentenceTransformer
from opensearchpy import OpenSearch, RequestsHttpConnection

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# `clip.load` returns a (model, image preprocessing callable) pair; both are
# used by get_image_embedding().
model_clip, preprocess = clip.load("ViT-B/32", device=device)

# OpenSearch endpoint and the name of the index this cell creates and fills.
SERVER_URL = "http://localhost:9200"
INDEX_NAME = "unsplash_knn_bulk_hybrid_multi_modal_index_03"

# Metadata file; load_file() parses it one JSON object per line.
UNSPLASH_METADATA_PATH = "./meta_data.json"
# Sentence embedding model used to encode the "meta_data" text of each record.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


def normalize_data(data):
    """Scale ``data`` by its 2-norm.

    Args:
        data: Array-like of numbers. With a 1-D input the 2-norm is the
            Euclidean length, so the result has unit length.

    Returns:
        ``data / norm`` as a NumPy array. When the norm is zero the values are
        returned unscaled (all zeros) instead of the NaNs that 0/0 would give.
    """
    vector = np.asarray(data)
    norm = np.linalg.norm(vector, ord=2)
    # A zero vector has no direction to preserve; dividing by 1.0 keeps it as is
    # while still returning a new array with the usual division dtype.
    safe_norm = norm if norm > 0 else 1.0
    return vector / safe_norm


def load_file(file_path):
    """Read a JSON Lines file into a list of parsed objects.

    Args:
        file_path: Path of a text file holding one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    json_objects = []
    # The `with` block closes the file on every exit path, so no extra
    # try/finally is needed (and a failed open() surfaces its own error).
    with open(file_path, "r", encoding="utf-8") as json_file:
        for line in json_file:
            # Tolerate blank lines, such as a trailing newline at end of file.
            if not line.strip():
                continue
            json_objects.append(json.loads(line))
    print("Done")
    return json_objects


def get_client(server_url: str) -> OpenSearch:
    """Create an OpenSearch client for ``server_url``.

    Args:
        server_url: URL of the OpenSearch node to connect to.

    Returns:
        An ``OpenSearch`` client built with SSL and certificate verification
        switched off and ``RequestsHttpConnection`` as the connection class.

    Side effects:
        Prints a confirmation message and the current timestamp.
    """
    # Use the caller-supplied URL rather than a hard-coded host.
    os_client_instance = OpenSearch(server_url, use_ssl=False, verify_certs=False,
                                    connection_class=RequestsHttpConnection)
    print("OS connected")
    print(datetime.datetime.now())
    return os_client_instance


def create_index(index_name: str, os_client: OpenSearch, metadata: list):
    """Create the index that holds text, text vectors and image vectors.

    Args:
        index_name: Name of the index to create.
        os_client: Client whose ``indices.create`` is called.
        metadata: Metadata records; passed to ``get_vector_dimension`` to size
            the ``description_vector`` field.

    Side effects:
        Sends one create-index request. Nothing is checked beforehand, so any
        error raised by ``indices.create`` propagates to the caller.
    """
    # Resolve the text-embedding size first so the mapping below is pure data.
    description_dimension = get_vector_dimension(metadata)

    mapping = {
        "mappings": {
            "properties": {
                "asin": {
                    "type": "keyword"
                },
                # Full-text field with an exact-match keyword sub-field.
                "text_field": {
                    "type": "text",
                    "analyzer": "standard",
                    "fields": {
                        "keyword_field": {
                            "type": "keyword"
                        }
                    }
                },
                "description_vector": {
                    "type": "knn_vector",
                    "dimension": description_dimension,
                },
                "item_image": {
                    "type": "keyword",
                },
                # Fixed size; must match the length of the stored image vectors.
                "image_vector": {
                    "type": "knn_vector",
                    "dimension": 512
                }

            }
        },
        "settings": {
            "index": {
                "number_of_shards": "1",
                # NOTE(review): "knn" is "false" although two knn_vector fields
                # are mapped — confirm this is intended for the planned queries.
                "knn": "false",
                "number_of_replicas": "1"
            }
        }

    }
    os_client.indices.create(index=index_name, body=mapping)


def delete_index(index_name: str, os_client: OpenSearch):
    """Delete the index called ``index_name`` through ``os_client``.

    Errors raised by the client (for example when the index is absent) are not
    caught here.
    """
    indices_api = os_client.indices
    indices_api.delete(index=index_name)


def get_vector_dimension(metadata: list):
    """Return the length of the text embedding for the first metadata record.

    The ``"meta_data"`` value of ``metadata[0]`` is encoded with the
    module-level ``model`` and the length of the result is returned.
    """
    first_record = metadata[0]
    sample_embedding = model.encode(first_record["meta_data"])
    return len(sample_embedding)

def get_image_embedding(image):
    """Return the normalized CLIP embedding of one image file.

    Args:
        image: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The embedding as a flat Python list, divided by its norm along the
        last axis.
    """
    # Open the image in a context manager so the file handle is released as
    # soon as preprocessing has produced the tensor; this matters when the
    # function is called once per image over a large dataset.
    with Image.open(image) as pil_image:
        image_tensor = preprocess(pil_image).unsqueeze(0).to(device)

    # Generate the image embedding without tracking gradients.
    with torch.no_grad():
        image_embedding = model_clip.encode_image(image_tensor)
        image_embedding /= image_embedding.norm(dim=-1, keepdim=True)

    # Drop the batch axis and convert to plain Python floats on the host.
    return image_embedding.squeeze().cpu().tolist()

def store_index(index_name: str, data: np.array, metadata: list, os_client: OpenSearch,
                image_dir: str = "/home/shivazi/Desktop/ai-rnd/vector-data/dataset/unsplash/unsplash_25k",
                batch_size: int = 1000):
    """Build one document per metadata record and bulk-index them.

    For every record the ``"meta_data"`` text is encoded with the module-level
    ``model`` and normalized, and the image ``<image_dir>/<photo_id>.jpg`` is
    embedded with ``get_image_embedding``. Documents are sent with
    ``helpers.bulk`` in batches.

    Args:
        index_name: Target index for the documents and for the refresh calls.
        data: Sequence iterated in step with ``metadata``. Its items are not
            read; it only bounds how many records are indexed.
        metadata: Records providing ``"meta_data"``, ``"photo_id"`` and
            ``"photo_url"``.
        os_client: Client used for the bulk requests and index refreshes.
        image_dir: Directory holding the ``<photo_id>.jpg`` files.
        batch_size: Number of documents sent per bulk request.

    Side effects:
        Writes documents with ``_id`` 1..N to the index, refreshes the index
        after each batch and once at the end, and prints a line per batch.
    """
    documents = []
    index_num = 0

    def flush_batch(last_index_num):
        # Send the pending documents, then empty the buffer in place.
        helpers.bulk(os_client, documents, request_timeout=1800)
        documents.clear()
        print(f"bulk {last_index_num} indexed successfully")
        os_client.indices.refresh(index=index_name)

    # zip() pairs the i-th data item with the i-th metadata record, so the
    # 1-based counter is used only as the document id and never as a list index
    # (indexing metadata with it would skip record 0 and overrun the end).
    for index_num, (_vector, metadata_line) in enumerate(zip(data, metadata), start=1):
        text_field = metadata_line["meta_data"]
        norm_text_vector_np = normalize_data(model.encode(text_field))
        image_id = metadata_line["photo_id"]
        image_file = os.path.join(image_dir, f"{image_id}.jpg")

        if os.path.exists(image_file):
            image_embedding = get_image_embedding(image_file)
        else:
            # Missing image: store an all-zero placeholder of the mapped size.
            image_embedding = [0] * 512

        documents.append({
            "_index": index_name,
            "_id": index_num,
            "asin": image_id,
            "text_field": text_field,
            "description_vector": norm_text_vector_np.tolist(),
            "item_image": metadata_line["photo_url"],
            "image_vector": image_embedding
        })
        if len(documents) >= batch_size:
            flush_batch(index_num)

    # Send whatever is left when the total is not a multiple of batch_size.
    if documents:
        flush_batch(index_num)

    os_client.indices.refresh(index=index_name)


def main():
    """Connect to OpenSearch, load the metadata, create the index and fill it.

    The loaded metadata list is passed to ``store_index`` as both ``data`` and
    ``metadata``.
    """
    client = get_client(SERVER_URL)
    records = load_file(UNSPLASH_METADATA_PATH)
    create_index(index_name=INDEX_NAME, os_client=client, metadata=records)
    store_index(index_name=INDEX_NAME, data=records, metadata=records, os_client=client)


# Run the indexing pipeline when this code executes as the top-level module
# (not when it is imported from elsewhere).
if __name__ == "__main__":
    main()
