In [1]:
from transformers import CLIPProcessor, CLIPModel
import torch
from PIL import Image
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
torch.cuda.is_available()

False

In [4]:
# torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
class ProductModel:
    def __init__(self):
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

        print("Quantizing model components for CPU compatibility (mocking TensorRT)...")
        self.model.vision_model = torch.quantization.quantize_dynamic(
            self.model.vision_model, {torch.nn.Linear}, dtype=torch.qint8
        )
        self.model.text_model = torch.quantization.quantize_dynamic(
            self.model.text_model, {torch.nn.Linear}, dtype=torch.qint8
        )
        self.model.visual_projection = torch.quantization.quantize_dynamic(
            self.model.visual_projection, {torch.nn.Linear}, dtype=torch.qint8
        )
        self.model.text_projection = torch.quantization.quantize_dynamic(
            self.model.text_projection, {torch.nn.Linear}, dtype=torch.qint8
        )
        self.model.eval()

    def get_image_embeddings(self, image_path):
        try:
            # Open and process the image
            image = Image.open(image_path).convert('RGB')
            inputs = self.processor(images=image, return_tensors="pt")

            # Extract image embeddings using the vision model
            with torch.no_grad():
                # Use only the vision model to get image embeddings
                vision_outputs = self.model.vision_model(pixel_values=inputs['pixel_values'])
                image_embeds = vision_outputs.pooler_output  # Pooled output from vision transformer
                image_embeds = self.model.visual_projection(image_embeds)  # Project to common space
            return image_embeds.numpy()
        except Exception as e:
            raise ValueError(f"Failed to get Image embeddings: {str(e)}")
        

    def get_text_embeddings(self, text):
        try:
            inputs = self.processor(text=[text], return_tensors="pt", padding=True, truncation=True)
            with torch.no_grad():
                text_outputs = self.model.text_model(
                    input_ids=inputs['input_ids'],
                    attention_mask=inputs['attention_mask']
                )
                text_embeds = text_outputs.pooler_output
                text_embeds = self.model.text_projection(text_embeds)
            return text_embeds.numpy()
        except Exception as e:
            raise ValueError(f"Failed to get text embeddings: {str(e)}")

In [11]:
image = "data/Mountain Sun and Camera.png"

In [12]:
pm = ProductModel()

Quantizing model components for CPU compatibility (mocking TensorRT)...


In [13]:
im_em = pm.get_image_embeddings(image)

In [41]:
np.array(im_em.tolist(), dtype=np.float32)

array([[ 4.60496694e-02,  1.32213145e-01,  2.98414789e-02,
         3.30222934e-01, -7.07582608e-02, -9.33331102e-02,
        -4.70201448e-02,  3.10114682e-01,  3.68814170e-03,
         7.87665881e-03,  1.26694962e-01, -5.57681620e-01,
         6.72545850e-01, -8.59713912e-01,  1.12481929e-01,
         2.39247814e-01, -1.37999535e+00, -3.17272544e-02,
         7.23719835e-01, -6.66723430e-01,  8.43707800e-01,
         2.87548393e-01, -6.84398115e-02, -9.63700771e-01,
        -2.07327247e-01,  3.29471678e-01, -2.33646482e-01,
        -3.52603942e-03, -1.60966754e-01,  2.62004882e-01,
         4.51686829e-02,  2.29838505e-01, -1.46200567e-01,
        -2.64058024e-01, -1.16075426e-01,  4.16915119e-01,
         2.01606214e-01,  4.01477814e-01, -8.67180526e-03,
         8.10218096e-01, -3.84349138e-01,  1.29440457e-01,
        -5.55368602e-01, -5.47148585e-01,  4.03766036e-02,
        -9.66724098e-01,  9.91781428e-02,  2.64228404e-01,
        -1.96643397e-01,  1.04953766e-01,  4.39612120e-0

In [14]:
db = Database()

In [15]:
db.query_vector(image_embedding=im_em)

{'ids': [['2689']],
 'embeddings': None,
 'documents': [[None]],
 'uris': None,
 'data': None,
 'metadatas': [[None]],
 'distances': [[12.759922326591681]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [4]:
from pymongo import MongoClient
import chromadb
from src.config import Config

class Database:
    def __init__(self):
        # MongoDB Atlas
        self.mongo_client = MongoClient(Config.MONGO_URI)
        self.db = self.mongo_client[Config.DB_NAME]
        self.products = self.db['products']
        self.logs = self.db['logs']
        
        # Local ChromaDB
        self.chroma_client = chromadb.PersistentClient(path="./chroma_db")
        self.image_collection = self.chroma_client.create_collection(Config.IMAGE_COLLECTION, get_or_create=True)
        self.text_collection = self.chroma_client.create_collection(Config.TEXT_COLLECTION, get_or_create=True)

    def add_product(self, image_embedding, text_embedding, metadata, product_id):
        try:
            self.image_collection.add(embeddings=image_embedding.tolist(), ids=[product_id])
            self.text_collection.add(embeddings=text_embedding.tolist(), ids=[product_id])
            metadata["embedding_id"] = product_id
            self.products.insert_one(metadata)
            return True
        except Exception as e:
            self.logs.insert_one({"error": str(e)})
            return False

    def find_product(self, product_id):
        return self.products.find_one({"embedding_id": product_id})

    def query_vector(self, image_embedding=None, text_embedding=None):
        if image_embedding is None and text_embedding is None:
            raise ValueError("At least one embedding must be provided in Query Vector Search")
        
        if image_embedding is not None and text_embedding is not None:
            image_result = self.image_collection.query(query_embeddings=image_embedding.tolist(), n_results=1)
            text_result = self.text_collection.query(query_embeddings=text_embedding.tolist(), n_results=1)
            combined_distance = (image_result['distances'][0][0] + text_result['distances'][0][0]) / 2
            return {
                'ids': image_result['ids'],  # Assuming same order; could refine with more logic
                'distances': [[combined_distance]]
            }
        elif image_embedding is not None:
            return self.image_collection.query(query_embeddings=image_embedding.tolist(), n_results=1)
        elif text_embedding is not None:
            return self.text_collection.query(query_embeddings=text_embedding.tolist(), n_results=1)

    def log_error(self, error):
        self.logs.insert_one({"error": str(error)})

In [15]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

In [16]:
image = Image.open('data/image2.jpg').convert('RGB')

In [17]:
inputs = processor(images=image, return_tensors="pt")

In [18]:
with torch.no_grad():
    # Use only the vision model to get image embeddings
    vision_outputs = model.vision_model(pixel_values=inputs['pixel_values'])
    image_embeds = vision_outputs.pooler_output  # Pooled output from vision transformer
    image_embeds = model.visual_projection(image_embeds)  # Project to common space

In [19]:
em = image_embeds.numpy()

In [20]:
metadata = {'name': 'image2', 'category': 'image2', 'price': 2.0, 'filename': 'image2.jpg'}

In [21]:
product_id = metadata.get("id", str(np.random.randint(10000)))

In [22]:
product_id

'7196'

In [27]:
from pymongo import MongoClient
import chromadb
from config import Config

class Database:
    def __init__(self):
        # MongoDB Atlas
        self.mongo_client = MongoClient(Config.MONGO_URI)
        self.db = self.mongo_client[Config.DB_NAME]
        self.products = self.db['products']
        self.logs = self.db['logs']
        
        # Local ChromaDB
        self.chroma_client = chromadb.PersistentClient(path="./chroma_db")
        self.image_collection = self.chroma_client.create_collection(Config.IMAGE_COLLECTION, get_or_create=True)
        self.text_collection = self.chroma_client.create_collection(Config.TEXT_COLLECTION, get_or_create=True)

    def add_product(self, image_embedding, text_embedding, metadata, product_id):
        try:
            self.image_collection.add(embeddings=image_embedding.tolist(), ids=[product_id])
            self.text_collection.add(embeddings=text_embedding.tolist(), ids=[product_id])
            metadata["embedding_id"] = product_id
            self.products.insert_one(metadata)
            return True
        except Exception as e:
            self.logs.insert_one({"error": str(e)})
            return False

    def find_product(self, product_id):
        return self.products.find_one({"embedding_id": product_id})

    def query_vector(self, image_embedding=None, text_embedding=None):
        if image_embedding is None and text_embedding is None:
            raise ValueError("At least one embedding must be provided in Query Vector Search")
        
        if image_embedding is not None and text_embedding is not None:
            image_result = self.image_collection.query(query_embeddings=image_embedding.tolist(), n_results=1)
            text_result = self.text_collection.query(query_embeddings=text_embedding.tolist(), n_results=1)
            combined_distance = (image_result['distances'][0][0] + text_result['distances'][0][0]) / 2
            return {
                'ids': image_result['ids'],  # Assuming same order; could refine with more logic
                'distances': [[combined_distance]]
            }
        elif image_embedding is not None:
            return self.image_collection.query(query_embeddings=image_embedding.tolist(), n_results=1)
        elif text_embedding is not None:
            return self.text_collection.query(query_embeddings=text_embedding.tolist(), n_results=1)

    def log_error(self, error):
        self.logs.insert_one({"error": str(error)})

In [28]:
db = Database()

In [29]:
db.add_product(em, metadata, product_id)

TypeError: Database.add_product() missing 1 required positional argument: 'product_id'

In [31]:
products.find_one({"embedding_id": product_id})

NameError: name 'products' is not defined

In [21]:
image_embeds

tensor([[ 4.3531e-01, -2.7229e-01, -4.1955e-01,  1.4281e-01,  1.1481e-01,
         -2.7920e-01,  1.6093e-01,  1.1949e-01,  2.9444e-01,  4.5332e-01,
          1.9529e-01, -8.7197e-02,  3.8882e-01, -3.4051e-01, -6.9022e-02,
          2.9721e-01,  1.8352e-01,  1.2734e-01,  2.7938e-01, -5.5388e-02,
         -1.3446e-01,  2.8717e-03, -2.4914e-01, -1.7240e-01,  5.6754e-02,
         -1.1113e-01,  5.4551e-01, -1.4033e-01,  3.1976e-01,  1.9605e-01,
          1.4207e-01,  6.4291e-01, -1.1071e-01,  7.6126e-02, -6.4245e-01,
          3.5593e-01,  1.6354e-02,  2.9927e-01,  1.5811e-01, -9.0015e-01,
          1.2340e-01, -2.1670e-01, -1.3033e-01, -5.9603e-01,  4.4137e-01,
          7.5225e-01,  1.0451e-01, -1.1869e-01, -7.4424e-01,  4.2923e-01,
         -2.0678e-02,  2.5914e-01,  8.7514e-02, -3.1141e-01,  1.1439e-01,
         -1.5802e-01, -1.9444e-01, -3.5095e-01,  6.7247e-01,  5.4727e-01,
          1.5959e-02, -2.1647e-02,  1.7429e-01, -1.7409e-01,  7.9685e-02,
         -5.9897e-01, -3.2357e-01, -1.