In [None]:
import weaviate
from weaviate.classes.config import Property, DataType, Configure
from weaviate.util import generate_uuid5

In [None]:
# Open a connection to a locally running Weaviate instance, relying on the
# client's default connection settings (no arguments are overridden here).
client = weaviate.connect_to_local()

In [None]:
# Remove any existing "Amazon" collection so the create call that follows
# starts from an empty schema. This deletes the collection and its objects.
if client.collections.exists("Amazon"):
    client.collections.delete("Amazon")

In [None]:
# Plain-text product attributes; each becomes a TEXT property of the collection.
text_properties = [
    Property(name=field, data_type=DataType.TEXT)
    for field in ("name", "description", "technical_details", "image_url")
]
# The image is stored as a BLOB property; it is the only field listed in
# `image_fields` of the vector configurations below.
image_property = Property(name="image_blob", data_type=DataType.BLOB)

amazon = client.collections.create(
    name="Amazon",
    properties=[*text_properties, image_property],
    vector_config=[
        # Named vector "laion": multi2vec-clip with no explicit inference URL.
        Configure.Vectors.multi2vec_clip(
            name="laion",
            image_fields=["image_blob"],
            vector_index_config=Configure.VectorIndex.hnsw(),
            quantizer=Configure.VectorIndex.Quantizer.rq(),
        ),
        # Named vector "siglip2": same module, pointed at a dedicated inference endpoint.
        Configure.Vectors.multi2vec_clip(
            name="siglip2",
            image_fields=["image_blob"],
            inference_url="http://google-siglip2-so400m-patch16-384:8080",
            vector_index_config=Configure.VectorIndex.hnsw(),
            quantizer=Configure.VectorIndex.Quantizer.rq(),
        ),
    ],
)

Prepare data


In [None]:
from datasets import load_from_disk
from helpers import get_first_n_elements

# Read the dataset previously saved to disk and pass it through
# get_first_n_elements with n=100 before importing it into Weaviate.
weaviate_data = get_first_n_elements(
    load_from_disk(dataset_path="../.data/datasets_saved_to_disk/amazon"),
    100,
)

# Displayed as the cell output: how many records will be imported.
len(weaviate_data)

In [None]:
amazon = client.collections.get("Amazon")

# Import every record through a dynamically sized batch. The UUID is derived
# deterministically from the product name, so records sharing a name map to
# the same object ID.
with amazon.batch.dynamic() as batch:
    for d in weaviate_data:
        batch.add_object(properties=d, uuid=generate_uuid5(d["name"]))
# Leaving the `with` block flushes whatever is still queued, so an explicit
# batch.flush() call is not needed.

# Batch imports do not raise on per-object errors; surface them explicitly so
# a partially populated collection is not mistaken for a successful import.
failed_objects = amazon.batch.failed_objects
if failed_objects:
    print(f"Number of failed imports: {len(failed_objects)}")
    print(f"First failed object: {failed_objects[0]}")

In [None]:
# Handle to the "Amazon" collection; the query cells below call `amazon.query` on it.
amazon = client.collections.get("Amazon")

In [None]:
from helpers import display_amazon_results

# Text query "bicycle" searched against the "laion" named vector; top 10 hits.
res = amazon.query.near_text(
    query="bicycle",
    target_vector="laion",
    limit=10,
)
display_amazon_results(res)

In [None]:
from helpers import display_amazon_results

# Same text query as for "laion", this time against the "siglip2" named
# vector, so the two result sets can be compared.
res = amazon.query.near_text(
    query="bicycle",
    target_vector="siglip2",
    limit=10,
)
display_amazon_results(res)

Perform image similarity search

In [None]:
from helpers import display_amazon_results, get_image_blob

# Reference image used as the query for the similarity search.
query_image = get_image_blob(
    "https://images-na.ssl-images-amazon.com/images/I/41bejMzMMgL.jpg"
)

# Image-to-image search against the "laion" named vector; top 10 hits.
res = amazon.query.near_image(
    near_image=query_image,
    target_vector="laion",
    limit=10,
)
display_amazon_results(res)

In [None]:
from helpers import display_amazon_results, get_image_blob

# Reference image used as the query for the similarity search.
query_image = get_image_blob(
    "https://images-na.ssl-images-amazon.com/images/I/41bejMzMMgL.jpg"
)

# Image-to-image search against the "siglip2" named vector; top 10 hits.
res = amazon.query.near_image(
    near_image=query_image,
    target_vector="siglip2",
    limit=10,
)
display_amazon_results(res)