In [None]:
import weaviate
from weaviate.classes.config import Property, DataType, Configure
from weaviate.util import generate_uuid5

In [None]:
# Open a connection to a locally running Weaviate instance.
# The connection is released by the client.close() call in the last cell.
client = weaviate.connect_to_local()

In [None]:
# Remove a "Books" collection left over from a previous run so the
# collection-creation cell that follows can be re-executed.
books_already_exists = client.collections.exists("Books")
if books_already_exists:
    client.collections.delete("Books")

In [None]:
# Stored fields of a book and the Weaviate data type of each one.
book_property_types = {
    "title": DataType.TEXT,
    "year": DataType.INT,
    "avg_rating": DataType.NUMBER,
    "author": DataType.TEXT,
    "description": DataType.TEXT,
    "length": DataType.NUMBER,
    "cover_image": DataType.TEXT,
    "cover_image_blob": DataType.BLOB,
}

# Two named vectors are built from the same image field. They differ only in
# the extra arguments listed here: "laion" passes no inference_url, while
# "siglip2" points at a dedicated inference container.
clip_vector_overrides = {
    "laion": {},
    "siglip2": {"inference_url": "http://google-siglip2-so400m-patch16-384:8080"},
}

books = client.collections.create(
    name="Books",
    properties=[
        Property(name=field_name, data_type=field_type)
        for field_name, field_type in book_property_types.items()
    ],
    vector_config=[
        Configure.Vectors.multi2vec_clip(
            name=vector_name,
            image_fields=["cover_image_blob"],
            vector_index_config=Configure.VectorIndex.hnsw(),
            quantizer=Configure.VectorIndex.Quantizer.rq(),
            **overrides,
        )
        for vector_name, overrides in clip_vector_overrides.items()
    ],
)

Prepare data

In [None]:
from datasets import load_from_disk
from helpers import get_first_n_elements

# Read the books dataset that was previously saved to disk.
books_on_disk = load_from_disk(dataset_path="../.data/datasets_saved_to_disk/books")

# Keep a small sample for the demo (presumably the first 100 records --
# confirm against helpers.get_first_n_elements).
weaviate_data = get_first_n_elements(books_on_disk, 100)

# Cell output: how many records will be imported.
len(weaviate_data)

Import data and generate embeddings

In [None]:
books = client.collections.get("Books")

# Import every record through a dynamically sized batch. The UUID is derived
# from the title, so re-importing a record with the same title reuses its id.
# Leaving the `with` block flushes whatever is still queued, so no explicit
# batch.flush() is needed.
with books.batch.dynamic() as batch:
    for d in weaviate_data:
        batch.add_object(properties=d, uuid=generate_uuid5(d["title"]))

# Batch imports do not raise on per-object failures (e.g. a vectorizer error);
# they are collected on the collection's batch handle. Surface them here so a
# partially failed import does not go unnoticed.
failed_objects = books.batch.failed_objects
if failed_objects:
    print(f"{len(failed_objects)} of {len(weaviate_data)} objects failed to import")
    print(f"First failure: {failed_objects[0]}")

In [None]:
# Handle to the "Books" collection used by all query cells below.
books = client.collections.get("Books")

In [None]:
# Sanity check: fetch a few objects together with their vectors and print the
# length of each named vector.
res = books.query.fetch_objects(limit=10, include_vector=True)

for o in res.objects:
    # Pull the values out first: reusing double quotes inside the replacement
    # fields of a double-quoted f-string is a SyntaxError before Python 3.12.
    cover_url = o.properties["cover_image"]
    siglip2_len = len(o.vector["siglip2"])
    laion_len = len(o.vector["laion"])
    print(f'id={o.uuid} url={cover_url} len(vector["siglip2"]): {siglip2_len} len(vector["laion"]): {laion_len}')

Perform a text search over images using the laion vector index

In [None]:
from helpers import display_book_results

# Text-to-image search: the query text is compared against the "laion"
# named vector and the ten closest books are shown.
res = books.query.near_text(
    query="stars in the sky",
    target_vector="laion",
    limit=10,
)
display_book_results(res)

Perform a text search over images using the siglip2 vector index

In [None]:
from helpers import display_book_results

# Same text query as for laion, this time searched against the "siglip2"
# named vector so the two models can be compared.
res = books.query.near_text(
    query="stars in the sky",
    target_vector="siglip2",
    limit=10,
)
display_book_results(res)

Perform a text search over images using both the siglip2 and laion vector indexes

In [None]:
from helpers import display_book_results
from weaviate.classes.query import TargetVectors

# Multi-target search: query both named vectors and combine them with
# TargetVectors.sum.
combined_target = TargetVectors.sum(["siglip2", "laion"])

res = books.query.near_text(
    query="stars in the sky",
    target_vector=combined_target,
    limit=10,
)
display_book_results(res)

Find similar images

In [None]:
from helpers import display_book_results, get_image_blob

# Image-to-image search against the "laion" named vector.
# get_image_blob presumably fetches the cover and encodes it for the query --
# confirm against helpers.
query_image = get_image_blob("https://images.gr-assets.com/books/1347352439l/333766.jpg")

res = books.query.near_image(
    near_image=query_image,
    target_vector="laion",
    limit=10,
)
display_book_results(res)

In [None]:
from helpers import display_book_results, get_image_blob

# Image-to-image search with the same reference cover, this time against the
# "siglip2" named vector.
# get_image_blob presumably fetches the cover and encodes it for the query --
# confirm against helpers.
query_image = get_image_blob("https://images.gr-assets.com/books/1347352439l/333766.jpg")

res = books.query.near_image(
    near_image=query_image,
    target_vector="siglip2",
    limit=10,
)
display_book_results(res)

Hybrid search

In [None]:
from helpers import display_book_results, get_image_blob
from weaviate.classes.query import TargetVectors

# Hybrid search: the keyword side is limited to the title and description
# properties, and the vector side targets the "laion" named vector.
keyword_fields = ["title", "description"]

res = books.query.hybrid(
    query="James Ramsey",
    query_properties=keyword_fields,
    target_vector=["laion"],
    limit=10,
)
display_book_results(res)

In [None]:
# Release the connection opened by weaviate.connect_to_local() in the first cells.
client.close()