## Scaling Weaviate - Live Workshop

Follow README.md for initial setup steps.

In [None]:
# Start the services from the local docker-compose file in detached (background) mode.
!docker-compose up -d

In [None]:
import weaviate
import os

COLLECTION_NAME = "SupportChat"

# Inference provider API keys, passed to Weaviate as request headers
# (we are using Cohere today). Raises KeyError if COHERE_API_KEY is unset.
provider_headers = {"X-COHERE-API-KEY": os.environ["COHERE_API_KEY"]}

# Connect to the Weaviate instance running locally on port 8080
client = weaviate.connect_to_local(port=8080, headers=provider_headers)

In [None]:
# Delete the collection named by COLLECTION_NAME so the notebook starts from
# a clean state before the collection is created further down.
# NOTE(review): presumably a no-op when the collection does not exist - confirm.
client.collections.delete(COLLECTION_NAME)

Take a look at the app - it should load, but it will not work properly, as the collection does not exist yet.

In [None]:
from weaviate.classes.config import Property, DataType, Configure

# Schema: the fields stored for each support chat
chat_properties = [
    Property(name="text", data_type=DataType.TEXT),
    Property(name="dialogue_id", data_type=DataType.INT),
    Property(name="company_author", data_type=DataType.TEXT),
    Property(name="created_at", data_type=DataType.DATE),
]

# A single named vector, "text_with_metadata", configured with the Cohere
# vectorizer over the "text" and "company_author" properties and an HNSW index
text_with_metadata_vector = Configure.NamedVectors.text2vec_cohere(
    name="text_with_metadata",
    source_properties=["text", "company_author"],
    vector_index_config=Configure.VectorIndex.hnsw(),
    model="embed-multilingual-light-v3.0",
)

client.collections.create(
    name=COLLECTION_NAME,
    properties=chat_properties,
    vectorizer_config=[text_with_metadata_vector],
    # Generative queries are configured to use Cohere's "command-r" model
    generative_config=Configure.Generative.cohere(model="command-r"),
)

In [None]:
# Sanity check: stop here with an AssertionError if the collection is missing
assert client.collections.exists(COLLECTION_NAME)

Now, if you take a look at the app again - you'll see the outline. But there is no data in the collection yet, so the app will not display any items.

Let's add some data to the collection.

In [None]:
import h5py
import json
import numpy as np

# Arbitrary tenant names to sub-divide our dataset
tenant_names = ["AcmeCo", "Globex", "Initech", "UmbrellaCorp", "WayneEnterprises"]


def get_support_data(file_path):
    """
    Stream the support-chat objects stored in an HDF5 file.

    Each top-level group in the file is read as one object: the group name
    is used as the object's UUID, the group's "object" dataset holds the
    JSON-encoded properties, and every dataset named "vector_<name>" holds
    a vector called <name>.

    Args:
        file_path (str): Path to the HDF5 file

    Yields:
        dict: Dictionary with 'uuid', 'properties', 'tenant' and 'vector'
            keys. 'tenant' is one of `tenant_names`; 'vector' maps the
            vector name "text_with_metadata" to a NumPy array.

    Raises:
        KeyError: If an object's properties have no 'company_author' entry
            or the group has no "vector_text_with_metadata" dataset.
    """
    vector_prefix = "vector_"

    with h5py.File(file_path, "r") as hf:
        for uuid, group in hf.items():
            # Get the object properties
            properties = json.loads(group["object"][()])

            # Arbitrarily (but deterministically) assign a tenant to the data:
            # the length of the company name picks the slot. Using
            # len(tenant_names) keeps this correct if the list is edited.
            tenant_index = len(properties["company_author"]) % len(tenant_names)
            tenant = tenant_names[tenant_index]

            # Get the vector(s) - handle multiple vectors if they exist
            vectors = {
                key[len(vector_prefix):]: np.asarray(group[key])
                for key in group.keys()
                if key.startswith(vector_prefix)
            }

            yield {
                "uuid": uuid,
                "properties": properties,
                "tenant": tenant,
                # Only the named vector defined on the collection is passed on
                "vector": {"text_with_metadata": vectors["text_with_metadata"]},
            }

In [None]:
from tqdm import tqdm

file_path = "data/twitter_customer_support.h5"

chats = client.collections.get(COLLECTION_NAME)

# Stream objects out of the HDF5 file and send them to Weaviate through a
# fixed-size batcher (200 objects per batch), with a progress bar
with chats.batch.fixed_size(batch_size=200) as batch:
    for item in tqdm(get_support_data(file_path), desc="Importing to Weaviate"):
        batch.add_object(
            properties=item["properties"],
            vector=item["vector"],
            uuid=item["uuid"],
        )

print(f"🎉 Import completed! {len(chats)} objects imported.")

# Report any objects the batch could not import
failed = chats.batch.failed_objects
if not failed:
    print("✅ All objects imported successfully!")
else:
    print("⚠️  Some objects failed to import:")
    print(f"   Failed: {len(failed)} objects")
    print("   First few errors:", failed[:3])

Search examples

In [None]:
# Semantic search: fetch the three chats closest in meaning to the query
response = chats.query.near_text(query="Problem with delivery", limit=3)

for hit in response.objects:
    props = hit.properties
    print(f"\n\nDialogID {props['dialogue_id']} with Company: {props['company_author']}")
    print("=" * 50)
    # Show only the first 500 characters of the conversation
    print(props["text"][:500])

In [None]:
from weaviate.classes.query import Filter

# Restrict the semantic search to chats from a single company.
# Alternative to try: Filter.by_property("company_author").like("*Air*")
company_filter = Filter.by_property("company_author").equal("Uber_Support")

response = chats.query.near_text(
    query="Delay compensation",
    limit=3,
    filters=company_filter,
)

for hit in response.objects:
    props = hit.properties
    print(f"\n\nDialogID {props['dialogue_id']} with Company: {props['company_author']}")
    print("=" * 50)
    # Show only the first 500 characters of the conversation
    print(props["text"][:500])

Check the app again! It should now be working properly - try a few searches and RAG queries.

How can we scale this solution?
- What options do we have to manage the memory footprint? 
- What if we want to isolate the data for end users?
- Can we scale out with Weaviate?

## Quantization

Stop and restart Weaviate to reset its memory footprint.

In [None]:
# Tear the stack down and, only if that succeeds, start it again in detached mode.
!docker-compose down && docker-compose up -d

In [None]:
# Delete the collection named by COLLECTION_NAME so it can be recreated
# below with a different vector index configuration.
# NOTE(review): presumably a no-op when the collection does not exist - confirm.
client.collections.delete(COLLECTION_NAME)

In [None]:
from weaviate.classes.config import Property, DataType, Configure

# Recreate the collection with the same properties, named vector and
# generative model as the first version. The HNSW settings are left empty
# as the starting point for the quantization exercise.
client.collections.create(
    name=COLLECTION_NAME,
    properties=[
        Property(name="text", data_type=DataType.TEXT),
        Property(name="dialogue_id", data_type=DataType.INT),
        Property(name="company_author", data_type=DataType.TEXT),
        Property(name="created_at", data_type=DataType.DATE),
    ],
    vectorizer_config=[
        Configure.NamedVectors.text2vec_cohere(
            name="text_with_metadata",
            source_properties=["text", "company_author"],
            vector_index_config=Configure.VectorIndex.hnsw(
                # Add quantization here
                # NOTE(review): presumably through the `quantizer` argument of
                # hnsw() - confirm against the Weaviate client docs.
            ),
            model="embed-multilingual-light-v3.0"
        ),
    ],
    generative_config=Configure.Generative.cohere(model="command-r"),
)

Import data again

Queries will run the same way as before.

But check the memory footprint!

## Vector index types

In [None]:
# Tear the stack down and, only if that succeeds, start it again in detached mode.
!docker-compose down && docker-compose up -d

In [None]:
# Delete the collection named by COLLECTION_NAME so it can be recreated
# below with a different vector index type.
# NOTE(review): presumably a no-op when the collection does not exist - confirm.
client.collections.delete(COLLECTION_NAME)

Review indexing options

In [None]:
from weaviate.classes.config import Property, DataType, Configure

# Recreate the collection with the same properties, named vector and
# generative model as before. The `vector_index_config` line is the one to
# edit for the index-type exercise.
client.collections.create(
    name=COLLECTION_NAME,
    properties=[
        Property(name="text", data_type=DataType.TEXT),
        Property(name="dialogue_id", data_type=DataType.INT),
        Property(name="company_author", data_type=DataType.TEXT),
        Property(name="created_at", data_type=DataType.DATE),
    ],
    vectorizer_config=[
        Configure.NamedVectors.text2vec_cohere(
            name="text_with_metadata",
            source_properties=["text", "company_author"],
            # Try different index types
            # NOTE(review): presumably other Configure.VectorIndex.* factories
            # (e.g. flat or dynamic) - confirm against the Weaviate client docs.
            vector_index_config=Configure.VectorIndex.hnsw(),
            model="embed-multilingual-light-v3.0"
        ),
    ],
    generative_config=Configure.Generative.cohere(model="command-r"),
)

When should you choose each index type?

## Multi-tenancy

In [None]:
# Delete the collection named by COLLECTION_NAME so it can be recreated
# below for the multi-tenancy exercise.
# NOTE(review): presumably a no-op when the collection does not exist - confirm.
client.collections.delete(COLLECTION_NAME)

In [None]:
from weaviate.classes.config import Property, DataType, Configure

# Recreate the collection with the same properties, named vector and
# generative model as before. The multi-tenancy setting is left as an
# exercise (see the placeholder before the closing parenthesis).
client.collections.create(
    name=COLLECTION_NAME,
    properties=[
        Property(name="text", data_type=DataType.TEXT),
        Property(name="dialogue_id", data_type=DataType.INT),
        Property(name="company_author", data_type=DataType.TEXT),
        Property(name="created_at", data_type=DataType.DATE),
    ],
    vectorizer_config=[
        Configure.NamedVectors.text2vec_cohere(
            name="text_with_metadata",
            source_properties=["text", "company_author"],
            vector_index_config=Configure.VectorIndex.hnsw(),
            model="embed-multilingual-light-v3.0"
        ),
    ],
    generative_config=Configure.Generative.cohere(model="command-r"),
    # Set up multi-tenancy
    # NOTE(review): presumably a `multi_tenancy_config=` argument built with
    # Configure.multi_tenancy(...) - confirm against the Weaviate client docs.
)

In [None]:
from tqdm import tqdm

file_path = "data/twitter_customer_support.h5"

# Import through the client-level batcher instead of the collection-level one.
# This cell is an exercise scaffold: the collection name and tenant still
# have to be filled in on the add_object call below.
# NOTE(review): tenants presumably need to exist (or auto-tenant creation be
# enabled) before objects can be added to them - confirm.
# Notice using `client` here not collection `chats`
with client.batch.fixed_size(batch_size=200) as batch:
    for item in tqdm(get_support_data(file_path), desc="Importing to Weaviate"):
        batch.add_object(
            uuid=item["uuid"],
            properties=item["properties"],
            vector=item["vector"],
            # Add collection name
            # (the target collection is held in COLLECTION_NAME)
            # Add tenant
            # (get_support_data supplies one per object under item["tenant"])
        )

# Handle any errors - note using client again
if len(client.batch.failed_objects) > 0:
    print("⚠️  Some objects failed to import:")
    print(f"   Failed: {len(client.batch.failed_objects)} objects")
    print("   First few errors:", client.batch.failed_objects[:3])
else:
    print("✅ All objects imported successfully!")

Search examples with multi-tenancy

In [None]:
# Try the same query

In [None]:
# Try again

## Multi-node example

In [None]:
# Tear down the currently running stack, then start the one described by
# docker-compose-three-nodes.yml in detached mode.
# NOTE(review): this line mixes the `docker-compose` and `docker compose`
# CLI spellings - confirm both are available in the workshop environment.
!docker-compose down && docker compose -f docker-compose-three-nodes.yml up -d

In [None]:
import weaviate
import os

COLLECTION_NAME = "SupportChat"

# Inference provider API keys, passed to Weaviate as request headers
# (we are using Cohere today). Raises KeyError if COHERE_API_KEY is unset.
provider_headers = {"X-COHERE-API-KEY": os.environ["COHERE_API_KEY"]}

# Connect to the Weaviate instance reachable locally on port 8080
client = weaviate.connect_to_local(port=8080, headers=provider_headers)

In [None]:
# Delete the collection named by COLLECTION_NAME on the newly started stack
# so the multi-node example starts from a clean state.
# NOTE(review): presumably a no-op when the collection does not exist - confirm.
client.collections.delete(COLLECTION_NAME)

## Replication setup

In [None]:
# Delete the collection named by COLLECTION_NAME before the replication setup.
# NOTE(review): the preceding cell already deletes this collection and nothing
# visible recreates it in between - confirm whether this repeat is intentional.
client.collections.delete(COLLECTION_NAME)