## Scaling Weaviate - Live Workshop

Follow README.md for initial setup steps.

In [1]:
# Start the Docker Compose stack in the background (-d = detached)
!docker-compose up -d

[1A[1B[0G[?25l[+] Running 2/0
 [32m✔[0m Network scalable-rag-workshop_default                 [32mCreated[0m           [34m0.0s [0m
 [32m✔[0m Volume "scalable-rag-workshop_scalable_rag_workshop"  [32mCreated[0m           [34m0.0s [0m
 [33m⠋[0m Container scalable-rag-workshop-weaviate_anon-1       Starting          [34m0.1s [0m
[?25h[1A[1A[1A[1A[0G[?25l[+] Running 2/3
 [32m✔[0m Network scalable-rag-workshop_default                 [32mCreated[0m           [34m0.0s [0m
 [32m✔[0m Volume "scalable-rag-workshop_scalable_rag_workshop"  [32mCreated[0m           [34m0.0s [0m
 [33m⠙[0m Container scalable-rag-workshop-weaviate_anon-1       Starting          [34m0.2s [0m
[?25h[1A[1A[1A[1A[0G[?25l[34m[+] Running 3/3[0m
 [32m✔[0m Network scalable-rag-workshop_default                 [32mCreated[0m           [34m0.0s [0m
 [32m✔[0m Volume "scalable-rag-workshop_scalable_rag_workshop"  [32mCreated[0m           [34m0.0s [0m
 [32m✔[0m Con

In [2]:
import weaviate
import os

# Collection name shared by every cell in this notebook
COLLECTION_NAME = "SupportChat"

# Inference provider keys travel as request headers (we are using Cohere today)
provider_headers = {"X-COHERE-API-KEY": os.environ["COHERE_API_KEY"]}

# Connect to the local Weaviate instance
client = weaviate.connect_to_local(port=8080, headers=provider_headers)

In [3]:
# Delete existing collection if it exists, so the create cell below starts from scratch
client.collections.delete(COLLECTION_NAME)

Take a look at the app - it should load, but it will not work properly, as the collection does not exist yet.

In [4]:
from weaviate.classes.config import Property, DataType, Configure

# Properties stored for each support conversation
chat_properties = [
    Property(name=prop_name, data_type=prop_type)
    for prop_name, prop_type in (
        ("text", DataType.TEXT),
        ("dialogue_id", DataType.INT),
        ("company_author", DataType.TEXT),
        ("created_at", DataType.DATE),
    )
]

# One named vector built from the text and company name, stored in a default HNSW index
text_vector_config = Configure.NamedVectors.text2vec_cohere(
    name="text_with_metadata",
    model="embed-multilingual-light-v3.0",
    source_properties=["text", "company_author"],
    vector_index_config=Configure.VectorIndex.hnsw(),
)

# Kept as the last expression so the cell still displays the returned collection
client.collections.create(
    name=COLLECTION_NAME,
    properties=chat_properties,
    vectorizer_config=[text_vector_config],
    generative_config=Configure.Generative.cohere(model="command-r"),
)

<weaviate.collections.collection.sync.Collection at 0x1079811d0>

In [5]:
# Sanity check: stop here if the collection was not created
assert client.collections.exists(COLLECTION_NAME)

Now, if you take a look at the app again - you'll see the outline. But there is no data in the collection yet, so the app will not display any items.

Let's add some data to the collection.

In [6]:
import h5py
import json
import numpy as np

# Arbitrary tenant names to sub-divide our dataset
tenant_names = ["AcmeCo", "Globex", "Initech", "UmbrellaCorp", "WayneEnterprises"]


def get_support_data(file_path):
    """
    Stream support-chat objects out of an HDF5 export, one at a time.

    Each top-level key of the file is used as the object's UUID. Its group is
    expected to hold a JSON-encoded ``object`` dataset (the properties) and one
    ``vector_<name>`` dataset per named vector.

    Args:
        file_path (str): Path to the HDF5 file

    Yields:
        dict: Dictionary with 'uuid', 'properties', 'tenant' and 'vector' keys.
            'vector' maps the named vector "text_with_metadata" to a numpy array.

    Raises:
        KeyError: If an object has no 'company_author' property or no
            'vector_text_with_metadata' dataset.
    """

    with h5py.File(file_path, "r") as hf:
        for uuid in hf.keys():
            group = hf[uuid]

            # Get the object properties
            properties = json.loads(group["object"][()])

            # Arbitrarily (but deterministically) assign a tenant to the object.
            # Use the real length of tenant_names rather than a hard-coded 5, so
            # editing the tenant list can neither raise IndexError nor leave
            # tenants unused.
            tenant_index = len(properties["company_author"]) % len(tenant_names)
            tenant = tenant_names[tenant_index]

            # Get the vector(s) - handle multiple vectors if they exist.
            # "vector_<name>" datasets are keyed by <name>.
            vectors = {
                key.split("_", 1)[1]: np.asarray(group[key])
                for key in group.keys()
                if key.startswith("vector_")
            }

            yield {
                "uuid": uuid,
                "properties": properties,
                "tenant": tenant,
                "vector": {"text_with_metadata": vectors["text_with_metadata"]},
            }

In [7]:
from tqdm import tqdm

# Source file and target collection for the import
file_path = "data/twitter_customer_support.h5"
chats = client.collections.get(COLLECTION_NAME)

# Send the objects (with their pre-computed vectors) in batches of 200
with chats.batch.fixed_size(batch_size=200) as batch:
    for record in tqdm(get_support_data(file_path), desc="Importing to Weaviate"):
        batch.add_object(
            uuid=record["uuid"],
            properties=record["properties"],
            vector=record["vector"],
        )

print(f"🎉 Import completed! {len(chats)} objects imported.")

# Report anything the batch could not import
failed = chats.batch.failed_objects
if failed:
    print("⚠️  Some objects failed to import:")
    print(f"   Failed: {len(failed)} objects")
    print("   First few errors:", failed[:3])
else:
    print("✅ All objects imported successfully!")

Importing to Weaviate: 100000it [00:40, 2469.96it/s]

🎉 Import completed! 100000 objects imported.
✅ All objects imported successfully!





Search examples

In [8]:
# Semantic search: up to three conversations closest to the query text
response = chats.query.near_text(query="Problem with delivery", limit=3)

divider = "=" * 50
for hit in response.objects:
    props = hit.properties
    print(f"\n\nDialogID {props['dialogue_id']} with Company: {props['company_author']}")
    print(divider)
    # Only show the first 500 characters of each conversation
    print(props["text"][:500])



DialogID 533243 with Company: AmazonHelp
User_242734: your delivery driver that was bringing my package didn't even bother to knock on my door and then claimed delivery was unsuccessful, I spoke to someone on customer services who wasn't helpful, this has really annoyed me and I will be reviewing my account with Amazon.
AmazonHelp: Hi Joe I am sorry to hear that! Have you received your delivery yet? ^HS
User_242734: No I haven't and it is already 24 hours late.
User_242734: I would understand if nobody was at my address but myself and 


DialogID 464283 with Company: AmazonHelp
User_225395: After 3 online chat attempts, and several call centre reps, including a supervisor, I reached a logistics rep who says the delivery driver is doing sketchy things on his route, and I have yet to get my delivery. #badcustomerservice
AmazonHelp: I'm so sorry for the frustration with this delivery. I'd like a member of our Social Media team to take a look into this. Please provide your details here: 

In [9]:
from weaviate.classes.query import Filter

# Restrict the semantic search to a single company.
# Alternative to try: Filter.by_property("company_author").like("*Air*")
company_filter = Filter.by_property("company_author").equal("Uber_Support")

response = chats.query.near_text(
    query="Delay compensation",
    limit=3,
    filters=company_filter,
)

divider = "=" * 50
for hit in response.objects:
    props = hit.properties
    print(f"\n\nDialogID {props['dialogue_id']} with Company: {props['company_author']}")
    print(divider)
    # Only show the first 500 characters of each conversation
    print(props["text"][:500])



DialogID 258188 with Company: Uber_Support
User_177416: Hey, I received a promo code last week as a form of compensation for a late delivery, but it didn't work when trying to redeem
Uber_Support: Sorry to hear this, Thomas! Please go here https://t.co/lqVgKo4ZO3, so we can follow up.
User_177416: These pages don't seem to refer to codes handed out as compensation. Is there any other way I can resolve the issue?
Uber_Support: Hey, Thomas. The previous link will still work for your particular issue. However, feel free to DM us your email 


DialogID 83592 with Company: Uber_Support
User_134360: if I’m waiting far longer for my driver then it suggests, why do I get charged for cancelling for their delay
Uber_Support: We're here to help! Please send us a note via https://t.co/JyzdtQmyxT so we can connect.


DialogID 473391 with Company: Uber_Support
User_227717: @Uber_Support over an hour and only now it says there’s a slight delay? Will expect a full refund once my food arrives https:/

Check the app again! It should now be working properly - try a few searches and RAG queries.

How can we scale this solution?
- What options do we have to manage the memory footprint? 
- What if we want to isolate the data for end users?
- Can we scale out with Weaviate?

## Quantization

Stop & restart Weaviate to reset memory footprint

In [10]:
# Tear the stack down, then (only if that succeeded) start it again in the background
!docker-compose down && docker-compose up -d

[1A[1B[0G[?25l[+] Running 0/0
 [33m⠋[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.1s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠙[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.2s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠹[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.3s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠸[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.4s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠼[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.5s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠴[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.6s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠦[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.7s [0m
[?25h[1A[1A[0G[?25l[+] Runni

In [11]:
# Delete existing collection if it exists, so it can be re-created with quantization enabled
client.collections.delete(COLLECTION_NAME)

In [12]:
from weaviate.classes.config import Property, DataType, Configure

# Properties stored for each support conversation
chat_properties = [
    Property(name=prop_name, data_type=prop_type)
    for prop_name, prop_type in (
        ("text", DataType.TEXT),
        ("dialogue_id", DataType.INT),
        ("company_author", DataType.TEXT),
        ("created_at", DataType.DATE),
    )
]

# Quantization here: the HNSW index is configured with the BQ quantizer
quantized_index = Configure.VectorIndex.hnsw(
    quantizer=Configure.VectorIndex.Quantizer.bq()
)

text_vector_config = Configure.NamedVectors.text2vec_cohere(
    name="text_with_metadata",
    model="embed-multilingual-light-v3.0",
    source_properties=["text", "company_author"],
    vector_index_config=quantized_index,
)

# Kept as the last expression so the cell still displays the returned collection
client.collections.create(
    name=COLLECTION_NAME,
    properties=chat_properties,
    vectorizer_config=[text_vector_config],
    generative_config=Configure.Generative.cohere(model="command-r"),
)

<weaviate.collections.collection.sync.Collection at 0x13c23fa10>

Import data again

In [13]:
from tqdm import tqdm

# Source file and target collection for the import
file_path = "data/twitter_customer_support.h5"
chats = client.collections.get(COLLECTION_NAME)

# Send the objects (with their pre-computed vectors) in batches of 200
with chats.batch.fixed_size(batch_size=200) as batch:
    for record in tqdm(get_support_data(file_path), desc="Importing to Weaviate"):
        batch.add_object(
            uuid=record["uuid"],
            properties=record["properties"],
            vector=record["vector"],
        )

print(f"🎉 Import completed! {len(chats)} objects imported.")

# Report anything the batch could not import
failed = chats.batch.failed_objects
if failed:
    print("⚠️  Some objects failed to import:")
    print(f"   Failed: {len(failed)} objects")
    print("   First few errors:", failed[:3])
else:
    print("✅ All objects imported successfully!")

Importing to Weaviate: 100000it [00:36, 2720.25it/s]

🎉 Import completed! 100000 objects imported.
✅ All objects imported successfully!





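Queries are written exactly the same way as before. The results can differ slightly from the unquantized run (compare the output below with the earlier search), because quantization makes the vector search more approximate.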

In [14]:
# Same semantic search as before, now against the quantized index
response = chats.query.near_text(query="Problem with delivery", limit=3)

divider = "=" * 50
for hit in response.objects:
    props = hit.properties
    print(f"\n\nDialogID {props['dialogue_id']} with Company: {props['company_author']}")
    print(divider)
    # Only show the first 500 characters of each conversation
    print(props["text"][:500])



DialogID 464283 with Company: AmazonHelp
User_225395: After 3 online chat attempts, and several call centre reps, including a supervisor, I reached a logistics rep who says the delivery driver is doing sketchy things on his route, and I have yet to get my delivery. #badcustomerservice
AmazonHelp: I'm so sorry for the frustration with this delivery. I'd like a member of our Social Media team to take a look into this. Please provide your details here: https://t.co/gmTuBIfTju. ^DG
User_225395: I have screenshots too, and have replied to t


DialogID 493347 with Company: AmazonHelp
User_232615: Once again, your drivers can't deliver groceries. You have the ability to arrange a specific delivery time, people arrange their day around this and, once again, you can't access the building because your driver doesn't feel like pushing a buzzer.@136881 I'm coming
AmazonHelp: I'm sorry for the recurring delivery issues. Have you contacted us via phone or chat: https://t.co/JzP7hlA23B? If so, what

But check the memory footprint!

## Vector index types

In [15]:
# Tear the stack down, then (only if that succeeded) start it again in the background
!docker-compose down && docker-compose up -d

[1A[1B[0G[?25l[+] Running 0/0
 [33m⠋[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.1s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠙[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.2s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠹[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.3s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠸[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.4s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠼[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.5s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠴[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.6s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠦[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.7s [0m
[?25h[1A[1A[0G[?25l[+] Runni

In [17]:
# Delete existing collection if it exists, so it can be re-created with a different vector index type
client.collections.delete(COLLECTION_NAME)

Review indexing options

In [18]:
from weaviate.classes.config import Property, DataType, Configure

# Properties stored for each support conversation
chat_properties = [
    Property(name=prop_name, data_type=prop_type)
    for prop_name, prop_type in (
        ("text", DataType.TEXT),
        ("dialogue_id", DataType.INT),
        ("company_author", DataType.TEXT),
        ("created_at", DataType.DATE),
    )
]

# Indexing options: this run uses a flat index instead of HNSW
text_vector_config = Configure.NamedVectors.text2vec_cohere(
    name="text_with_metadata",
    model="embed-multilingual-light-v3.0",
    source_properties=["text", "company_author"],
    vector_index_config=Configure.VectorIndex.flat(),
)

# Kept as the last expression so the cell still displays the returned collection
client.collections.create(
    name=COLLECTION_NAME,
    properties=chat_properties,
    vectorizer_config=[text_vector_config],
    generative_config=Configure.Generative.cohere(model="command-r"),
)

<weaviate.collections.collection.sync.Collection at 0x168b91ad0>

In [19]:
from tqdm import tqdm

# Source file and target collection for the import
file_path = "data/twitter_customer_support.h5"
chats = client.collections.get(COLLECTION_NAME)

# Send the objects (with their pre-computed vectors) in batches of 200
with chats.batch.fixed_size(batch_size=200) as batch:
    for record in tqdm(get_support_data(file_path), desc="Importing to Weaviate"):
        batch.add_object(
            uuid=record["uuid"],
            properties=record["properties"],
            vector=record["vector"],
        )

print(f"🎉 Import completed! {len(chats)} objects imported.")

# Report anything the batch could not import
failed = chats.batch.failed_objects
if failed:
    print("⚠️  Some objects failed to import:")
    print(f"   Failed: {len(failed)} objects")
    print("   First few errors:", failed[:3])
else:
    print("✅ All objects imported successfully!")

Importing to Weaviate: 100000it [00:35, 2797.43it/s]


🎉 Import completed! 100000 objects imported.
✅ All objects imported successfully!


In [20]:
# Same semantic search as before, now against the flat index
response = chats.query.near_text(query="Problem with delivery", limit=3)

divider = "=" * 50
for hit in response.objects:
    props = hit.properties
    print(f"\n\nDialogID {props['dialogue_id']} with Company: {props['company_author']}")
    print(divider)
    # Only show the first 500 characters of each conversation
    print(props["text"][:500])



DialogID 533243 with Company: AmazonHelp
User_242734: your delivery driver that was bringing my package didn't even bother to knock on my door and then claimed delivery was unsuccessful, I spoke to someone on customer services who wasn't helpful, this has really annoyed me and I will be reviewing my account with Amazon.
AmazonHelp: Hi Joe I am sorry to hear that! Have you received your delivery yet? ^HS
User_242734: No I haven't and it is already 24 hours late.
User_242734: I would understand if nobody was at my address but myself and 


DialogID 464283 with Company: AmazonHelp
User_225395: After 3 online chat attempts, and several call centre reps, including a supervisor, I reached a logistics rep who says the delivery driver is doing sketchy things on his route, and I have yet to get my delivery. #badcustomerservice
AmazonHelp: I'm so sorry for the frustration with this delivery. I'd like a member of our Social Media team to take a look into this. Please provide your details here: 

When to choose different indexing types?

## Multi-tenancy

In [21]:
# Delete existing collection if it exists, so it can be re-created with multi-tenancy enabled
client.collections.delete(COLLECTION_NAME)

In [22]:
from weaviate.classes.config import Property, DataType, Configure

# Properties stored for each support conversation
chat_properties = [
    Property(name=prop_name, data_type=prop_type)
    for prop_name, prop_type in (
        ("text", DataType.TEXT),
        ("dialogue_id", DataType.INT),
        ("company_author", DataType.TEXT),
        ("created_at", DataType.DATE),
    )
]

text_vector_config = Configure.NamedVectors.text2vec_cohere(
    name="text_with_metadata",
    model="embed-multilingual-light-v3.0",
    source_properties=["text", "company_author"],
    vector_index_config=Configure.VectorIndex.hnsw(),
)

# Multi-tenancy is switched on, with automatic tenant creation enabled
tenancy_config = Configure.multi_tenancy(enabled=True, auto_tenant_creation=True)

# Kept as the last expression so the cell still displays the returned collection
client.collections.create(
    name=COLLECTION_NAME,
    properties=chat_properties,
    vectorizer_config=[text_vector_config],
    generative_config=Configure.Generative.cohere(model="command-r"),
    multi_tenancy_config=tenancy_config,
)

<weaviate.collections.collection.sync.Collection at 0x16929c310>

In [23]:
from tqdm import tqdm

# Source file for the import
file_path = "data/twitter_customer_support.h5"

# Client-level batch: every object names its collection and its tenant explicitly
with client.batch.fixed_size(batch_size=200) as batch:
    for record in tqdm(get_support_data(file_path), desc="Importing to Weaviate"):
        batch.add_object(
            collection=COLLECTION_NAME,
            tenant=record["tenant"],
            uuid=record["uuid"],
            properties=record["properties"],
            vector=record["vector"],
        )

# Report anything the batch could not import
failed = client.batch.failed_objects
if failed:
    print("⚠️  Some objects failed to import:")
    print(f"   Failed: {len(failed)} objects")
    print("   First few errors:", failed[:3])
else:
    print("✅ All objects imported successfully!")

Importing to Weaviate: 100000it [00:36, 2705.65it/s]


✅ All objects imported successfully!


Search examples with multi-tenancy

In [24]:
# This query is expected to fail: the collection has multi-tenancy enabled,
# but no tenant is specified. The error is caught and displayed, so that
# "Restart Kernel -> Run All" can continue past this demonstration.
from weaviate.exceptions import WeaviateQueryError

try:
    response = chats.query.near_text(
        query="Problem with delivery",
        limit=3
    )
except WeaviateQueryError as query_error:
    print(f"WeaviateQueryError: {query_error}")
else:
    # Only reached if the query unexpectedly succeeds
    for o in response.objects:
        print(f"\n\nDialogID {o.properties['dialogue_id']} with Company: {o.properties['company_author']}")
        print("=" * 50)
        print(o.properties["text"][:500])

WeaviateQueryError: Query call with protocol GRPC search failed with message explorer: get class: concurrentTargetVectorSearch): explorer: get class: vector search: object vector search at index supportchat: class SupportChat has multi-tenancy enabled, but request was without tenant.

In [25]:
# Scope the collection handle to one tenant; queries then only see that tenant's data
tenant = chats.with_tenant(tenant_names[1])

response = tenant.query.near_text(query="Problem with delivery", limit=3)

divider = "=" * 50
for hit in response.objects:
    props = hit.properties
    print(f"\n\nDialogID {props['dialogue_id']} with Company: {props['company_author']}")
    print(divider)
    # Only show the first 500 characters of each conversation
    print(props["text"][:500])



DialogID 734362 with Company: AldiUK
User_295708: looks like a problem with 3d printer delivery :( was tracking fine this morning. https://t.co/giDq3Bh9e7
AldiUK: We're sorry to hear this, Ed. Please could you send us a DM with your order number so that our customer services team can look into this further? Thanks.
User_295708: it got delivered today, please offer choice not to use hermes delivery....i would have paid more to get a different service!


DialogID 276880 with Company: AldiUK
User_182159: - @127161 lied about trying to delivery package on Friday. No updates, no redelivery, nothing. How do I deal with YOU?
AldiUK: @127161 We're sorry to hear this, Gareth. Please could you send us a DM with your order number so we can look into this further?


DialogID 455419 with Company: NikeSupport
User_223105: hi, I have just had a part of my order that has been cancelled, meaning half my delivery will not be fulfilled. I ordered 3 days ago, delivery is due tomorrow. This is unacceptab

In [26]:
from weaviate.classes.query import Filter

# get_support_data assigned each object to a tenant from the length of its
# company_author, so the company we filter on has to be looked up in the
# tenant picked by that same rule. With tenant_names[0] the "Uber_Support"
# filter cannot match anything (len("Uber_Support") % 5 == 2).
company = "Uber_Support"
tenant = chats.with_tenant(tenant_names[len(company) % len(tenant_names)])

response = tenant.query.near_text(
    query="Delay compensation",
    limit=3,
    # filters=Filter.by_property("company_author").like("*Air*")
    filters=Filter.by_property("company_author").equal(company)
)

for o in response.objects:
    print(f"\n\nDialogID {o.properties['dialogue_id']} with Company: {o.properties['company_author']}")
    print("=" * 50)
    print(o.properties["text"][:500])

## Multi-node example

In [27]:
# Tear down the current stack, then (only if that succeeded) start the stack
# described by docker-compose-three-nodes.yml in the background
!docker-compose down && docker compose -f docker-compose-three-nodes.yml up -d

[1A[1B[0G[?25l[+] Running 0/0
 [33m⠋[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.1s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠙[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.2s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠹[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.3s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠸[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.4s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠼[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.5s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠴[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.6s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 [33m⠦[0m Container scalable-rag-workshop-weaviate_anon-1  Stopping               [34m0.7s [0m
[?25h[1A[1A[0G[?25l[+] Runni

In [28]:
import weaviate
import os

COLLECTION_NAME = "SupportChat"

# Close the connection opened earlier in the notebook before rebinding `client`;
# otherwise the old connection is never released. The guard keeps this cell
# runnable on its own in a fresh kernel.
if "client" in globals():
    client.close()

# Connect to Weaviate (the stack that was just started)
client = weaviate.connect_to_local(
    port=8080,
    headers={
        # Pass inference provider keys as required (we are using Cohere today)
        "X-COHERE-API-KEY": os.environ["COHERE_API_KEY"],
    },
)

In [29]:
# Delete existing collection if it exists, so the create cell below starts from scratch
client.collections.delete(COLLECTION_NAME)

In [30]:
from weaviate.classes.config import Property, DataType, Configure

# Properties stored for each support conversation
chat_properties = [
    Property(name=prop_name, data_type=prop_type)
    for prop_name, prop_type in (
        ("text", DataType.TEXT),
        ("dialogue_id", DataType.INT),
        ("company_author", DataType.TEXT),
        ("created_at", DataType.DATE),
    )
]

# One named vector built from the text and company name, stored in a default HNSW index
text_vector_config = Configure.NamedVectors.text2vec_cohere(
    name="text_with_metadata",
    model="embed-multilingual-light-v3.0",
    source_properties=["text", "company_author"],
    vector_index_config=Configure.VectorIndex.hnsw(),
)

# Kept as the last expression so the cell still displays the returned collection
client.collections.create(
    name=COLLECTION_NAME,
    properties=chat_properties,
    vectorizer_config=[text_vector_config],
    generative_config=Configure.Generative.cohere(model="command-r"),
)

<weaviate.collections.collection.sync.Collection at 0x169dd6ed0>

In [31]:
from tqdm import tqdm

# Source file and target collection for the import
file_path = "data/twitter_customer_support.h5"
chats = client.collections.get(COLLECTION_NAME)

# Send the objects (with their pre-computed vectors) in batches of 200
with chats.batch.fixed_size(batch_size=200) as batch:
    for record in tqdm(get_support_data(file_path), desc="Importing to Weaviate"):
        batch.add_object(
            uuid=record["uuid"],
            properties=record["properties"],
            vector=record["vector"],
        )

print(f"🎉 Import completed! {len(chats)} objects imported.")

# Report anything the batch could not import
failed = chats.batch.failed_objects
if failed:
    print("⚠️  Some objects failed to import:")
    print(f"   Failed: {len(failed)} objects")
    print("   First few errors:", failed[:3])
else:
    print("✅ All objects imported successfully!")

Importing to Weaviate: 100000it [00:37, 2647.86it/s]

🎉 Import completed! 100000 objects imported.
✅ All objects imported successfully!





## Replication setup

In [32]:
# Delete existing collection if it exists, so it can be re-created with replication configured
client.collections.delete(COLLECTION_NAME)

In [33]:
from weaviate.classes.config import Property, DataType, Configure

# Properties stored for each support conversation
chat_properties = [
    Property(name=prop_name, data_type=prop_type)
    for prop_name, prop_type in (
        ("text", DataType.TEXT),
        ("dialogue_id", DataType.INT),
        ("company_author", DataType.TEXT),
        ("created_at", DataType.DATE),
    )
]

text_vector_config = Configure.NamedVectors.text2vec_cohere(
    name="text_with_metadata",
    model="embed-multilingual-light-v3.0",
    source_properties=["text", "company_author"],
    vector_index_config=Configure.VectorIndex.hnsw(),
)

# Replication: a replication factor of 3 is requested for this collection
replication = Configure.replication(factor=3)

# Kept as the last expression so the cell still displays the returned collection
client.collections.create(
    name=COLLECTION_NAME,
    properties=chat_properties,
    vectorizer_config=[text_vector_config],
    generative_config=Configure.Generative.cohere(model="command-r"),
    replication_config=replication,
)

<weaviate.collections.collection.sync.Collection at 0x11c1fc390>

In [34]:
from tqdm import tqdm

# Source file and target collection for the import
file_path = "data/twitter_customer_support.h5"
chats = client.collections.get(COLLECTION_NAME)

# Send the objects (with their pre-computed vectors) in batches of 200
with chats.batch.fixed_size(batch_size=200) as batch:
    for record in tqdm(get_support_data(file_path), desc="Importing to Weaviate"):
        batch.add_object(
            uuid=record["uuid"],
            properties=record["properties"],
            vector=record["vector"],
        )

print(f"🎉 Import completed! {len(chats)} objects imported.")

# Report anything the batch could not import
failed = chats.batch.failed_objects
if failed:
    print("⚠️  Some objects failed to import:")
    print(f"   Failed: {len(failed)} objects")
    print("   First few errors:", failed[:3])
else:
    print("✅ All objects imported successfully!")

Importing to Weaviate: 100000it [00:58, 1705.21it/s]

🎉 Import completed! 100000 objects imported.
✅ All objects imported successfully!



