In [None]:
'''
Demonstrates how to migrate a database from Qdrant to Weaviate.

Note: this is not being used for ASK, as the intent for Weaviate is to utilize its collections feature to separate document-level data from page-level data, reducing overhead while enabling robust filtering.
'''

In [None]:
# Install the client libraries with %pip so they land in the running kernel's environment.
# NOTE(review): the import cell also uses `dotenv` (python-dotenv), which is not installed
# here — presumably it is already present in the environment; confirm.
%pip install --upgrade pip
# --pre lets pip pick pre-release builds of the 4.x Weaviate client.
%pip install --pre -U "weaviate-client==4.*"
# NOTE(review): qdrant-client is unpinned — consider pinning a version for reproducibility.
%pip install qdrant-client

In [2]:
import json
import os
import random

import weaviate
import weaviate.classes as wvc
from dotenv import load_dotenv, find_dotenv
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import Distance, VectorParams
from weaviate.util import generate_uuid5

load_dotenv(find_dotenv())


In [3]:
# Connection settings are read from environment variables. Each value is bound to
# the exact name the client constructors below use; previously the values were
# stored in `url` / `api_key` while the constructors referenced undefined names.

# Qdrant: the migration source.
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")

qdrant_client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
)

# Weaviate: the migration target.
WEAVIATE_URL_COMP = os.getenv("WEAVIATE_URL_COMP")
WEAVIATE_API_KEY_COMP = os.getenv("WEAVIATE_API_KEY_COMP")

weaviate_client = weaviate.connect_to_wcs(
    cluster_url=WEAVIATE_URL_COMP,
    auth_credentials=weaviate.AuthApiKey(WEAVIATE_API_KEY_COMP),
)

In [4]:
def create_weaviate_collection(collection_name):
    """Create a Weaviate collection named ``collection_name``.

    The collection is configured with the OpenAI text2vec vectorizer and the
    OpenAI generative module.

    Args:
        collection_name: Name to give the new collection.

    Returns:
        Whatever ``weaviate_client.collections.create`` returns for the new
        collection.
    """
    vectorizer = wvc.Configure.Vectorizer.text2vec_openai()
    generative = wvc.Configure.Generative.openai()
    new_collection = weaviate_client.collections.create(
        name=collection_name,
        vectorizer_config=vectorizer,
        generative_config=generative,
    )
    return new_collection

In [5]:
# this migrates both the payload/metadata and the vectors as evidenced in the .scroll parameters below

def migrate_data(collection_name, batch_size=100):
    """Copy every point of a Qdrant collection into a same-named Weaviate collection.

    Any existing Weaviate collection with that name is deleted and recreated, so
    the migration always starts from an empty target. Points are read page by
    page with ``qdrant_client.scroll`` (payload and vectors included) and written
    with ``insert_many``; each object's UUID is derived from the Qdrant point id
    via ``generate_uuid5``.

    Args:
        collection_name: Name of the Qdrant collection to read; also used as the
            name of the Weaviate collection to (re)create.
        batch_size: Maximum number of points requested per scroll page.

    Returns:
        int: Number of objects handed to Weaviate.
    """
    # collections.get() hands back a collection handle rather than None, so an
    # `is None` test can never detect a missing collection; ask explicitly.
    if weaviate_client.collections.exists(collection_name):
        print("Collection already exists, deleting and recreating")
        weaviate_client.collections.delete(collection_name)
    weaviate_collection = create_weaviate_collection(collection_name)

    migrated = 0
    offset = 0
    while True:
        print(f"Batch: {offset}")
        # scroll() returns (records, next_page_offset); the offset is None on
        # the final page.
        records, next_offset = qdrant_client.scroll(
            collection_name=collection_name,
            limit=batch_size,
            offset=offset,
            with_payload=True,
            with_vectors=True
        )

        data_objects = [
            weaviate.classes.DataObject(
                vector=point.vector,
                properties=point.payload,
                uuid=generate_uuid5(point.id),
            )
            for point in records
        ]

        # Insert before checking for the end of the scroll: the final page still
        # carries records even though its next offset is None. Skip empty pages
        # so insert_many is never called with nothing to insert.
        if data_objects:
            weaviate_collection.data.insert_many(data_objects)
            migrated += len(data_objects)

        if next_offset is None:
            break
        offset = next_offset

    return migrated

In [6]:
# Migrate every collection reported by the Qdrant instance into Weaviate,
# reading 100 points per scroll page. The per-collection get_collection() lookup
# was dropped: its result was never used and it cost one extra request each.
collections = qdrant_client.get_collections()
for collection in collections.collections:
    collection_name = collection.name
    migrate_data(collection_name, 100)

Collection already exists, deleting and recreating
Batch: 0




Batch: 026872c3-a733-4014-a90a-570e3e8fa765
Batch: 04bd77b1-f5ee-4f35-812b-b92ff40b1b4c
Batch: 079872c6-5e90-48ab-8e49-c0ced7098ceb
Batch: 0a15fa19-c428-4ee4-b868-e7ee191b0d4a
Batch: 0c4cdb50-15e6-4072-a69d-1aa7b8cb443d
Batch: 0eae2648-6aca-40c7-a473-49db16e1264c
Batch: 10a3bc93-6088-4f26-9b60-ec4646f53bdc
Batch: 13330521-8f45-4926-bef0-15a8d96fe2ed
Batch: 158d2f33-9aec-4578-98e6-152cf9a6c2cd
Batch: 17ad5df1-50fa-4468-bd01-400621ac59bb
Batch: 1aa6bb3d-c159-494a-9e04-e8137c66900e
Batch: 1d485e66-aa88-4e3e-8592-2d3cd985f883
Batch: 1fe5880d-6c05-48d3-ad83-9cc216df4a88
Batch: 2248b9c9-cce6-4085-867c-181aae6171bd
Batch: 245224f1-10b0-4a61-b732-d77bc32d916e
Batch: 2669d4d9-3b72-4d8d-91d0-d643104bb63c
Batch: 293ab4c3-986f-4689-a267-108b2765361b
Batch: 2be39ec8-812d-4b35-ad10-71d180bf7ae4
Batch: 2e3c3321-faaa-47d0-a407-5a3e12b9f6bf
Batch: 3058756a-395e-4462-809e-31811ec66718
Batch: 32c86af2-cf04-4373-8cb2-efcc2321c4c7
Batch: 35ad389c-fb21-49c2-b2c3-4a0a180842da
Batch: 385bfd39-8f6c-4f26-b870-2