In [None]:
!pip install qdrant-client

Collecting qdrant-client
  Downloading qdrant_client-1.15.1-py3-none-any.whl.metadata (11 kB)
Collecting portalocker<4.0,>=2.7.0 (from qdrant-client)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Downloading qdrant_client-1.15.1-py3-none-any.whl (337 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.3/337.3 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading portalocker-3.2.0-py3-none-any.whl (22 kB)
Installing collected packages: portalocker, qdrant-client
Successfully installed portalocker-3.2.0 qdrant-client-1.15.1


In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
import os


# Request timeout, in seconds, shared by both clients.
QDRANT_TIMEOUT_SEC = 300

# Source cluster credentials (read from the environment, never hardcoded).
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")

# Target cluster credentials.
QDRANT_API_KEY_1 = os.getenv("QDRANT_API_KEY_1")
QDRANT_URL_1 = os.getenv("QDRANT_URL_1")

# Fail fast on a missing URL: QdrantClient(url=None) does not raise, it falls
# back to the client's default host, so an unset variable would silently point
# the migration at the wrong server. API keys are left optional on purpose
# (an unauthenticated instance is a valid setup).
_missing_urls = [
    env_name
    for env_name, env_value in (("QDRANT_URL", QDRANT_URL), ("QDRANT_URL_1", QDRANT_URL_1))
    if not env_value
]
if _missing_urls:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_urls)
    )

# Client for the cluster the points are read from.
source_client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
    timeout=QDRANT_TIMEOUT_SEC,
)

# Client for the cluster the points are written to.
target_client = QdrantClient(
    url=QDRANT_URL_1,
    api_key=QDRANT_API_KEY_1,
    timeout=QDRANT_TIMEOUT_SEC,
)

# Names of the collection to copy from and the collection to create/fill.
# Both are intentionally blank here; set them before running the cells below.
source_collection = ""
target_collection = ""




In [None]:
# Read the source collection's configuration so the target can mirror it.
source_info = source_client.get_collection(source_collection)
source_params = source_info.config.params
vectors_config = source_params.vectors

# Only pass allowed diff configs for Cloud
source_hnsw = source_info.config.hnsw_config
hnsw_config_diff = models.HnswConfigDiff(
    m=source_hnsw.m,
    ef_construct=source_hnsw.ef_construct,
    full_scan_threshold=source_hnsw.full_scan_threshold,
    max_indexing_threads=source_hnsw.max_indexing_threads
)

source_optimizers = source_info.config.optimizer_config
optimizer_config_diff = models.OptimizersConfigDiff(
    deleted_threshold=source_optimizers.deleted_threshold,
    vacuum_min_vector_number=source_optimizers.vacuum_min_vector_number,
    default_segment_number=source_optimizers.default_segment_number,
    indexing_threshold=source_optimizers.indexing_threshold,
    flush_interval_sec=source_optimizers.flush_interval_sec
)

# `recreate_collection` is deprecated in qdrant-client; the supported
# equivalent is an explicit existence check, delete, then create.
# WARNING: like the call it replaces, this drops any existing target
# collection with the same name before creating the new one.
if target_client.collection_exists(target_collection):
    target_client.delete_collection(target_collection)

target_client.create_collection(
    collection_name=target_collection,
    vectors_config=vectors_config,
    # Carry over sparse vector definitions too (None when the source has none),
    # otherwise points holding sparse vectors could not be upserted later.
    sparse_vectors_config=source_params.sparse_vectors,
    shard_number=source_params.shard_number,
    replication_factor=source_params.replication_factor,
    write_consistency_factor=source_params.write_consistency_factor,
    on_disk_payload=source_params.on_disk_payload,
    hnsw_config=hnsw_config_diff,
    optimizers_config=optimizer_config_diff
)



print(f" Created collection {target_collection} with same config as {source_collection}")

  target_client.recreate_collection(


 Created collection ESA EO Knowledge Base with same config as esa-rag-scraped


In [None]:
# Payload index on the integer "year" field of the target collection.
target_client.create_payload_index(
    field_schema=models.PayloadSchemaType.INTEGER,
    field_name="year",
    collection_name=target_collection,
)

# Full-text index on "title": word tokenizer, lowercased, tokens of 1-50 chars.
title_text_index = models.TextIndexParams(
    type="text",
    tokenizer="word",
    lowercase=True,
    min_token_len=1,
    max_token_len=50,
)

# Kept as the cell's last expression so the resulting UpdateResult is displayed.
target_client.create_payload_index(
    field_schema=title_text_index,
    field_name="title",
    collection_name=target_collection,
)

UpdateResult(operation_id=46379, status=<UpdateStatus.COMPLETED: 'completed'>)

In [None]:
# Payload index on the integer "n_citations" field of the target collection.
target_client.create_payload_index(
    field_schema=models.PayloadSchemaType.INTEGER,
    field_name="n_citations",
    collection_name=target_collection,
)

# Full-text index on "journal": word tokenizer, lowercased, tokens of 1-20 chars.
journal_text_index = models.TextIndexParams(
    type="text",
    tokenizer="word",
    lowercase=True,
    min_token_len=1,
    max_token_len=20,
)

# Kept as the cell's last expression so the resulting UpdateResult is displayed.
target_client.create_payload_index(
    field_schema=journal_text_index,
    field_name="journal",
    collection_name=target_collection,
)

UpdateResult(operation_id=46383, status=<UpdateStatus.COMPLETED: 'completed'>)

In [None]:
from tqdm import tqdm
from qdrant_client import models

# Point count reported by the source collection; used only as the progress-bar total.
total_vectors = source_client.get_collection(source_collection).points_count

limit = 1000        # points fetched per scroll page
copied = 0          # running count of points written to the target
next_offset = None  # scroll cursor; None asks for the first page

# Page through the source with scroll() and upsert each page into the target,
# keeping point ids, vectors and payloads unchanged.
with tqdm(total=total_vectors, desc="Copying points", unit="pt") as pbar:
    has_more = True
    while has_more:
        points, next_offset = source_client.scroll(
            collection_name=source_collection,
            limit=limit,
            offset=next_offset,
            with_payload=True,
            with_vectors=True
        )

        if points:
            target_points = [
                models.PointStruct(id=p.id, vector=p.vector, payload=p.payload)
                for p in points
            ]

            # wait=True is passed so each batch is upserted before the next page is read.
            target_client.upsert(
                collection_name=target_collection,
                points=target_points,
                wait=True
            )

            copied += len(points)
            pbar.update(len(points))

        # Stop on an empty page or when scroll() returns no further offset.
        has_more = bool(points) and next_offset is not None

print(f" Done! Total copied: {copied} points from {source_collection} to {target_collection}")



Copying points: 100%|██████████| 15614/15614 [01:43<00:00, 150.67pt/s]

 Done! Total copied: 15614 points from esa-rag-scraped to ESA EO Knowledge Base





In [None]:
# Optimizer override applied to the target collection: only indexing_threshold
# is set (to 500); every other optimizer setting is left out of the diff.
# NOTE(review): presumably lowered so the copied segments get indexed - confirm.
post_copy_optimizers = models.OptimizersConfigDiff(indexing_threshold=500)

# Kept as the cell's last expression so the call's return value is displayed.
target_client.update_collection(
    collection_name=target_collection,
    optimizers_config=post_copy_optimizers,
)

True