In [1]:
%%capture
!pip install qdrant-client
!pip install tqdm
!pip install transformers
!pip install sentence-transformers
!pip install accelerate

In [None]:
from qdrant_client.models import VectorParams, Distance
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import VectorParams, Distance, HnswConfigDiff, OptimizersConfigDiff,PointStruct
import os

# Endpoints and credentials come from the environment; a variable that is not
# set yields None here.
QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY')
QDRANT_URL = os.environ.get('QDRANT_URL')
QDRANT_API_KEY_1 = os.environ.get('QDRANT_API_KEY_1')
QDRANT_URL_1 = os.environ.get('QDRANT_URL_1')

# Collection that is scrolled (source) and collection that is written to.
source_collection = "esa-data-indus-512-1024-recursive"
quantized_collection = "esa-data-qwen-1024-recursive"

# `client` is the connection the notebook creates the new collection on and
# uploads the re-embedded points to.
client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY, timeout=300)

# `new_client` is the connection the notebook scrolls `source_collection` from.
new_client = QdrantClient(url=QDRANT_URL_1, api_key=QDRANT_API_KEY_1, timeout=300)

In [7]:
# Create the destination collection only when it does not exist yet, so that
# re-running the notebook to resume an interrupted migration does not fail on
# an already-existing collection.
if not client.collection_exists(collection_name=quantized_collection):
    client.create_collection(
        collection_name=quantized_collection,
        vectors_config=models.VectorParams(
            size=2560,  # must equal the embedder's output width — TODO confirm for the model in use
            distance=models.Distance.COSINE,
            on_disk=True,
        ),
        shard_number=8,  # Increase shards for large data
        on_disk_payload=True,
        quantization_config=models.BinaryQuantization(
            binary=models.BinaryQuantizationConfig(
                # Alternative encoding left disabled by the author:
                # encoding=models.BinaryQuantizationEncoding.TWO_BITS,
                always_ram=False,
            ),
        ),
    )

True

In [8]:
# HNSW index settings applied to the destination collection.
hnsw_settings = models.HnswConfigDiff(
    m=16,
    ef_construct=128,
    full_scan_threshold=10_000,
    max_indexing_threads=2,
    on_disk=True,
)

# Segment optimizer settings.
# NOTE(review): the Qdrant documentation describes indexing_threshold,
# memmap_threshold and max_segment_size as sizes in kilobytes of vector data,
# not as vector counts — confirm against the server version in use.
optimizer_settings = models.OptimizersConfigDiff(
    indexing_threshold=20000,       # segment size at which index building starts
    memmap_threshold=5000,          # kept below indexing_threshold to ease RAM pressure
    deleted_threshold=0.2,          # fraction of deleted vectors that triggers cleanup
    vacuum_min_vector_number=1000,  # smallest segment considered for vacuuming
    default_segment_number=4,       # target number of segments
    max_segment_size=6_000_000,     # upper bound on segment size (avoids huge merges)
    max_optimization_threads=1,     # one optimization at a time (less memory/disk pressure)
)

client.update_collection(
    collection_name=quantized_collection,
    hnsw_config=hnsw_settings,
    optimizers_config=optimizer_settings,
)

True

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import time
from qdrant_client.http.exceptions import UnexpectedResponse
import torch
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer

# Retry policy constants; the same two names are assigned again, with the same
# values, in the cell that defines retry_qdrant.
MAX_RETRIES = 5  # number of attempts before giving up
RETRY_DELAY = 2  # seconds



class qwen_embedder:
    """Thin wrapper around a SentenceTransformer embedding model.

    `embed_documents` encodes a batch of passages and `embed_query` encodes a
    single search query; both return plain Python lists rather than arrays.
    """

    def __init__(self, model_name="Qwen/Qwen3-Embedding-4B"):
        """Load the sentence-transformers model.

        Args:
            model_name (str): Model id or local path handed to SentenceTransformer.
        """
        self.model = SentenceTransformer(
            model_name,
            model_kwargs={
                "dtype": "auto",       # important: will use float16/bfloat16 automatically
                "device_map": "auto",
            },
            tokenizer_kwargs={
                "padding_side": "left",
                "max_length": 2048,
                "truncation": True,
            },
        )

    def embed_documents(self,
                        texts,
                        batch_size=4,
                        padding=True,
                        truncation=True,
                        max_length=2048,
                        normalize=True):
        """
        Encodes a list of texts into embeddings.

        Args:
            texts (list[str]): Documents to embed
            batch_size (int): Number of texts encoded per forward pass
            padding (bool/str): Accepted for backward compatibility only.
                `SentenceTransformer.encode` has no padding option, so this
                value is not forwarded.
            truncation (bool): When true, inputs are limited to `max_length`
                tokens for the duration of this call
            max_length (int): Max tokens allowed when `truncation` is true
            normalize (bool): Whether to L2 normalize embeddings

        Returns:
            list[list[float]]: One embedding per input text
        """
        # Nothing to encode: skip the model call entirely.
        if isinstance(texts, (list, tuple)) and not texts:
            return []

        # encode() does not take tokenizer options; the token limit is the
        # model's `max_seq_length`, so apply the requested limit there and put
        # the previous value back afterwards so other calls are unaffected.
        previous_limit = self.model.max_seq_length
        if truncation and max_length is not None:
            self.model.max_seq_length = max_length
        try:
            embeddings = self.model.encode(
                texts,
                batch_size=batch_size,
                normalize_embeddings=normalize,
                convert_to_numpy=True,
                convert_to_tensor=False,
            )
        finally:
            self.model.max_seq_length = previous_limit

        return embeddings.tolist()

    def embed_query(self, query, normalize=True):
        """
        Encodes a search query using the model's "query" prompt.

        Args:
            query (str): Query text to embed
            normalize (bool): Whether to L2 normalize the embedding; defaults
                to True so queries match the default of `embed_documents`

        Returns:
            list[float]: The query embedding
        """
        embeddings = self.model.encode(
            query,
            prompt_name="query",
            normalize_embeddings=normalize,
        )
        return embeddings.tolist()


In [4]:
# Shared embedder instance; embed_points_batch reads it through this name.
emb = qwen_embedder()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from tqdm import tqdm
# Find the resume offset after an interrupted migration: page through the
# source collection (ids only) until TARGET points have been counted, then
# print the offset to paste into the migration cell.
SCROLL_BATCH = 1000  # increase batch size to reduce API calls
TARGET = 20000
offset = None
processed = 0

with tqdm(total=TARGET, desc="Skipping points", unit="pts") as pbar:
    while True:
        scroll_result, next_offset = new_client.scroll(
            collection_name=source_collection,
            # Never request more than what is still missing, so the printed
            # offset corresponds to exactly TARGET skipped points.
            limit=min(SCROLL_BATCH, TARGET - processed),
            offset=offset,
            with_vectors=False,
            with_payload=False,  # fastest: no vectors, no payloads
        )

        if not scroll_result:
            print("Reached end of collection before target")
            break

        batch_count = len(scroll_result)
        processed += batch_count
        pbar.update(batch_count)

        if processed >= TARGET:
            print(f"Reached target at offset: {next_offset}")
            break

        # No next offset means this was the last page. Passing None back to
        # scroll() would start again from the first point and count the same
        # points a second time.
        if next_offset is None:
            print("Reached end of collection before target")
            break

        offset = next_offset

Skipping points: 100%|██████████| 20000/20000 [00:00<00:00, 23630.52pts/s]

Reached target at offset: 154619806622784387





In [None]:
# Settings for the migration run.
SCROLL_BATCH = 2_000       # points requested per scroll call
UPLOAD_BATCH = 1_000       # points handed to each upload task
MAX_WORKERS = 2            # size of the upload thread pool
TOTAL_POINTS = 1_377_173   # used as the progress-bar total and loop bound
processed = 0
# Resume point: leave as None on a first run; to continue an interrupted
# migration, paste the offset printed by the skip cell above.
offset = None

def upload_batch(points):
    """Upload `points` to the quantized collection and return how many were given.

    An empty batch is not sent; its length (0) is still returned.
    """
    if not points:
        return len(points)
    client.upload_points(
        collection_name=quantized_collection,
        points=points,
    )
    return len(points)

In [7]:
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

from qdrant_client.http.exceptions import ResponseHandlingException, UnexpectedResponse
from qdrant_client.models import PointStruct
from tqdm import tqdm

MAX_RETRIES = 5  # attempts per call before the error is re-raised
RETRY_DELAY = 2  # seconds


def retry_qdrant(func, *args, **kwargs):
    """Call `func(*args, **kwargs)`, retrying on Qdrant/network failures.

    Args:
        func: Callable to invoke.
        *args, **kwargs: Forwarded to `func` unchanged.

    Returns:
        Whatever `func` returns on the first successful attempt.

    Raises:
        The last caught exception once MAX_RETRIES attempts have failed.
        Exceptions of any other type propagate immediately.
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            return func(*args, **kwargs)
        # ResponseHandlingException is included because the Qdrant REST client
        # reports transport failures (timeouts, dropped connections) through
        # it rather than through the built-in ConnectionError/TimeoutError.
        except (UnexpectedResponse, ResponseHandlingException,
                ConnectionError, TimeoutError) as e:
            print(f"Attempt {attempt} failed: {e}")
            if attempt < MAX_RETRIES:
                time.sleep(RETRY_DELAY)
            else:
                raise

def embed_points_batch(points):
    """
    Re-embed a batch of scrolled points from their 'content' payload field.

    Every point must carry a 'content' entry in its payload; the first point
    without one raises ValueError before anything is embedded. Returns a list
    of PointStruct that keep the original id and payload and carry the newly
    computed vector.
    """
    records = []
    for point in points:
        text = point.payload.get("content")
        if text is None:
            raise ValueError(f"Point {point.id} has no 'content' field")
        records.append((point.id, text, point.payload))

    # One embedding call for the whole batch.
    vectors = emb.embed_documents([text for _, text, _ in records])

    # Pair each record with its vector, in order.
    return [
        PointStruct(id=point_id, vector=vector, payload=payload)
        for (point_id, _, payload), vector in zip(records, vectors)
    ]


In [None]:
# Re-embed the source collection page by page and upload the result to the
# destination collection. One thread pool is reused for every page.
with tqdm(total=TOTAL_POINTS-processed, desc="Migrating points", unit="pts") as pbar:
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        try:
            while processed < TOTAL_POINTS:
                # Scroll points without fetching vectors from Qdrant
                scroll_result, next_offset = retry_qdrant(
                    new_client.scroll,
                    collection_name=source_collection,
                    limit=SCROLL_BATCH,
                    offset=offset,
                    with_vectors=False  # important
                )

                if not scroll_result:
                    break

                # Embed points in batch
                points = embed_points_batch(scroll_result)

                # Split into batches for parallel upload
                batches = [points[i:i+UPLOAD_BATCH] for i in range(0, len(points), UPLOAD_BATCH)]

                futures = [executor.submit(retry_qdrant, upload_batch, b) for b in batches]
                for f in as_completed(futures):
                    uploaded = f.result()
                    pbar.update(uploaded)

                # Count what was actually returned (the last page may be
                # short), and do so before the end-of-collection check so the
                # final page is included.
                processed += len(scroll_result)

                if next_offset is None:
                    break
                offset = next_offset
        except BaseException:
            # `offset` is the start of the page that was being processed;
            # report it so it can be pasted into the settings cell to resume.
            print(f"Migration interrupted; resume with offset = {offset!r}")
            raise

print("Collection migrated with new embeddings from 'content' field")