In [1]:
pip install qdrant-client fastembed sentence-transformers numpy

Defaulting to user installation because normal site-packages is not writeable
Collecting qdrant-client
  Downloading qdrant_client-1.13.3-py3-none-any.whl (306 kB)
[K     |████████████████████████████████| 306 kB 5.8 MB/s eta 0:00:01
[?25hCollecting fastembed
  Downloading fastembed-0.6.0-py3-none-any.whl (85 kB)
[K     |████████████████████████████████| 85 kB 10.5 MB/s eta 0:00:01
[?25hCollecting sentence-transformers
  Downloading sentence_transformers-3.4.1-py3-none-any.whl (275 kB)
[K     |████████████████████████████████| 275 kB 12.9 MB/s eta 0:00:01
[?25hCollecting numpy
  Downloading numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl (5.3 MB)
[K     |████████████████████████████████| 5.3 MB 13.0 MB/s eta 0:00:01
[?25hCollecting grpcio-tools>=1.41.0
  Downloading grpcio_tools-1.71.0-cp39-cp39-macosx_10_14_universal2.whl (6.0 MB)
[K     |████████████████████████████████| 6.0 MB 28.5 MB/s eta 0:00:01
[?25hCollecting grpcio>=1.41.0
  Downloading grpcio-1.71.0-cp39-cp39-macosx_10_

In [None]:
import random
import torch
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from fastembed.sparse import SparseTextEmbedding

# Enable Metal GPU acceleration
device = "mps" if torch.backends.mps.is_available() else "cpu"
torch.mps.set_per_process_memory_fraction(0.75)  # Prevent OOM

# Generate synthetic dataset
documents = [
    f"Product review {i}: Features include {random.choice(['wireless', '4K', 'OLED', 'smart'])} tech"
    for i in range(1_000_000)
]

# Initialize models with GPU optimizations
dense_model = SentenceTransformer("all-MiniLM-L6-v2", device=device).half()  # FP16 precision
sparse_model = SparseTextEmbedding( model_name="prithivida/Splade_PP_en_v1", providers=["metal"]  # Metal-accelerated ONNX 
                                   )

# Configure batch processing parameters
batch_size = 1024  # Optimal for M3's GPU architecture
data_points = []

# Add a progress bar using tqdm
with tqdm(total=len(documents), desc="Data Points Progress") as progress_bar:
    for batch_idx in range(0, len(documents), batch_size):
        batch_docs = documents[batch_idx:batch_idx + batch_size]
        
        # Generate dense embeddings (GPU-accelerated)
        with torch.inference_mode(), torch.autocast(device_type=device):
            dense_vectors = dense_model.encode(
                batch_docs,
                batch_size=batch_size,
                convert_to_numpy=True
            )
        
        # Generate sparse embeddings (Metal-optimized)
        sparse_vectors = list(sparse_model.embed(
            batch_docs,
            batch_size=batch_size
        ))
        
        # Batch processing for payloads
        batch_user_ids = [f"user_{random.randint(1,10)}" for _ in batch_docs]
        
        # Format for Qdrant and update progress bar
        for i, (doc, dense_vec, sparse_vec) in enumerate(zip(batch_docs, dense_vectors, sparse_vectors)):
            data_points.append({
                "id": batch_idx + i,
                "vector": {
                    "dense": dense_vec.tolist(),
                    "sparse": {
                        "indices": sparse_vec.indices.tolist(),
                        "values": sparse_vec.values.tolist()
                    }
                },
                "payload": {
                    "text": doc,
                    "user_id": batch_user_ids[i]
                }
            })
            progress_bar.update(1)  # Increment progress bar by 1 for each data point created

# Optional: Force GPU memory cleanup
torch.mps.empty_cache()


Fetching 5 files: 100%|██████████| 5/5 [00:29<00:00,  5.89s/it]


In [1]:
from qdrant_client import QdrantClient, models

client = QdrantClient(host="localhost", port=6333)

client.create_collection(
    collection_name="hybrid-search-demo-gpu",
    vectors_config={
        "dense": models.VectorParams(
            size=768,
            distance=models.Distance.COSINE,
            hnsw_config=models.HnswConfigDiff(
                gpu_indexing=True,  # Enable GPU acceleration
                gpu_resources=models.GpuResourceConfig(
                    device_id=0,
                    memory_limit=4096  # 4GB GPU memory allocation
                )
            )
        )
    },
    sparse_vectors_config={
        "sparse": models.SparseVectorParams(
            index=models.SparseIndexParams(
                on_disk=False,  # Keep in GPU memory
                gpu_acceleration=True
            )
        )
    },
    optimizers_config=models.OptimizersConfigDiff(
        indexing_threshold=20000,
        memmap_threshold=20000,       
        search_threshold=20000
    ),
    shard_number=3,
    replication_factor=2,
)

  from .autonotebook import tqdm as notebook_tqdm


AttributeError: module 'qdrant_client.models' has no attribute 'GpuResourceConfig'

In [None]:
for idx, dense_vec, sparse_vec, payload in data_points:
    client.upsert(
        collection_name="hybrid-search-demo",
        points=[
            models.PointStruct(
                id=idx,
                vector={"dense": dense_vec, "sparse": sparse_vec},
                payload=payload,
            )
        ]
    )


In [None]:
from colbert import Searcher

# Initialize ColBERT for reranking
colbert_searcher = Searcher(index="colbertv2.0")

def hybrid_search(query, user_filter=None):
    # Generate query embeddings
    dense_query_vec = dense_model.encode(query)
    sparse_query_vec = sparse_model.embed(query)

    # Perform hybrid search in Qdrant
    results = client.search_batch(
        collection_name="hybrid-search-demo",
        requests=[
            models.SearchRequest(
                vector=models.NamedVector(name="dense", vector=dense_query_vec),
                filter=models.Filter(
                    must=[models.FieldCondition(key="user_id", match=user_filter)]
                ) if user_filter else None,
                limit=100,
                with_payload=True,
            ),
            models.SearchRequest(
                vector=models.NamedSparseVector(name="sparse", vector=sparse_query_vec),
                filter=models.Filter(
                    must=[models.FieldCondition(key="user_id", match=user_filter)]
                ) if user_filter else None,
                limit=100,
                with_payload=True,
            ),
        ]
    )

    # Combine results (reciprocal rank fusion)
    fused_results = reciprocal_rank_fusion(results[0], results[1])

    # Rerank results using ColBERT
    reranked_results = colbert_searcher.rerank(query, [res.payload["text"] for res in fused_results])
    
    return reranked_results[:10]


In [None]:
from qdrant_client.fusion import reciprocal_rank_fusion

def hybrid_search(query: str, user_id: str):
    # Generate GPU-accelerated query vectors
    query_dense = list(dense_model.query_embed(query))[0]
    query_sparse = list(sparse_model.query_embed(query))[0]
    
    # Parallel GPU-accelerated searches
    results = client.search_batch(
        collection_name="hybrid-gpu-demo",
        requests=[
            models.SearchRequest(
                vector=models.NamedVector(
                    name="dense",
                    vector=query_dense.tolist()
                ),
                filter=models.FieldCondition(
                    key="user_id",
                    match=models.MatchValue(value=user_id)
                ),
                limit=100
            ),
            models.SearchRequest(
                vector=models.NamedSparseVector(
                    name="sparse",
                    vector=query_sparse.as_object()
                ),
                filter=models.FieldCondition(
                    key="user_id",
                    match=models.MatchValue(value=user_id)
                ),
                limit=100
            )
        ]
    )
    
    # GPU-accelerated reranking
    fused = reciprocal_rank_fusion(
        results[0],
        results[1],
        method="gpu"  # Use Metal-accelerated RRF
    )
    
    return fused[:10]


In [None]:
query = "Sample query about product features"
user_filter = {"value": "user_5"}  # Example filter by user_id

results = hybrid_search(query=query, user_filter=user_filter)

for idx, (score, text) in enumerate(results):
    print(f"{idx + 1}. [Score: {score:.2f}] {text}")
