In [None]:
pip -q install qdrant-client fastembed sentence-transformers numpy pandas pyarrow colbert accelerate

In [None]:
import os
# Process-wide switches that must be set before the ML libraries are imported.
os.environ.update({
    "TOKENIZERS_PARALLELISM": "false",   # silence the tokenizers parallelism warning
    "PYTORCH_ENABLE_MPS_FALLBACK": "1",  # enable PyTorch's MPS fallback switch
})


In [None]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from fastembed.sparse import SparseTextEmbedding
import torch
import sys
import gc

# Load Seattle Police data from official source
url = "~/Downloads/SPD_Crime_Data__2008-Present.csv"
# url = "https://data.seattle.gov/api/views/tazs-3rd5/rows.csv?accessType=DOWNLOAD"
df = pd.read_csv(url)

# Column names to use
crime_type_col = 'NIBRS Offense Code Description'
subcategory_col = 'Offense Sub Category'
location_col = 'Neighborhood'
date_col = 'Offense Date'
precinct_col = 'Precinct'

# Keep only rows that have both an offense description and a sub category,
# then render every remaining row as one short text document.
df = df.dropna(subset=[crime_type_col, subcategory_col]).reset_index(drop=True)
documents = [
    f"Police report {i}: {row[crime_type_col]} - {row[subcategory_col]} " 
    f"at {row[location_col]} on {pd.to_datetime(row[date_col]).strftime('%Y-%m-%d')}"
    for i, row in df.iterrows()
]

# Memory optimization: the frame is no longer needed once the texts exist
del df
gc.collect()

# Pick the Apple GPU backend when present, otherwise run on CPU.
device = "mps" if torch.backends.mps.is_available() else "cpu"
use_mps = device == "mps"
if use_mps:
    # torch.mps.* helpers are only meaningful (and only safe to call) on MPS.
    torch.mps.set_per_process_memory_fraction(0.65)

# Dense encoder in half precision, switched to eval mode
dense_model = SentenceTransformer("all-MiniLM-L6-v2", device=device)
dense_model = dense_model.half().eval()

# Sparse encoder (BM42).
# NOTE(review): confirm that this fastembed version honours `provider_options`
# and `quantize`; they are passed through unchanged from the original cell.
sparse_model = SparseTextEmbedding(
    model_name="Qdrant/bm42-all-minilm-l6-v2-attentions",
    providers=["CoreMLExecutionProvider"],
    provider_options={
        "MLComputeUnits": "CPUAndGPU",
        "RequireStaticInputShapes": "0",
        "EnableOnSubgraphs": "1"
    },
    quantize=True
)

# Batch processing configuration
batch_size = 512               # documents per outer batch
micro_batches_per_batch = 4    # each outer batch is encoded in this many chunks
data_points = []
total_documents = len(documents)
print(f"\nTotal documents to process: {total_documents}")

for idx in range(0, total_documents, batch_size):
    batch_docs = documents[idx:idx+batch_size]

    # Ceil-divide so the batch is covered by at most `micro_batches_per_batch`
    # non-empty slices (plain list slices keep the documents as Python str).
    micro_size = max(1, -(-len(batch_docs) // micro_batches_per_batch))

    for offset in range(0, len(batch_docs), micro_size):
        micro_batch = batch_docs[offset:offset + micro_size]

        # Generate both embeddings for this chunk
        with torch.inference_mode(), torch.autocast(device_type=device):
            dense_vectors = dense_model.encode(micro_batch, convert_to_numpy=True)
            sparse_vectors = list(sparse_model.embed(micro_batch))

        # Format for Qdrant
        for i, (doc, dense_vec, sparse_vec) in enumerate(zip(micro_batch, dense_vectors, sparse_vectors)):
            # The id must include the chunk offset: `i` restarts at 0 for every
            # micro-batch, so `idx + i` alone would repeat ids within a batch
            # and later upserts would overwrite earlier points.
            doc_id = idx + offset + i
            data_points.append({
                "id": doc_id,
                "vector": {
                    "dense": dense_vec.tolist(),
                    "sparse": {
                        "indices": sparse_vec.indices.tolist(),
                        "values": sparse_vec.values.tolist()
                    }
                },
                "payload": {
                    "text": doc,
                    "user_id": f"user_{doc_id % 10 + 1}"  # Simplified user assignment
                }
            })

        # Explicit memory cleanup
        del dense_vectors, sparse_vectors
        if use_mps:
            torch.mps.empty_cache()
        gc.collect()

    # Update status (allocated-memory figure is only available on MPS)
    mem_mb = torch.mps.current_allocated_memory() / 1e6 if use_mps else 0.0
    sys.stdout.write(f"\rProcessed: {len(data_points)}/{total_documents} | Batch size: {batch_size} | Mem usage: {mem_mb:.1f}MB")
    sys.stdout.flush()

print("\n\nProcessing complete!")


In [None]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from fastembed.sparse import SparseTextEmbedding
import torch
import sys
import gc

# Pick the Apple GPU backend when present, otherwise run on CPU.
device = "mps" if torch.backends.mps.is_available() else "cpu"
use_mps = device == "mps"
if use_mps:
    # torch.mps.* helpers are only meaningful (and only safe to call) on MPS.
    torch.mps.set_per_process_memory_fraction(0.65)

# Dense encoder in half precision, switched to eval mode
dense_model = SentenceTransformer("all-MiniLM-L6-v2", device=device)
dense_model = dense_model.half().eval()

# Sparse encoder (BM42).
# NOTE(review): confirm that this fastembed version honours `provider_options`
# and `quantize`; they are passed through unchanged from the original cell.
sparse_model = SparseTextEmbedding(
    model_name="Qdrant/bm42-all-minilm-l6-v2-attentions",
    providers=["CoreMLExecutionProvider"],
    provider_options={
        "MLComputeUnits": "CPUAndGPU",
        "RequireStaticInputShapes": "0",
        "EnableOnSubgraphs": "1"
    },
    quantize=True
)

# Reverse processing configuration
batch_size = 512               # documents per outer batch
micro_batches_per_batch = 4    # each outer batch is encoded in this many chunks
data_points = []
total_documents = len(documents)
document_indices = reversed(range(total_documents))  # kept for compatibility; not read in this cell

print(f"\nTotal documents to process (reverse order): {total_documents}")

# Walk the corpus from the last document towards the first, one batch at a time
for batch_num, idx in enumerate(range(total_documents - 1, -1, -batch_size)):
    start_idx = max(0, idx - batch_size + 1)
    end_idx = idx + 1

    # Reverse the slice so the documents line up one-to-one with the
    # descending index list built next.
    batch_docs = documents[start_idx:end_idx][::-1]
    # A plain list of Python ints: numpy integers are not JSON serialisable
    # and are not plain ints when used as point ids.
    batch_indices = list(range(idx, start_idx - 1, -1))

    # Ceil-divide so the batch is covered by at most `micro_batches_per_batch`
    # non-empty slices.
    micro_size = max(1, -(-len(batch_docs) // micro_batches_per_batch))

    for offset in range(0, len(batch_docs), micro_size):
        micro_batch = batch_docs[offset:offset + micro_size]
        micro_indices = batch_indices[offset:offset + micro_size]

        with torch.inference_mode(), torch.autocast(device_type=device):
            dense_vectors = dense_model.encode(micro_batch, convert_to_numpy=True)
            sparse_vectors = list(sparse_model.embed(micro_batch))

        # Format with original indices
        for original_index, doc, dense_vec, sparse_vec in zip(
            micro_indices, micro_batch, dense_vectors, sparse_vectors
        ):
            data_points.append({
                "id": original_index,
                "vector": {
                    "dense": dense_vec.tolist(),
                    "sparse": {
                        "indices": sparse_vec.indices.tolist(),
                        "values": sparse_vec.values.tolist()
                    }
                },
                "payload": {
                    "text": doc,
                    "user_id": f"user_{original_index % 10 + 1}"
                }
            })

        # Memory cleanup
        del dense_vectors, sparse_vectors
        if use_mps:
            torch.mps.empty_cache()
        gc.collect()

    # Progress tracking
    processed = min((batch_num + 1) * batch_size, total_documents)
    sys.stdout.write(f"\rProcessed: {processed}/{total_documents}")
    sys.stdout.flush()


In [None]:
# Recompute the corpus size from `documents` and report it.
total_documents = len(documents)
print("\nTotal documents to process: " + str(total_documents))

In [None]:
import json
# Pretty-print the first prepared point to sanity-check its structure.
first_point = data_points[0]
print(json.dumps(first_point, indent=4))

In [None]:
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")

collection_name = "hybrid-search-demo"

# Only create the collection when it is missing so the cell can be re-run
# (create_collection raises if the collection already exists).
if client.collection_exists(collection_name):
    print(f"Collection '{collection_name}' already exists; keeping its current configuration")
else:
    client.create_collection(
        collection_name=collection_name,
        # Named dense vector: 384-dimensional, compared with cosine distance
        vectors_config={
            "dense": models.VectorParams(
                size=384,
                distance=models.Distance.COSINE,
                hnsw_config=models.HnswConfigDiff(
                    m=16,
                    ef_construct=100
                )
            )
        },
        # Named sparse vector. The documents are embedded with the BM42 model,
        # which relies on the server applying IDF weighting (see Qdrant's BM42
        # guide), hence the IDF modifier.
        sparse_vectors_config={
            "sparse": models.SparseVectorParams(
                index=models.SparseIndexParams(
                    on_disk=False,
                    full_scan_threshold=20000
                ),
                modifier=models.Modifier.IDF
            )
        },
        optimizers_config=models.OptimizersConfigDiff(
            indexing_threshold=20000,
            memmap_threshold=20000
        ),
        shard_number=3,
        replication_factor=2
    )

In [None]:
import sys
from qdrant_client import models
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")

# Batch processing for better performance
batch_size = 500  # Adjust based on your system's memory
total_points = len(data_points)
batches_processed = 0
points_upserted = 0

# Send the prepared points to Qdrant one fixed-size slice at a time.
for start in range(0, total_points, batch_size):
    points_to_upsert = [
        models.PointStruct(
            # Coerce to a plain int: ids that originate from numpy index
            # arrays are numpy integers rather than Python ints.
            id=int(point["id"]),
            vector={
                "dense": point["vector"]["dense"],
                # Wrap the raw indices/values in Qdrant's sparse vector model
                "sparse": models.SparseVector(
                    indices=point["vector"]["sparse"]["indices"],
                    values=point["vector"]["sparse"]["values"]
                ),
            },
            payload=point["payload"]
        )
        for point in data_points[start:start + batch_size]
    ]

    client.upsert(
        collection_name="hybrid-search-demo",
        points=points_to_upsert
    )
    batches_processed += 1
    points_upserted += len(points_to_upsert)

    # Print progress on a single line
    sys.stdout.write(f"\rBatches processed: {batches_processed}, Points upserted: {points_upserted}/{total_points}")
    sys.stdout.flush()

# Final progress message on a new line
print("\nUpserting complete!")


In [None]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer
from fastembed.sparse import SparseTextEmbedding
from fastembed.rerank.cross_encoder import TextCrossEncoder
from typing import List, Tuple

# Initialize Qdrant client
client = QdrantClient(host="localhost", port=6333)

# Initialize models.
# Queries must be embedded with the same models that produced the stored
# vectors: the indexing cells use "all-MiniLM-L6-v2" for the dense vector and
# "Qdrant/bm42-all-minilm-l6-v2-attentions" for the sparse vector. A different
# sparse model would emit indices from an unrelated vocabulary.
dense_model = SentenceTransformer("all-MiniLM-L6-v2")
sparse_model = SparseTextEmbedding("Qdrant/bm42-all-minilm-l6-v2-attentions")
reranker = TextCrossEncoder(model_name='jinaai/jina-reranker-v2-base-multilingual')

def hybrid_search(query: str, user_filter: str = None) -> List[Tuple[float, str]]:
    """Run a dense + sparse search, fuse the rankings and rerank the texts.

    Args:
        query: Free-text search query.
        user_filter: Optional ``user_id`` payload value; when given, both
            searches are restricted to points whose ``user_id`` matches it.

    Returns:
        Up to 20 ``(score, text)`` pairs ordered from most to least relevant
        according to the cross-encoder reranker. An empty list is returned
        when the search request fails or nothing usable is found.
    """
    # Embed the query for both vector spaces. `query_embed` is the query-side
    # entry point of the sparse model.
    dense_vec = dense_model.encode(query).tolist()
    sparse_embedding = next(iter(sparse_model.query_embed(query)))

    # Create Qdrant-compatible sparse vector
    sparse_query = models.SparseVector(
        indices=sparse_embedding.indices.tolist(),
        values=sparse_embedding.values.tolist()
    )

    # One shared payload filter for both requests (None means "no filter")
    query_filter = None
    if user_filter:
        query_filter = models.Filter(
            must=[models.FieldCondition(
                key="user_id",
                match=models.MatchValue(value=user_filter)
            )]
        )

    requests = [
        models.SearchRequest(
            vector=models.NamedVector(name="dense", vector=dense_vec),
            filter=query_filter,
            limit=100,
            with_payload=["text"]  # Only the text field is needed downstream
        ),
        models.SearchRequest(
            vector=models.NamedSparseVector(name="sparse", vector=sparse_query),
            filter=query_filter,
            limit=100,
            with_payload=["text"]
        )
    ]

    # Best effort: report a failed search and return no results
    try:
        results = client.search_batch(
            collection_name="hybrid-search-demo",
            requests=requests
        )
    except Exception as e:
        print(f"Search failed: {str(e)}")
        return []

    # Fuse whichever rankings came back non-empty; a single non-empty ranking
    # is still useful, so only give up when every ranking is empty.
    ranked_lists = [hits for hits in results if hits]
    if not ranked_lists:
        print("No results from either search type")
        return []

    fused = reciprocal_rank_fusion(ranked_lists)

    # Collect the texts to rerank, skipping hits without a text payload
    documents = []
    for hit in fused:
        if hit.payload and "text" in hit.payload:
            documents.append(hit.payload["text"])
        else:
            print(f"Skipping hit {hit.id} with missing text payload")

    if not documents:
        print("No valid documents to rerank")
        return []

    # The reranker yields one relevance score per document, in input order
    reranked_scores = list(reranker.rerank(
        query=query,
        documents=documents,
        k=10
    ))

    # Higher cross-encoder score means more relevant, so sort descending
    reranked = list(zip(reranked_scores, documents))
    reranked.sort(key=lambda pair: pair[0], reverse=True)

    # Return top 20 results
    return reranked[:20]

def reciprocal_rank_fusion(results_list: list, k: int = 60) -> list:
    """Merge several ranked hit lists with reciprocal rank fusion.

    Every hit contributes ``1 / (rank + k)`` to the score of its ``id``,
    where ``rank`` is its 1-based position in the list it came from.
    Entries of ``results_list`` that are not ``list`` instances are ignored,
    and hits without a ``"text"`` payload entry are skipped (they still
    occupy their rank position).

    Args:
        results_list: Ranked lists of hits; each hit exposes ``id`` and
            ``payload``.
        k: Constant added to every rank before taking the reciprocal.

    Returns:
        One hit object per distinct id (the first one encountered), ordered
        by fused score from highest to lowest.
    """
    first_seen = {}
    scores = {}

    for ranking in results_list:
        if isinstance(ranking, list):
            for position, hit in enumerate(ranking, start=1):
                payload = hit.payload
                if payload and "text" in payload:
                    first_seen.setdefault(hit.id, hit)
                    scores[hit.id] = scores.get(hit.id, 0.0) + 1.0 / (position + k)

    ordered = list(first_seen.values())
    ordered.sort(key=lambda hit: scores.get(hit.id, 0), reverse=True)
    return ordered


In [None]:
# Example search: rebind the client, run one filtered query and list the hits.
client = QdrantClient(url="http://localhost:6333")

query = "car crimes happened in may 2009"
user_filter = "user_2"  # Example filter by user_id

results = hybrid_search(query=query, user_filter=user_filter)

# Number the results starting at 1
for position, (score, text) in enumerate(results, start=1):
    print(f"{position}. [Score: {score:.2f}] {text}")
