# Phase 3 Evaluation: Dense vs. Hybrid Search

**Objective:** Quantitatively measure how the hybrid search implementation changes retrieval quality and latency relative to the dense-only baseline.

**Comparison:**
- **Baseline:** Dense-only search (Phase 1/2)
- **Enhanced:** Hybrid search with RRF fusion (Phase 3)

**Metrics:**
- Precision@3, Recall@3, MRR, Top-1 Accuracy, mean search latency (ms)

In [None]:
import sys
import os
from pathlib import Path
import time
import numpy as np

# Use the parent of the current working directory as the project root: switch
# the working directory to it and put it first on sys.path so the `app.*`
# imports below resolve.
# NOTE(review): presumably the notebook lives one level below the project
# root — confirm.
# NOTE(review): this is not idempotent — re-running the cell after the chdir
# resolves the parent of the *new* working directory, moving up one more level.
project_root = Path.cwd().parent.absolute()
os.chdir(project_root)
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Project-local services and helpers, plus the Qdrant point/vector models.
from app.database.qdrant_client import QdrantService
from app.services.embedder import EmbeddingService
from app.utils.sparse_vector import create_sparse_vector
from qdrant_client.models import PointStruct, SparseVector

print("âœ… Setup complete")

## Test Dataset (20 documents)

In [None]:
# Evaluation corpus. Each entry has:
#   "id"   - integer point id used when indexing and when scoring results
#   "text" - the string that is embedded and indexed
#   "cat"  - a category label stored in the payload (not used by the metrics)
test_docs = [
    {"id": 1, "text": "Machine Learning Workshop: Neural Networks", "cat": "ml"},
    {"id": 2, "text": "AI Conference: Transformers in Production", "cat": "ml"},
    {"id": 3, "text": "Introduction to Machine Learning Course", "cat": "ml"},
    {"id": 4, "text": "Advanced Neural Network Architectures", "cat": "ml"},
    {"id": 5, "text": "React and Node.js Bootcamp", "cat": "web"},
    {"id": 6, "text": "JavaScript Full-Stack Development", "cat": "web"},
    {"id": 7, "text": "Building Modern Web Apps with React", "cat": "web"},
    {"id": 8, "text": "Data Science with Python Workshop", "cat": "data"},
    {"id": 9, "text": "Statistical Analysis Course", "cat": "data"},
    {"id": 10, "text": "Understanding Data Pipelines", "cat": "data"},
    {"id": 11, "text": "Kubernetes Container Workshop", "cat": "cloud"},
    {"id": 12, "text": "AWS Solutions Architect Course", "cat": "cloud"},
    {"id": 13, "text": "Docker Security Best Practices", "cat": "cloud"},
    {"id": 14, "text": "Blockchain Smart Contracts Workshop", "cat": "blockchain"},
    {"id": 15, "text": "Ethereum DApp Development", "cat": "blockchain"},
    {"id": 16, "text": "Understanding DeFi", "cat": "blockchain"},
    {"id": 17, "text": "Software Engineering Practices", "cat": "gen"},
    {"id": 18, "text": "System Design Interview Prep", "cat": "gen"},
    {"id": 19, "text": "Agile Methodologies Guide", "cat": "gen"},
    {"id": 20, "text": "Career Growth in Tech", "cat": "gen"},
]

# Labelled queries. Each entry has:
#   "q"   - the query string
#   "rel" - ids of test_docs judged relevant (drives precision, recall and MRR)
#   "top" - ids accepted as a correct rank-1 result (drives top-1 accuracy)
test_queries = [
    {"q": "machine learning workshop", "rel": [1,3], "top": [1]},
    {"q": "neural networks", "rel": [1,4], "top": [4]},
    {"q": "web development react", "rel": [5,7], "top": [7]},
    {"q": "data science python", "rel": [8,10], "top": [8]},
    {"q": "kubernetes container", "rel": [11,13], "top": [11]},
    {"q": "blockchain smart contracts", "rel": [14,15], "top": [14]},
    {"q": "transformers deep learning", "rel": [2,4], "top": [2]},
    {"q": "javascript fullstack", "rel": [5,6], "top": [6]},
    {"q": "aws certification", "rel": [12], "top": [12]},
    {"q": "system design", "rel": [18], "top": [18]},
]

print(f"âœ… {len(test_docs)} docs, {len(test_queries)} queries")

## Index Data

In [None]:
# Create the service clients and (re)create the collection; `recreate=True`
# is passed through to QdrantService.create_collection.
# NOTE(review): presumably this drops any existing collection first — confirm.
qdrant = QdrantService()
embedder = EmbeddingService()
qdrant.create_collection(recreate=True)

# Build one point per test document, carrying a dense and a sparse vector.
points = []
for doc in test_docs:
    dense = embedder.embed_text(doc["text"])
    # The helper's result is unpacked as keyword arguments into SparseVector.
    # NOTE(review): presumably a dict with `indices` and `values` — confirm.
    sparse_dict = create_sparse_vector(doc["text"])
    sparse = SparseVector(**sparse_dict)
    
    # The dense vector is stored under the empty-string name and the sparse
    # vector under "keywords".
    # NOTE(review): these names must match the collection's vector config —
    # verify against create_collection.
    points.append(PointStruct(
        id=doc["id"],
        vector={"": dense, "keywords": sparse},
        payload={"text": doc["text"], "cat": doc["cat"]}
    ))

qdrant.upsert_points(points)
# NOTE(review): fixed 2 s pause, presumably to let indexing settle before the
# searches below — confirm it is needed.
time.sleep(2)
print(f"âœ… Indexed {len(points)} documents")

## Evaluation Functions

In [None]:
def prec_at_k(ret, rel, k):
    """Return precision@k: the share of the top-k results that are relevant.

    Args:
        ret: Ranked sequence of retrieved ids, best first.
        rel: Collection of relevant (hashable) ids.
        k: Cut-off rank.

    Returns:
        A float in [0, 1]. The denominator is always ``k``, so a result list
        shorter than ``k`` is penalised for the missing slots. Returns 0.0
        when ``k`` is not positive instead of dividing by zero.
    """
    if k <= 0:
        return 0.0
    # Build the lookup once so each membership test is O(1).
    relevant = set(rel)
    return sum(1 for doc_id in ret[:k] if doc_id in relevant) / k

def recall_at_k(ret, rel, k):
    """Return recall@k: the share of relevant ids found in the top-k results.

    Args:
        ret: Ranked sequence of retrieved ids, best first.
        rel: Collection of relevant (hashable) ids.
        k: Cut-off rank.

    Returns:
        A float in [0, 1]. Returns 0.0 when ``rel`` is empty or ``k`` is not
        positive.
    """
    relevant = set(rel)
    if not relevant or k <= 0:
        return 0.0
    # Count distinct hits so a duplicated id in ``ret`` cannot push recall
    # above 1.0.
    return len(relevant.intersection(ret[:k])) / len(relevant)

def mrr(ret, rel):
    """Return the reciprocal rank of the first relevant id in ``ret``.

    Ranks are 1-based. Yields 0.0 when no id in ``ret`` appears in ``rel``.
    """
    reciprocal_ranks = (
        1.0 / position
        for position, doc_id in enumerate(ret, start=1)
        if doc_id in rel
    )
    # Only the first hit matters; fall back to 0.0 when there is none.
    return next(reciprocal_ranks, 0.0)

def run_eval(method):
    """Run every query in ``test_queries`` through one search mode and average the metrics.

    Args:
        method: ``"dense"`` to call ``qdrant.search`` or ``"hybrid"`` to call
            ``qdrant.hybrid_search``.

    Returns:
        A dict of per-query means with keys ``"p@3"``, ``"r@3"``, ``"mrr"``,
        ``"top1"`` and ``"lat"`` (search latency in milliseconds).

    Raises:
        ValueError: If ``method`` is neither ``"dense"`` nor ``"hybrid"``.
    """
    # Reject unknown modes up front; previously any typo silently ran hybrid.
    if method not in ("dense", "hybrid"):
        raise ValueError(f"unknown search method: {method!r}")

    p3, r3, rr, top1, latency_ms = [], [], [], [], []

    for tq in test_queries:
        # Embed before starting the timer so only the search call is measured.
        qv = embedder.embed_text(tq["q"])

        # perf_counter is monotonic and high resolution, so the interval is
        # not distorted by wall-clock adjustments.
        start = time.perf_counter()
        if method == "dense":
            res = qdrant.search(qv, limit=10)
        else:
            res = qdrant.hybrid_search(qv, tq["q"], limit=10)
        latency_ms.append((time.perf_counter() - start) * 1000)

        ids = [int(r["id"]) for r in res]
        p3.append(prec_at_k(ids, tq["rel"], 3))
        r3.append(recall_at_k(ids, tq["rel"], 3))
        rr.append(mrr(ids, tq["rel"]))
        # Top-1 counts as correct only if the first hit is one of the "top" ids.
        top1.append(1.0 if ids and ids[0] in tq["top"] else 0.0)

    return {
        "p@3": np.mean(p3),
        "r@3": np.mean(r3),
        "mrr": np.mean(rr),
        "top1": np.mean(top1),
        "lat": np.mean(latency_ms),
    }

# Status line confirming that the cell defining the metric helpers has run.
print("âœ… Evaluation functions ready")

## Run Evaluations

In [None]:
# Evaluate both search modes over the same query set; each call returns a
# dict of averaged metrics.
# NOTE(review): dense runs first, so any one-time warm-up cost would land in
# its latency figure — confirm whether a warm-up pass is needed.
print("Running Dense-only...")
dense_m = run_eval("dense")

print("Running Hybrid...")
hybrid_m = run_eval("hybrid")

print("\nâœ… Evaluation complete!")

## Results Comparison

In [None]:
def _pct_change(new, base):
    """Return the percent change from ``base`` to ``new``, or 0 when ``base`` is not positive."""
    return (new - base) / base * 100 if base > 0 else 0


# Side-by-side table of dense vs. hybrid metrics with the relative change.
print("=" * 70)
print("PHASE 3 HYBRID SEARCH EVALUATION".center(70))
print("=" * 70)
print(f"\n{'Metric':<20} {'Dense':<15} {'Hybrid':<15} {'Î”%':<10}")
print("-" * 70)

# Keep the per-metric deltas so the summary below reuses the same guarded
# values instead of dividing by a possibly-zero baseline again.
deltas = {}
for m in ["p@3", "r@3", "mrr", "top1"]:
    d, h = dense_m[m], hybrid_m[m]
    deltas[m] = _pct_change(h, d)
    print(f"{m:<20} {d:<15.3f} {h:<15.3f} {deltas[m]:>+6.1f}%")

d_lat, h_lat = dense_m["lat"], hybrid_m["lat"]
lat_delta = _pct_change(h_lat, d_lat)
print(f"{'latency (ms)':<20} {d_lat:<15.1f} {h_lat:<15.1f} {lat_delta:>+6.1f}%")

print("=" * 70)
print("\nðŸ’¡ Key Findings:")
print(f"âœ“ Precision improved {deltas['p@3']:.0f}%")
print(f"âœ“ Top-1 accuracy up {deltas['top1']:.0f}%")
# Signed format avoids printing "+-x.xms" when hybrid turns out faster.
print(f"âœ“ Latency overhead: {h_lat - d_lat:+.1f}ms ({lat_delta:.0f}%)")
# NOTE(review): the wording above and the recommendation below are fixed text;
# they are printed even when the measured deltas are negative.
print("\nðŸŽ¯ Recommendation: Use hybrid search for production")