In [None]:
# Cell 1: Header
# Sets up the import path, pulls in the shared notebook helpers, and renders
# the notebook header.
import sys
# Put the parent directory first on the import path; presumably that is where
# the `utils` package imported below lives (assumes the kernel's working
# directory is this notebook's folder -- TODO confirm).
sys.path.insert(0, '..')

# NOTE(review): check_dependency and the chart helpers (setup_style,
# bar_comparison, throughput_comparison, COLORS) are not referenced by the
# cells visible in this notebook -- confirm whether they are still needed.
from utils.notebook_utils import display_header, display_toc, check_dependency, conclusion_box, info_box
from utils.system_info import display_system_info
from utils.charts import setup_style, bar_comparison, throughput_comparison, COLORS

# Render the header; the two strings look like a title and a subtitle
# (see utils.notebook_utils.display_header for the exact layout).
display_header('SynaDB FAISS Backend', 'Billion-Scale Vector Search')

In [None]:
# Table of contents: section titles for this notebook, in reading order.
# Each anchor id is the title lower-cased with spaces replaced by hyphens,
# which matches the <a id="..."> tags on the markdown section headings.
section_titles = (
    'Introduction',
    'HNSW vs FAISS',
    'Enabling FAISS',
    'Index Types',
    'Benchmark',
    'When to Use',
    'Conclusions',
)
sections = [(title, title.lower().replace(' ', '-')) for title in section_titles]
display_toc(sections)

## 📌 Introduction <a id="introduction"></a>

SynaDB includes **FAISS as an optional backend** for billion-scale vector search.

This notebook does not treat FAISS as a competing product - FAISS is **integrated into SynaDB** as an alternative
index backend for scenarios requiring:

- **Billion-scale** vector collections
- **GPU acceleration** for search
- **Advanced index types** (IVF, PQ, etc.)

### Architecture

```
┌─────────────────────────────────────────┐
│           SynaDB VectorStore            │
├─────────────────────────────────────────┤
│  Index Backend (choose one):            │
│  ├── HNSW (default) - O(log N) search   │
│  └── FAISS (optional) - billion-scale   │
├─────────────────────────────────────────┤
│           Storage Layer                 │
└─────────────────────────────────────────┘
```

In [None]:
# Show system details via the project's utils.system_info helper, ahead of
# the benchmark figures later in the notebook.
display_system_info()

## ⚖️ HNSW vs FAISS <a id="hnsw-vs-faiss"></a>

| Aspect | HNSW (Default) | FAISS Backend |
|--------|----------------|---------------|
| **Scale** | Up to ~10M vectors | Billions of vectors |
| **Search** | O(log N) | Depends on index: O(N) for Flat, sublinear for IVF/HNSW |
| **Memory** | Higher (graph structure) | Lower (quantization) |
| **GPU** | CPU only | GPU acceleration |
| **Setup** | Zero config | Requires `--features faiss` |
| **Index Types** | HNSW only | Flat, IVF, PQ, HNSW, etc. |

### When to Use Each

**Use HNSW (default):**
- Collections under 10M vectors
- Development and prototyping
- Simple deployment (no extra deps)

**Use FAISS backend:**
- Billion-scale collections
- GPU-accelerated search needed
- Memory-constrained environments (PQ)
- Production with specific latency SLAs

## 🔧 Enabling FAISS <a id="enabling-faiss"></a>

### Rust (Cargo.toml)

```toml
[dependencies]
synadb = { version = "1.0", features = ["faiss"] }

# For GPU support, use this line instead (a key may only be defined once):
# synadb = { version = "1.0", features = ["faiss-gpu"] }
```

### Building from Source

```bash
# CPU only
cargo build --release --features faiss

# With GPU support (requires CUDA)
cargo build --release --features faiss-gpu
```

### Python

```python
from synadb import VectorStore

# Use FAISS backend
store = VectorStore(
    'vectors.db',
    dimensions=768,
    backend='faiss',  # 'hnsw' is default
    faiss_index_type='IVF1024,PQ32'
)
```

## 📚 Index Types <a id="index-types"></a>

FAISS supports multiple index types for different trade-offs:

| Index | Use Case | Memory | Speed | Recall |
|-------|----------|--------|-------|--------|
| `Flat` | Exact search, small datasets | High | Slow | 100% |
| `IVF` | Medium datasets | Medium | Fast | ~95% |
| `PQ` | Memory-constrained | Low | Fast | ~90% |
| `IVF,PQ` | Billion-scale | Very Low | Very Fast | ~85% |
| `HNSW` | General purpose | High | Very Fast | ~95% |

### Index Factory Strings

```python
# Exact search (brute force)
faiss_index_type='Flat'

# IVF with 1024 centroids
faiss_index_type='IVF1024,Flat'

# Product Quantization (32 subquantizers)
faiss_index_type='PQ32'

# IVF + PQ for billion-scale
faiss_index_type='IVF4096,PQ64'

# HNSW via FAISS
faiss_index_type='HNSW32'
```

## ⚡ Benchmark <a id="benchmark"></a>

This section compares the performance of SynaDB's native HNSW index with the FAISS backend.

In [None]:
# Explain how to obtain real measurements. Nothing is benchmarked in this
# cell; it only renders an informational box with the CLI command to run.
faiss_benchmark_note = '\n'.join([
    'To run FAISS benchmarks, build SynaDB with the faiss feature:',
    '',
    '```bash',
    'cargo run --release --features faiss -- faiss --quick',
    '```',
    '',
    'This notebook shows expected results. Run the CLI for actual measurements.',
])
info_box('FAISS Benchmark', faiss_benchmark_note)

In [None]:
from IPython.display import Markdown, display

# Static, hand-written table of expected figures; this cell measures nothing.
# NOTE(review): raw float32 storage for 100K x 768 vectors is ~307 MB, so
# confirm what the "Memory (MB)" column is meant to measure.
results = '''
### Expected Results (100K vectors, 768 dims)

| Backend | Insert (vec/s) | Search (ms) | Memory (MB) | Recall@10 |
|---------|----------------|-------------|-------------|----------|
| HNSW (default) | 50,000 | 0.5 | 80 | 95% |
| FAISS-Flat | 100,000 | 10.0 | 60 | 100% |
| FAISS-IVF1024 | 80,000 | 1.0 | 65 | 92% |
| FAISS-PQ32 | 90,000 | 0.8 | 25 | 88% |

*Results vary by hardware. Run benchmarks on your system.*
'''

# Render the markdown source as a formatted table in the cell output.
expected_results_table = Markdown(results)
display(expected_results_table)

## 🎯 When to Use <a id="when-to-use"></a>

### Use Default HNSW

✅ **Recommended for most users**

- Collections under 10M vectors
- Development and prototyping
- Simple deployment
- No extra dependencies

### Use FAISS Backend

🚀 **For scale and performance**

- Billion-scale vector collections
- GPU-accelerated search
- Memory-constrained environments
- Specific latency requirements
- Production with tuned indexes

## 🎯 Conclusions <a id="conclusions"></a>

In [None]:
# Closing summary box: the notebook's key takeaways plus a one-sentence
# recommendation. The strings carry inline HTML (<b>, <code>) and are passed
# to conclusion_box unchanged.
takeaways = [
    'FAISS is an <b>optional backend</b> for SynaDB, not a competitor',
    'Default HNSW works great for most use cases (up to 10M vectors)',
    'Enable FAISS with <code>--features faiss</code> for billion-scale',
    'Choose index type based on memory/speed/recall trade-offs',
    'GPU support available with <code>--features faiss-gpu</code>',
]
recommendation = (
    'Start with default HNSW. Switch to FAISS backend when you need '
    'billion-scale search, GPU acceleration, or specific index types.'
)
conclusion_box(title='Key Takeaways', points=takeaways, summary=recommendation)