# IVFFlat Algorithm Examples in FAISS

This notebook demonstrates the IVFFlat algorithm in FAISS with various parameter combinations. IVFFlat is an inverted file (IVF) index whose inverted lists store the vectors uncompressed ("flat").

## Key Parameters

- **nlist**: Number of clusters (Voronoi cells) in the inverted file index. Higher nlist = finer partitioning, faster search but longer training.
- **nprobe**: Number of clusters to visit during search. Higher nprobe = better recall but slower search.

We'll explore how these parameters affect:
1. Search recall (accuracy)
2. Search time
3. Training time
4. Memory usage

## How IVFFlat Works

1. **Training**: K-means clustering partitions the vector space into `nlist` clusters (Voronoi cells)
2. **Adding**: Each vector is assigned to its nearest cluster centroid and stored in that cluster's inverted list
3. **Searching**: Query vector is compared to cluster centroids, then `nprobe` nearest clusters are searched exhaustively

In [None]:
import numpy as np
import faiss
import time
import matplotlib.pyplot as plt
import pandas as pd

# Set random seed for reproducibility
np.random.seed(42)

# Plotting style. Matplotlib 3.6 renamed the bundled seaborn styles to
# 'seaborn-v0_8-*'; older releases only know 'seaborn-whitegrid'. Use whichever
# name this installation provides so the notebook does not fail with an OSError
# on older Matplotlib. If neither exists, the default style is kept.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 11

## 1. Dataset Generation

We'll create a synthetic dataset for our experiments.

In [None]:
def generate_dataset(nb, nq, d):
    """
    Draw random float32 vectors to serve as a database and a query set.

    Samples come from NumPy's global random state (database first, queries
    second), so seeding `np.random` beforehand makes the result reproducible.

    Args:
        nb: Number of database vectors
        nq: Number of query vectors
        d: Vector dimension

    Returns:
        xb: Database vectors (nb x d)
        xq: Query vectors (nq x d)
    """
    shapes = ((nb, d), (nq, d))
    # The generator is consumed left to right, so the database is drawn first
    database, queries = (
        np.random.random(shape).astype(np.float32) for shape in shapes
    )
    return database, queries

# Experiment-wide constants shared by every cell below
nb = 100_000  # how many vectors go into the database
nq = 1_000    # how many query vectors are searched
d = 128       # dimensionality of every vector
k = 10        # neighbours requested per query

print(f"Generating dataset: {nb:,} database vectors, {nq:,} queries, dimension {d}")
xb, xq = generate_dataset(nb=nb, nq=nq, d=d)
print(f"Database shape: {xb.shape}, Query shape: {xq.shape}")

## 2. Ground Truth Computation

Compute exact nearest neighbors using brute-force search for recall evaluation.

In [None]:
def compute_ground_truth(xb, xq, k):
    """
    Find the exact k nearest neighbours of every query with a flat L2 index.

    Args:
        xb: Database vectors; the length of the second axis is the dimension
        xq: Query vectors
        k: Number of neighbours to return per query

    Returns:
        The (distances, labels) pair produced by the flat index search
    """
    dim = xb.shape[1]
    exact_index = faiss.IndexFlatL2(dim)
    exact_index.add(xb)
    return exact_index.search(xq, k)

def compute_recall(labels_gt, labels_approx, k):
    """
    Compute recall@k: fraction of true neighbors found.

    Args:
        labels_gt: Ground-truth neighbor IDs, one row per query (2-D array)
        labels_approx: Neighbor IDs returned by the approximate search, one
            row per query (2-D array)
        k: Number of leading columns of each row to compare

    Returns:
        Mean over all queries of |gt[:k] & approx[:k]| / k, as a float.
        Returns 0.0 when there are no queries or k is not positive.
    """
    n = labels_gt.shape[0]
    # Guard the two divisions below: no queries or a non-positive k would
    # otherwise raise ZeroDivisionError.
    if n == 0 or k <= 0:
        return 0.0

    hits = 0
    for i, gt_row in enumerate(labels_gt[:, :k]):
        gt_ids = set(gt_row.tolist())
        # FAISS pads result rows with -1 when fewer than k neighbors were
        # found; that sentinel is not a real ID and must never count as a hit.
        gt_ids.discard(-1)
        hits += len(gt_ids.intersection(labels_approx[i, :k].tolist()))

    return hits / (n * k)

print("Computing ground truth with brute-force search...")
# perf_counter() is monotonic and high-resolution; time.time() follows the
# adjustable wall clock and is not meant for measuring durations.
start = time.perf_counter()
distances_gt, labels_gt = compute_ground_truth(xb, xq, k)
gt_time = time.perf_counter() - start
print(f"Ground truth computed in {gt_time:.2f} seconds")

## 3. Helper Functions for IVFFlat Experiments

In [None]:
def build_ivfflat_index(xb, nlist):
    """
    Build IVFFlat index with given number of clusters.

    Args:
        xb: Database vectors (n x d); used both to train the index and as its
            contents
        nlist: Number of clusters (inverted lists)

    Returns:
        index: Built IVFFlat index
        train_time: Time taken to train (seconds)
        add_time: Time taken to add vectors (seconds)
    """
    d = xb.shape[1]

    # Create quantizer (flat index for centroids)
    quantizer = faiss.IndexFlatL2(d)

    # Create IVFFlat index
    index = faiss.IndexIVFFlat(quantizer, d, nlist)

    # Durations use perf_counter(): it is monotonic and high-resolution,
    # whereas time.time() follows the adjustable wall clock.

    # Train the index (learn cluster centroids)
    start = time.perf_counter()
    index.train(xb)
    train_time = time.perf_counter() - start

    # Add vectors to the index
    start = time.perf_counter()
    index.add(xb)
    add_time = time.perf_counter() - start

    return index, train_time, add_time

def search_ivfflat_index(index, xq, k, nprobe):
    """
    Search IVFFlat index with given nprobe.

    Note that `index.nprobe` is overwritten and stays at the new value after
    the call.

    Args:
        index: Index to query; must expose `nprobe` and `search(xq, k)`
        xq: Query vectors
        k: Number of neighbors to retrieve per query
        nprobe: Number of clusters to visit per query

    Returns:
        distances: Distance array
        labels: Label array
        search_time: Time taken to search (seconds)
    """
    index.nprobe = nprobe

    # perf_counter() is monotonic and high-resolution, which matters here
    # because a batch search can finish in a few milliseconds.
    start = time.perf_counter()
    distances, labels = index.search(xq, k)
    search_time = time.perf_counter() - start

    return distances, labels, search_time

def estimate_memory_usage(index, d):
    """
    Approximate the size of an IVFFlat index in MB.

    The estimate adds three contributions: 4 bytes per vector component,
    8 bytes per vector ID, and 4 bytes per centroid component.
    """
    float_bytes, id_bytes = 4, 8
    bytes_per_mb = 1024 * 1024

    # Every stored vector costs its d components plus one ID
    per_vector = d * float_bytes + id_bytes
    # One d-dimensional centroid per inverted list
    centroid_total = index.nlist * d * float_bytes

    return (index.ntotal * per_vector + centroid_total) / bytes_per_mb

## 4. Experiment 1: Effect of nlist (Number of Clusters)

nlist controls the number of Voronoi cells. Higher nlist means:
- Finer partitioning of the vector space
- Faster search (fewer vectors per cluster to scan)
- Longer training time (k-means has more centroids to fit)
- Need more training data (rule of thumb: 30-256 × nlist vectors)

In [None]:
# Sweep nlist (powers of two from 16 to 1024) while nprobe stays constant
nlist_values = [2 ** exponent for exponent in range(4, 11)]
nprobe_fixed = 10

print("Experiment 1: Varying nlist")
print(f"Fixed nprobe={nprobe_fixed}\n")
print(f"{'nlist':>8} {'Train(s)':>10} {'Add(s)':>8} {'Search(ms)':>12} {'Recall':>10} {'Vecs/List':>12}")
print("-" * 66)

results_nlist = []
for nlist in nlist_values:
    # A fresh index has to be trained and filled for every nlist value
    index, train_time, add_time = build_ivfflat_index(xb, nlist)
    distances, labels, search_time = search_ivfflat_index(index, xq, k, nprobe_fixed)

    row = {
        'nlist': nlist,
        'train_time': train_time,
        'add_time': add_time,
        'search_time_ms': search_time * 1000,
        'recall': compute_recall(labels_gt, labels, k),
        'vecs_per_list': nb / nlist,  # average inverted-list length
    }
    results_nlist.append(row)

    print(f"{row['nlist']:>8} {row['train_time']:>10.2f} {row['add_time']:>8.2f} "
          f"{row['search_time_ms']:>12.2f} {row['recall']:>10.4f} {row['vecs_per_list']:>12.0f}")

df_nlist = pd.DataFrame(results_nlist)

In [None]:
# Visualize effect of nlist: one panel per metric, all sharing a log2 x-axis
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# (column, y-label, title, line colour, y-limits or None, log-scaled y-axis?)
nlist_panels = [
    ('recall', 'Recall@10', 'Recall vs nlist', '#2ecc71', [0, 1.02], False),
    ('search_time_ms', 'Search Time (ms)', 'Search Time vs nlist', '#e74c3c', None, False),
    ('train_time', 'Training Time (s)', 'Training Time vs nlist', '#3498db', None, False),
    ('vecs_per_list', 'Vectors per Inverted List', 'Average Cluster Size vs nlist', '#9b59b6', None, True),
]

# axes.flat walks the 2x2 grid row by row, matching the order of the specs
for panel_ax, (column, y_label, title, line_color, y_limits, log_y) in zip(axes.flat, nlist_panels):
    panel_ax.plot(df_nlist['nlist'], df_nlist[column], 'o-', linewidth=2, markersize=8, color=line_color)
    panel_ax.set_xlabel('nlist (number of clusters)')
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(title)
    panel_ax.set_xscale('log', base=2)
    if y_limits is not None:
        panel_ax.set_ylim(y_limits)
    if log_y:
        panel_ax.set_yscale('log')
    panel_ax.grid(True, alpha=0.3)

plt.suptitle(f'Effect of nlist (nprobe={nprobe_fixed})', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 5. Experiment 2: Effect of nprobe (Search Probes)

nprobe controls how many clusters are searched during query. This is the main recall/speed trade-off:
- Higher nprobe = better recall but slower search
- Can be adjusted at query time without rebuilding the index
- Setting nprobe = nlist gives exact search (but defeats the purpose of IVF)

In [None]:
# Sweep nprobe on a single pre-built index
nprobe_values = [1, 2, 4, 8, 16, 32, 64, 128, 256]
nlist_fixed = 256

# nprobe is a search-time setting, so one trained index serves the whole sweep
print(f"Building index with nlist={nlist_fixed}...")
index, train_time, add_time = build_ivfflat_index(xb, nlist_fixed)
print(f"Index trained in {train_time:.2f}s, vectors added in {add_time:.2f}s\n")

print("Experiment 2: Varying nprobe")
print(f"{'nprobe':>8} {'Search(ms)':>12} {'Recall':>10} {'QPS':>12} {'% Lists':>10}")
print("-" * 56)

results_nprobe = []
# Probe counts larger than the number of lists are skipped
for nprobe in (p for p in nprobe_values if p <= nlist_fixed):
    distances, labels, search_time = search_ivfflat_index(index, xq, k, nprobe)

    recall = compute_recall(labels_gt, labels, k)
    qps = nq / search_time
    pct_lists = (nprobe / nlist_fixed) * 100

    results_nprobe.append({
        'nprobe': nprobe,
        'search_time_ms': search_time * 1000,
        'recall': recall,
        'qps': qps,
        'pct_lists': pct_lists,
    })

    print(f"{nprobe:>8} {search_time*1000:>12.2f} {recall:>10.4f} {qps:>12.0f} {pct_lists:>10.1f}%")

df_nprobe = pd.DataFrame(results_nprobe)

In [None]:
# Visualize effect of nprobe: recall, latency and throughput side by side
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# (column, y-label, title, line colour, y-limits or None)
nprobe_panels = [
    ('recall', 'Recall@10', 'Recall vs nprobe', '#2ecc71', [0, 1.02]),
    ('search_time_ms', 'Search Time (ms)', 'Search Time vs nprobe', '#e74c3c', None),
    ('qps', 'Queries Per Second', 'Throughput vs nprobe', '#3498db', None),
]

for panel_ax, (column, y_label, title, line_color, y_limits) in zip(axes, nprobe_panels):
    panel_ax.plot(df_nprobe['nprobe'], df_nprobe[column], 'o-', linewidth=2, markersize=8, color=line_color)
    panel_ax.set_xlabel('nprobe')
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(title)
    panel_ax.set_xscale('log', base=2)
    if y_limits is not None:
        panel_ax.set_ylim(y_limits)
    panel_ax.grid(True, alpha=0.3)

plt.suptitle(f'Effect of nprobe (nlist={nlist_fixed})', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Recall-QPS trade-off curve (Pareto frontier)
fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(df_nprobe['recall'], df_nprobe['qps'], 'o-', linewidth=2, markersize=10, color='#8e44ad')

# Label every point on the curve with the nprobe value that produced it
curve_points = zip(df_nprobe['nprobe'], df_nprobe['recall'], df_nprobe['qps'])
for probe_count, recall_val, qps_val in curve_points:
    ax.annotate(
        f"nprobe={int(probe_count)}",
        (recall_val, qps_val),
        textcoords="offset points",
        xytext=(5, 5),
        fontsize=9,
    )

ax.set_xlabel('Recall@10', fontsize=12)
ax.set_ylabel('Queries Per Second (QPS)', fontsize=12)
ax.set_title('Recall vs Throughput Trade-off\n(Pareto Frontier for nprobe)', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 6. Experiment 3: Combined Parameter Grid Search

Let's explore the combined effect of nlist and nprobe.

In [None]:
# Grid search over nlist and nprobe
nlist_grid = [64, 128, 256, 512]
nprobe_grid = [1, 4, 8, 16, 32, 64]

# Train one index per nlist up front; each is then reused for every nprobe
print("Building indexes...")
indexes = {}
for nlist in nlist_grid:
    built_index, train_time, add_time = build_ivfflat_index(xb, nlist)
    indexes[nlist] = built_index
    print(f"  nlist={nlist}: trained in {train_time:.2f}s, added in {add_time:.2f}s")

print("\nRunning grid search...")
grid_results = []
# The dict keeps insertion order, so this visits nlist in nlist_grid order
for nlist, index in indexes.items():
    # Combinations that would probe more lists than exist are left out
    usable_nprobes = [p for p in nprobe_grid if p <= nlist]
    for nprobe in usable_nprobes:
        distances, labels, search_time = search_ivfflat_index(index, xq, k, nprobe)
        grid_results.append({
            'nlist': nlist,
            'nprobe': nprobe,
            'recall': compute_recall(labels_gt, labels, k),
            'search_time_ms': search_time * 1000,
            'qps': nq / search_time,
            'nprobe_ratio': nprobe / nlist,
        })

df_grid = pd.DataFrame(grid_results)
print("\nGrid search complete!")

In [None]:
# Visualize grid results: one curve per nlist on both panels
fig, (recall_ax, tradeoff_ax) = plt.subplots(1, 2, figsize=(14, 6))

colors = plt.cm.viridis(np.linspace(0, 1, len(nlist_grid)))

for nlist, curve_color in zip(nlist_grid, colors):
    data = df_grid[df_grid['nlist'] == nlist]
    line_style = dict(linewidth=2, markersize=8, color=curve_color, label=f'nlist={nlist}')
    # Left: recall as nprobe grows. Right: throughput against recall.
    recall_ax.plot(data['nprobe'], data['recall'], 'o-', **line_style)
    tradeoff_ax.plot(data['recall'], data['qps'], 'o-', **line_style)

recall_ax.set_xlabel('nprobe')
recall_ax.set_ylabel('Recall@10')
recall_ax.set_title('Recall vs nprobe for Different nlist')
recall_ax.set_xscale('log', base=2)
recall_ax.set_ylim([0, 1.02])
recall_ax.legend()
recall_ax.grid(True, alpha=0.3)

tradeoff_ax.set_xlabel('Recall@10')
tradeoff_ax.set_ylabel('Queries Per Second')
tradeoff_ax.set_title('Recall-Throughput Trade-off')
tradeoff_ax.legend()
tradeoff_ax.grid(True, alpha=0.3)

plt.suptitle('Combined Effect of nlist and nprobe', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Heatmaps give a clearer view of how the two parameters interact.
# Pivoting leaves NaN wherever an (nlist, nprobe) pair was not measured.
pivot_data = df_grid.pivot_table(index='nlist', columns='nprobe', values='recall', aggfunc='first')
pivot_qps = df_grid.pivot_table(index='nlist', columns='nprobe', values='qps', aggfunc='first')

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (pivot table, title, cell number format, imshow keyword arguments)
heatmaps = [
    (pivot_data, 'Recall@10 Heatmap', '.3f', dict(cmap='RdYlGn', aspect='auto', vmin=0, vmax=1.0)),
    (pivot_qps, 'QPS Heatmap', '.0f', dict(cmap='YlOrRd_r', aspect='auto')),
]

for ax, (table, title, cell_fmt, imshow_kwargs) in zip(axes, heatmaps):
    image = ax.imshow(table.values, **imshow_kwargs)
    ax.set_xticks(range(len(table.columns)))
    ax.set_xticklabels(table.columns.astype(int))
    ax.set_yticks(range(len(table.index)))
    ax.set_yticklabels(table.index.astype(int))
    ax.set_xlabel('nprobe')
    ax.set_ylabel('nlist')
    ax.set_title(title)

    # Write the value into every measured cell; NaN cells stay blank
    for (row_idx, col_idx), val in np.ndenumerate(table.values):
        if not np.isnan(val):
            ax.text(col_idx, row_idx, format(val, cell_fmt),
                    ha='center', va='center', color='black', fontsize=9)

    fig.colorbar(image, ax=ax)

plt.suptitle('Parameter Grid Search Results', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 7. Experiment 4: nprobe as Percentage of nlist

A useful way to think about nprobe is as a percentage of nlist. Let's see if using the same ratio gives consistent recall across different nlist values.

In [None]:
# Express nprobe as a share of nlist and compare across index sizes
nlist_test = [64, 128, 256, 512, 1024]
pct_values = [1, 2, 5, 10, 25, 50]  # percentage of nlist to probe

print("Experiment 4: nprobe as percentage of nlist\n")
print(f"{'nlist':>8} {'%':>6} {'nprobe':>8} {'Recall':>10} {'Search(ms)':>12}")
print("-" * 50)

pct_results = []
for nlist in nlist_test:
    # Build timings are not needed here, only the index itself
    index = build_ivfflat_index(xb, nlist)[0]

    for pct in pct_values:
        # Round down, but always probe at least one list
        nprobe = max(1, int(nlist * pct / 100))

        distances, labels, search_time = search_ivfflat_index(index, xq, k, nprobe)
        row = {
            'nlist': nlist,
            'pct': pct,
            'nprobe': nprobe,
            'recall': compute_recall(labels_gt, labels, k),
            'search_time_ms': search_time * 1000,
        }
        pct_results.append(row)

        print(f"{nlist:>8} {pct:>6}% {nprobe:>8} {row['recall']:>10.4f} {row['search_time_ms']:>12.2f}")
    print()

df_pct = pd.DataFrame(pct_results)

In [None]:
# Visualize recall and latency against the percentage of nlist probed
fig, (recall_ax, time_ax) = plt.subplots(1, 2, figsize=(14, 5))

colors = plt.cm.plasma(np.linspace(0, 0.8, len(nlist_test)))

for nlist, curve_color in zip(nlist_test, colors):
    data = df_pct[df_pct['nlist'] == nlist]
    line_style = dict(linewidth=2, markersize=8, color=curve_color, label=f'nlist={nlist}')
    recall_ax.plot(data['pct'], data['recall'], 'o-', **line_style)
    time_ax.plot(data['pct'], data['search_time_ms'], 'o-', **line_style)

recall_ax.set_xlabel('Percentage of nlist probed (%)')
recall_ax.set_ylabel('Recall@10')
recall_ax.set_title('Recall vs % of Clusters Probed')
recall_ax.set_ylim([0, 1.02])
recall_ax.legend()
recall_ax.grid(True, alpha=0.3)

time_ax.set_xlabel('Percentage of nlist probed (%)')
time_ax.set_ylabel('Search Time (ms)')
time_ax.set_title('Search Time vs % of Clusters Probed')
time_ax.legend()
time_ax.grid(True, alpha=0.3)

plt.suptitle('Same Probe Ratio Across Different nlist Values', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 8. Experiment 5: Comparison with Brute Force

Let's compare IVFFlat performance against brute-force search.

In [None]:
# Brute force baseline
print("Comparing IVFFlat with Brute Force (IndexFlatL2)\n")

# Durations below use perf_counter(): it is monotonic and high-resolution,
# whereas time.time() follows the adjustable wall clock.

# Build brute force index (a flat index needs no training, only add)
index_flat = faiss.IndexFlatL2(d)
start = time.perf_counter()
index_flat.add(xb)
flat_add_time = time.perf_counter() - start

# Search with brute force
start = time.perf_counter()
D_flat, I_flat = index_flat.search(xq, k)
flat_search_time = time.perf_counter() - start

flat_qps = nq / flat_search_time

print("IndexFlatL2 (Brute Force):")
print(f"  Add time: {flat_add_time:.3f}s")
print(f"  Search time: {flat_search_time*1000:.2f}ms")
print(f"  QPS: {flat_qps:.0f}")
print("  Recall: 1.0000 (exact)")

# IVFFlat with good configuration
ivf_config = {'nlist': 256, 'nprobe': 16}
index_ivf, ivf_train_time, ivf_add_time = build_ivfflat_index(xb, ivf_config['nlist'])
D_ivf, I_ivf, ivf_search_time = search_ivfflat_index(index_ivf, xq, k, ivf_config['nprobe'])
# Recall is measured against the brute-force labels computed just above
ivf_recall = compute_recall(I_flat, I_ivf, k)
ivf_qps = nq / ivf_search_time

print(f"\nIndexIVFFlat (nlist={ivf_config['nlist']}, nprobe={ivf_config['nprobe']}):")
print(f"  Train time: {ivf_train_time:.3f}s")
print(f"  Add time: {ivf_add_time:.3f}s")
print(f"  Search time: {ivf_search_time*1000:.2f}ms")
print(f"  QPS: {ivf_qps:.0f}")
print(f"  Recall: {ivf_recall:.4f}")

print(f"\nSpeedup: {ivf_qps/flat_qps:.1f}x faster at {ivf_recall*100:.1f}% recall")

In [None]:
# Bar chart comparison: build cost, throughput and recall for both methods
fig, axes = plt.subplots(1, 3, figsize=(14, 5))

methods = ['Brute Force', f'IVFFlat\n(nlist={ivf_config["nlist"]})']
colors = ['#3498db', '#2ecc71']

build_times = [flat_add_time, ivf_train_time + ivf_add_time]
qps_values = [flat_qps, ivf_qps]
recall_values = [1.0, ivf_recall]

# (values, y-label, title, label offset above the bar, label template, y-limits or None)
bar_panels = [
    (build_times, 'Build Time (s)', 'Build Time Comparison\n(Train + Add for IVFFlat)', 0.02, '{:.2f}s', None),
    (qps_values, 'Queries Per Second', 'Search Throughput Comparison', 100, '{:.0f}', None),
    (recall_values, 'Recall@10', 'Recall Comparison', 0.02, '{:.4f}', [0, 1.1]),
]

for panel_ax, (values, y_label, title, offset, template, y_limits) in zip(axes, bar_panels):
    panel_ax.bar(methods, values, color=colors, edgecolor='black', linewidth=1.5)
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(title)
    if y_limits is not None:
        panel_ax.set_ylim(y_limits)
    # Print each bar's value just above its top edge
    for x_pos, height in enumerate(values):
        panel_ax.text(x_pos, height + offset, template.format(height), ha='center', fontweight='bold')

plt.suptitle('IVFFlat vs Brute Force Comparison', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 9. Experiment 6: Typical Configuration Examples

Let's test some commonly recommended configurations.

In [None]:
# Presets chosen relative to the dataset size.
# A common heuristic sets nlist close to sqrt(n); with 100K vectors that is
# sqrt(100000) ≈ 316.

configs = [
    {'name': 'Small nlist, low nprobe', 'nlist': 100, 'nprobe': 5},
    {'name': 'sqrt(n) rule', 'nlist': 316, 'nprobe': 16},
    {'name': 'Larger nlist', 'nlist': 1000, 'nprobe': 50},
    {'name': 'High recall', 'nlist': 256, 'nprobe': 64},
    {'name': 'Speed optimized', 'nlist': 1024, 'nprobe': 8},
]

print("Experiment 6: Common Configuration Examples\n")
print(f"{'Config':>25} {'nlist':>8} {'nprobe':>8} {'Train(s)':>10} {'Search(ms)':>12} {'Recall':>8} {'QPS':>8}")
print("-" * 90)

config_results = []
for config in configs:
    # Build and query an index for this preset
    index, train_time, add_time = build_ivfflat_index(xb, config['nlist'])
    distances, labels, search_time = search_ivfflat_index(index, xq, k, config['nprobe'])

    recall = compute_recall(labels_gt, labels, k)
    qps = nq / search_time

    # Start from the preset's own name/nlist/nprobe and append the measurements
    config_results.append({
        **config,
        'train_time': train_time,
        'search_time_ms': search_time * 1000,
        'recall': recall,
        'qps': qps,
        'memory_mb': estimate_memory_usage(index, d),
    })

    print(f"{config['name']:>25} {config['nlist']:>8} {config['nprobe']:>8} "
          f"{train_time:>10.2f} {search_time*1000:>12.2f} {recall:>8.4f} {qps:>8.0f}")

df_configs = pd.DataFrame(config_results)

In [None]:
# Visualize common configurations as a recall/throughput scatter plot
fig, ax = plt.subplots(figsize=(12, 7))

# Marker area encodes nlist, marker colour encodes nprobe
scatter = ax.scatter(
    df_configs['recall'],
    df_configs['qps'],
    s=df_configs['nlist'] / 2,
    c=df_configs['nprobe'],
    cmap='coolwarm',
    alpha=0.7,
    edgecolors='black',
    linewidth=2,
)

# Attach a boxed caption to every marker
caption_box = dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7)
for cfg in df_configs.itertuples(index=False):
    ax.annotate(
        f"{cfg.name}\nnlist={cfg.nlist}, nprobe={cfg.nprobe}",
        (cfg.recall, cfg.qps),
        textcoords="offset points",
        xytext=(10, 5),
        fontsize=9,
        bbox=caption_box,
    )

ax.set_xlabel('Recall@10', fontsize=12)
ax.set_ylabel('Queries Per Second', fontsize=12)
ax.set_title('Common IVFFlat Configurations\n(Size = nlist, Color = nprobe)', fontsize=14, fontweight='bold')

cbar = plt.colorbar(scatter)
cbar.set_label('nprobe', fontsize=11)

ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 10. Summary and Recommendations

Based on our experiments, here are the key findings:

In [None]:
banner = "=" * 70
print(banner)
print("IVFFlat Parameter Tuning Guidelines")
print(banner)

# Static guidance; only the database size figures are filled in at run time
guidelines = f"""
1. nlist (Number of clusters):
   - Rule of thumb: nlist ≈ sqrt(n) where n is database size
   - For {nb:,} vectors: sqrt({nb}) ≈ {int(np.sqrt(nb))}
   - Higher nlist → Faster search but longer training
   - Need sufficient training data: ~30-256 × nlist vectors minimum
   - Recommended range: sqrt(n)/4 to 4×sqrt(n)

2. nprobe (Search probes):
   - This is your main recall/speed trade-off knob
   - Higher nprobe → Better recall but slower search
   - Can be adjusted at runtime without rebuilding index
   - As a percentage: 1-10% of nlist is common for good balance
   - For exact search: set nprobe = nlist (defeats purpose of IVF)

Common Configurations:
---------------------------------------------------
| Use Case          | nlist       | nprobe       |
|-------------------|-------------|--------------|
| Speed optimized   | 4×sqrt(n)   | 1-5% of nlist|
| Balanced          | sqrt(n)     | 5-10% of nlist|
| High recall       | sqrt(n)/2   | 10-25% of nlist|
| Very high recall  | sqrt(n)/4   | 25-50% of nlist|
---------------------------------------------------

Rule of thumb formulas:
  - nlist = sqrt(n) for balanced performance
  - nprobe = nlist/16 to nlist/4 for recall 90-99%
"""
print(guidelines)

# Show best configurations from our experiments
print("\nBest configurations from our experiments:")
print("-" * 50)

# For each recall floor, report the highest-QPS grid entry that exceeds it;
# nothing is printed for a floor that no configuration reaches.
recall_floors = [
    (0.95, "Best for >95% recall"),
    (0.90, "\nFastest with >90% recall"),
]
for floor, headline in recall_floors:
    qualifying = df_grid[df_grid['recall'] > floor].sort_values('qps', ascending=False)
    if qualifying.empty:
        continue
    best = qualifying.iloc[0]
    print(f"{headline}: nlist={int(best['nlist'])}, nprobe={int(best['nprobe'])}")
    print(f"  Recall: {best['recall']:.4f}, QPS: {best['qps']:.0f}")

## 11. Interactive Parameter Explorer

Use this cell to experiment with your own parameter combinations.

In [None]:
def test_ivfflat_config(nlist, nprobe, xb, xq, labels_gt, k):
    """
    Build, query and report on one IVFFlat configuration.

    Prints build timings, search time, recall, throughput, the estimated
    memory footprint and the nprobe/nlist ratio.

    Returns:
        The built index, its recall, and its queries-per-second figure.
    """
    n_queries = len(xq)

    print(f"Testing IVFFlat with nlist={nlist}, nprobe={nprobe}")
    print("-" * 60)

    # Build phase
    index, train_time, add_time = build_ivfflat_index(xb, nlist)
    for line in (
        f"Training time: {train_time:.2f}s",
        f"Add time: {add_time:.2f}s",
        f"Vectors per cluster: {len(xb) / nlist:.0f}",
    ):
        print(line)

    # Search phase (the distances are not needed for the report)
    _, labels, search_time = search_ivfflat_index(index, xq, k, nprobe)
    print(f"Search time: {search_time*1000:.2f}ms for {n_queries} queries")

    # Quality and cost figures
    recall = compute_recall(labels_gt, labels, k)
    qps = n_queries / search_time
    memory = estimate_memory_usage(index, xb.shape[1])

    for line in (
        f"Recall@{k}: {recall:.4f}",
        f"QPS: {qps:.0f}",
        f"Estimated memory: {memory:.1f} MB",
        f"nprobe/nlist ratio: {nprobe/nlist*100:.1f}%",
    ):
        print(line)

    return index, recall, qps

# Playground: edit the two values below and re-run the cell to try
# your own configuration.
my_nlist = 200
my_nprobe = 20

test_ivfflat_config(nlist=my_nlist, nprobe=my_nprobe, xb=xb, xq=xq, labels_gt=labels_gt, k=k)

## 12. Bonus: Using index_factory String

FAISS provides a convenient `index_factory` function that creates indexes from a string description.

In [None]:
# Using index_factory for IVFFlat
print("Creating IVFFlat indexes using index_factory:\n")

# Different index factory strings
factory_strings = [
    "IVF100,Flat",      # 100 clusters, flat storage
    "IVF256,Flat",      # 256 clusters
    "IVF1024,Flat",     # 1024 clusters
]

for factory_string in factory_strings:
    print(f"Factory string: '{factory_string}'")

    # Create index using factory
    index = faiss.index_factory(d, factory_string)

    # Durations use perf_counter(): it is monotonic and high-resolution,
    # whereas time.time() follows the adjustable wall clock.

    # Train and add
    start = time.perf_counter()
    index.train(xb)
    train_time = time.perf_counter() - start

    start = time.perf_counter()
    index.add(xb)
    add_time = time.perf_counter() - start

    # Search with nprobe=10
    index.nprobe = 10
    start = time.perf_counter()
    D, I = index.search(xq, k)
    search_time = time.perf_counter() - start

    recall = compute_recall(labels_gt, I, k)

    print(f"  nlist: {index.nlist}, nprobe: {index.nprobe}")
    print(f"  Train: {train_time:.2f}s, Add: {add_time:.2f}s, Search: {search_time*1000:.2f}ms")
    # Label the metric with the actual k, as test_ivfflat_config does
    print(f"  Recall@{k}: {recall:.4f}")
    print()