asg017 · asg017 · Mar 31, 2026 · Mar 30, 2026
diff --git a/Makefile b/Makefile
@@ -42,6 +42,11 @@ ifndef OMIT_SIMD
 	ifeq ($(shell uname -sm),Darwin arm64)
 	CFLAGS += -mcpu=apple-m1 -DSQLITE_VEC_ENABLE_NEON
 	endif
+	ifeq ($(shell uname -s),Linux) # the AVX probe below reads /proc/cpuinfo
+	ifneq ($(filter avx,$(shell grep -m1 '^flags' /proc/cpuinfo 2>/dev/null)),) # exact "avx" word on the first flags line
+	CFLAGS += -mavx -DSQLITE_VEC_ENABLE_AVX
+	endif
+	endif
 endif
 
 ifdef USE_BREW_SQLITE
@@ -155,6 +160,13 @@ clean:
 	rm -rf dist
 
 
+# `make amalgamation` runs scripts/amalgamate.py on sqlite-vec.c and stores its stdout in $(prefix)/sqlite-vec.c.
+TARGET_AMALGAMATION=$(prefix)/sqlite-vec.c
+amalgamation: $(TARGET_AMALGAMATION)
+# $(prefix) is order-only (a directory's mtime must not force rebuilds); output is staged in $@.tmp so a failed run never leaves a truncated target.
+$(TARGET_AMALGAMATION): sqlite-vec.c $(wildcard sqlite-vec-*.c) scripts/amalgamate.py | $(prefix)
+	python3 scripts/amalgamate.py sqlite-vec.c > $@.tmp && mv -f $@.tmp $@
+
 FORMAT_FILES=sqlite-vec.h sqlite-vec.c
 format: $(FORMAT_FILES)
 	clang-format -i $(FORMAT_FILES)
@@ -174,7 +186,7 @@ evidence-of:
 test:
 	sqlite3 :memory: '.read test.sql'
 
-.PHONY: version loadable static test clean gh-release evidence-of install uninstall
+.PHONY: version loadable static test clean gh-release evidence-of install uninstall amalgamation # amalgamation is only an alias for $(TARGET_AMALGAMATION), not a file
 
 publish-release:
 	./scripts/publish-release.sh

diff --git a/TODO.md b/TODO.md
@@ -0,0 +1,73 @@
+# TODO: `ann` base branch + consolidated benchmarks
+
+## 1. Create `ann` branch with shared code
+
+### 1.1 Branch setup
+- [x] `git checkout -B ann origin/main`
+- [x] Cherry-pick `624f998` (vec0_distance_full shared distance dispatch)
+- [x] Cherry-pick stdint.h fix for test header
+- [ ] Pull NEON cosine optimization from ivf-yolo3 into shared code
+  - Currently only in ivf branch but is general-purpose (benefits all distance calcs)
+  - Lives in `distance_cosine_float()` — ~57 lines of ARM NEON vectorized cosine
+
+### 1.2 Benchmark infrastructure (`benchmarks-ann/`)
+- [x] Seed data pipeline (`seed/Makefile`, `seed/build_base_db.py`)
+- [x] Ground truth generator (`ground_truth.py`)
+- [x] Results schema (`schema.sql`)
+- [x] Benchmark runner with `INDEX_REGISTRY` extension point (`bench.py`)
+  - Baseline configs (float, int8-rescore, bit-rescore) implemented
+  - Index branches register their types via `INDEX_REGISTRY` dict
+- [x] Makefile with baseline targets
+- [x] README
+
+### 1.3 Rebase feature branches onto `ann`
+- [x] Rebase `diskann-yolo2` onto `ann` (1 commit: DiskANN implementation)
+- [x] Rebase `ivf-yolo3` onto `ann` (1 commit: IVF implementation)
+- [x] Rebase `annoy-yolo2` onto `ann` (2 commits: Annoy implementation + schema fix)
+- [x] Verify each branch has only its index-specific commits remaining
+- [ ] Force-push all 4 branches to origin
+
+---
+
+## 2. Per-branch: register index type in benchmarks
+
+Each index branch should add to `benchmarks-ann/` when rebased onto `ann`:
+
+### 2.1 Register in `bench.py`
+
+Add an `INDEX_REGISTRY` entry. Each entry provides:
+- `defaults` — default param values
+- `create_table_sql(params)` — CREATE VIRTUAL TABLE with INDEXED BY clause
+- `insert_sql(params)` — custom insert SQL, or None for default
+- `post_insert_hook(conn, params)` — training/building step, returns time
+- `run_query(conn, params, query, k)` — custom query, or None for default MATCH
+- `describe(params)` — one-line description for report output
+
+### 2.2 Add configs to `Makefile`
+
+Append index-specific config variables and targets. Example pattern:
+
+```makefile
+DISKANN_CONFIGS = \
+    "diskann-R48-binary:type=diskann,R=48,L=128,quantizer=binary" \
+    ...
+
+ALL_CONFIGS += $(DISKANN_CONFIGS)
+
+bench-diskann: seed
+	$(BENCH) --subset-size 10000 -k 10 -o runs/diskann $(BASELINES) $(DISKANN_CONFIGS)
+	...
+```
+
+### 2.3 Migrate existing benchmark results/docs
+
+- Move useful results docs (RESULTS.md, etc.) into `benchmarks-ann/results/`
+- Delete redundant per-branch benchmark directories once consolidated infra is proven
+
+---
+
+## 3. Future improvements
+
+- [ ] Reporting script (`report.py`) — query results.db, produce markdown comparison tables
+- [ ] Profiling targets in Makefile (lift from ivf-yolo3's Instruments/perf wrappers)
+- [ ] Pre-computed ground truth integration (use GT DB files instead of on-the-fly brute-force)
diff --git a/benchmarks-ann/.gitignore b/benchmarks-ann/.gitignore
@@ -0,0 +1,2 @@
+*.db
+runs/
diff --git a/benchmarks-ann/Makefile b/benchmarks-ann/Makefile
@@ -0,0 +1,73 @@
+# Benchmark driver for the vec0 KNN configurations (see README.md in this directory).
+
+# Interpreter used for bench.py and ground_truth.py; override with `make PYTHON=...`.
+# Defaults to python3 because a bare `python` is missing (or Python 2) on many systems.
+PYTHON ?= python3
+BENCH = $(PYTHON) bench.py
+# NOTE(review): BASE_DB and EXT are not referenced by any rule in this file;
+# presumably they mirror defaults inside bench.py -- confirm before removing.
+BASE_DB = seed/base.db
+EXT = ../dist/vec0
+
+# --- Baseline (brute-force) configs ---
+# Each entry is passed to bench.py as one argument: name:type=<index_type>,key=val,...
+BASELINES = \
+	"brute-float:type=baseline,variant=float" \
+	"brute-int8:type=baseline,variant=int8" \
+	"brute-bit:type=baseline,variant=bit"
+
+# --- Index-specific configs ---
+# Each index branch should add its own configs here. Example:
+#
+# DISKANN_CONFIGS = \
+# 	"diskann-R48-binary:type=diskann,R=48,L=128,quantizer=binary" \
+# 	"diskann-R72-int8:type=diskann,R=72,L=128,quantizer=int8"
+#
+# IVF_CONFIGS = \
+# 	"ivf-n128-p16:type=ivf,nlist=128,nprobe=16"
+#
+# ANNOY_CONFIGS = \
+# 	"annoy-t50:type=annoy,n_trees=50"
+
+# Configs run by bench-10k / bench-50k / bench-100k; index branches append with +=.
+ALL_CONFIGS = $(BASELINES)
+
+# `seed` must stay phony: a directory of the same name exists, so make would
+# otherwise treat the target as up to date and never recurse into it.
+.PHONY: seed ground-truth bench-smoke bench-10k bench-50k bench-100k bench-all \
+        report clean
+
+# --- Data preparation ---
+seed:
+	$(MAKE) -C seed
+
+ground-truth: seed
+	$(PYTHON) ground_truth.py --subset-size 10000
+	$(PYTHON) ground_truth.py --subset-size 50000
+	$(PYTHON) ground_truth.py --subset-size 100000
+
+# --- Quick smoke test ---
+bench-smoke: seed
+	$(BENCH) --subset-size 5000 -k 10 -n 20 -o runs/smoke \
+		$(BASELINES)
+
+# --- Standard sizes ---
+bench-10k: seed
+	$(BENCH) --subset-size 10000 -k 10 -o runs/10k $(ALL_CONFIGS)
+
+bench-50k: seed
+	$(BENCH) --subset-size 50000 -k 10 -o runs/50k $(ALL_CONFIGS)
+
+bench-100k: seed
+	$(BENCH) --subset-size 100000 -k 10 -o runs/100k $(ALL_CONFIGS)
+
+bench-all: bench-10k bench-50k bench-100k
+
+# --- Report ---
+# Only prints the sqlite3 query to run by hand; it does not read any results itself.
+report:
+	@echo "Use: sqlite3 runs/<dir>/results.db 'SELECT * FROM bench_results ORDER BY recall DESC'"
+
+# --- Cleanup ---
+clean:
+	rm -rf runs/
diff --git a/benchmarks-ann/README.md b/benchmarks-ann/README.md
@@ -0,0 +1,81 @@
+# KNN Benchmarks for sqlite-vec
+
+Benchmarking infrastructure for vec0 KNN configurations. Includes brute-force
+baselines (float, int8, bit); index-specific branches add their own types
+via the `INDEX_REGISTRY` in `bench.py`.
+
+## Prerequisites
+
+- Built `dist/vec0` extension (run `make` from repo root)
+- Python 3.10+
+- `uv` (for seed data prep): `pip install uv`
+
+## Quick start
+
+```bash
+# 1. Download dataset and build seed DB (~3 GB download, ~5 min)
+make seed
+
+# 2. Run a quick smoke test (5k vectors, ~1 min)
+make bench-smoke
+
+# 3. Run full benchmark at 10k
+make bench-10k
+```
+
+## Usage
+
+### Direct invocation
+
+```bash
+python bench.py --subset-size 10000 \
+  "brute-float:type=baseline,variant=float" \
+  "brute-int8:type=baseline,variant=int8" \
+  "brute-bit:type=baseline,variant=bit"
+```
+
+### Config format
+
+`name:type=<index_type>,key=val,key=val`
+
+| Index type | Keys | Branch |
+|-----------|------|--------|
+| `baseline` | `variant` (float/int8/bit), `oversample` | this branch |
+
+Index branches register additional types in `INDEX_REGISTRY`. See the
+docstring in `bench.py` for the extension API.
+
+### Make targets
+
+| Target | Description |
+|--------|-------------|
+| `make seed` | Download COHERE 1M dataset and build the seed DB |
+| `make ground-truth` | Pre-compute ground truth for 10k/50k/100k |
+| `make bench-smoke` | Quick 5k baseline test |
+| `make bench-10k` | All configs at 10k vectors |
+| `make bench-50k` | All configs at 50k vectors |
+| `make bench-100k` | All configs at 100k vectors |
+| `make bench-all` | 10k + 50k + 100k |
+
+## Adding an index type
+
+In your index branch, add an entry to `INDEX_REGISTRY` in `bench.py` and
+append your configs to `ALL_CONFIGS` in the `Makefile`. See the existing
+`baseline` entry and the comments in both files for the pattern.
+
+## Results
+
+Results are stored in `runs/<dir>/results.db` using the schema in `schema.sql`.
+
+```bash
+sqlite3 runs/10k/results.db "
+  SELECT config_name, recall, mean_ms, qps
+  FROM bench_results
+  ORDER BY recall DESC
+"
+```
+
+## Dataset
+
+[Zilliz COHERE Medium 1M](https://zilliz.com/learn/datasets-for-vector-database-benchmarks):
+768 dimensions, cosine distance, 1M train vectors + 10k query vectors with precomputed neighbors.