From c5b88f3f16ea802beff010fda11c90a7d70b6d4d Mon Sep 17 00:00:00 2001 From: writinwaters <93570324+writinwaters@users.noreply.github.com> Date: Mon, 17 Jun 2024 23:11:48 +0800 Subject: [PATCH] Synchronize documentation. (#139) Co-authored-by: Infinity Curator --- website/docs/getstarted/build_from_source.md | 4 +- website/docs/references/benchmark.md | 45 +++++++++++--------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/website/docs/getstarted/build_from_source.md b/website/docs/getstarted/build_from_source.md index 9e0d741..0ac501b 100644 --- a/website/docs/getstarted/build_from_source.md +++ b/website/docs/getstarted/build_from_source.md @@ -76,7 +76,7 @@ echo 'deb https://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main' | sudo tee / wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc sudo add-apt-repository -P ppa:ubuntu-toolchain-r/test sudo add-apt-repository -P ppa:mhier/libboost-latest -sudo apt update && sudo apt install g++-13 clang-17 clang-tools-17 flex libboost1.81-dev liblz4-dev zlib1g-dev libevent-dev liburing-dev libjemalloc-dev +sudo apt update && sudo apt install g++-13 clang-17 clang-tools-17 flex libboost1.81-dev liblz4-dev zlib1g-dev libevent-dev liburing-dev libjemalloc-dev python3-dev ln -s /usr/lib/llvm-17/bin/clang-scan-deps /usr/bin/clang-scan-deps ln -s /usr/bin/clang-format-17 /usr/bin/clang-format ln -s /usr/bin/clang-tidy-17 /usr/bin/clang-tidy @@ -128,7 +128,7 @@ sudo apt update && sudo apt install -y git wget wget https://github.com/Kitware/CMake/releases/download/v3.29.0/cmake-3.29.0-linux-x86_64.tar.gz tar zxvf cmake-3.29.0-linux-x86_64.tar.gz sudo cp -rf cmake-3.29.0-linux-x86_64/bin/* /usr/local/bin && sudo cp -rf cmake-3.29.0-linux-x86_64/share/* /usr/local/share && rm -rf cmake-3.29.0-linux-x86_64 -sudo apt install -y ninja-build clang-17 clang-tools-17 llvm-17 flex libboost1.81-dev liblz4-dev zlib1g-dev libevent-dev liburing-dev libjemalloc-dev +sudo apt install -y ninja-build clang-17 clang-tools-17 llvm-17 flex libboost1.81-dev liblz4-dev zlib1g-dev libevent-dev liburing-dev libjemalloc-dev python3-dev ln -s /usr/lib/llvm-17/bin/clang-scan-deps /usr/bin/clang-scan-deps ln -s /usr/bin/clang-format-17 /usr/bin/clang-format ln -s /usr/bin/clang-tidy-17 /usr/bin/clang-tidy diff --git a/website/docs/references/benchmark.md b/website/docs/references/benchmark.md index e113956..59667f0 100644 --- a/website/docs/references/benchmark.md +++ b/website/docs/references/benchmark.md @@ -3,9 +3,8 @@ sidebar_position: 1 slug: /benchmark --- # Benchmark -This document compares the following key specifications of Elasticsearch, Qdrant, and Infinity: +This document compares the following key specifications of Elasticsearch, Qdrant, Quickwit and Infinity: -- Recall - Time to insert & build index - Time to import & build index - query latency @@ -29,7 +28,8 @@ Test environment: | ----------------- |---------| | **Elasticsearch** | v8.13.4 | | **Qdrant** | v1.9.2 | -| **Infinity** | v0.1.0 | +| **Quickwit** | v0.8.1 | +| **Infinity** | v0.2.0 | ## Run Benchmark @@ -54,17 +54,17 @@ sed '1d' datasets/enwiki/enwiki-20120502-lines-1k.txt > datasets/enwiki/enwiki.c 3. Start up the databases to compare: ```bash -mkdir -p $HOME/elasticsearch -docker run -d --name elasticsearch --network host -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms16384m -Xmx32000m" -e "xpack.security.enabled=false" -v $HOME/elasticsearch:/usr/share/elasticsearch elasticsearch:8.13.4 +mkdir -p $HOME/elasticsearch/data +docker run -d --name elasticsearch --network host -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms16384m -Xmx32000m" -e "xpack.security.enabled=false" -v $HOME/elasticsearch/data:/usr/share/elasticsearch/data elasticsearch:8.13.4 mkdir -p $HOME/qdrant/storage docker run -d --name qdrant --network host -v $HOME/qdrant/storage:/qdrant/storage qdrant/qdrant:v1.9.2 -mkdir -p $HOME/infinity -docker run -d --name infinity -v $HOME/infinity:/var/infinity --ulimit nofile=500000:500000 --network=host infiniflow/infinity:0.1.0 - mkdir -p $HOME/quickwit -docker run -d --rm -v $HOME/quickwit/qwdata:/quickwit/qwdata -p 127.0.0.1:7280:7280 quickwit/quickwit run +docker run -d --name quickwit --network=host -v $HOME/quickwit/qwdata:/quickwit/qwdata quickwit/quickwit:0.8.1 run + +mkdir -p $HOME/infinity +docker run -d --name infinity --network=host -v $HOME/infinity:/var/infinity --ulimit nofile=500000:500000 infiniflow/infinity:nightly ``` 4. Run Benchmark: @@ -88,16 +88,16 @@ usage: run.py [-h] [--generate] [--import] [--query QUERY] [--query-express QUER RAG Database Benchmark options: --h, --help show this help message and exit ---generate Generate fulltext query set based on the dataset (default: False) ---import Import dataset into database engine (default: False) ---query QUERY Run the query set only once using given number of clients with recording the result and latency. This is for result validation and latency analysis (default: 0) ---query-express QUERY_EXPRESS -Run the query set randomly using given number of clients without recording the result and latency. This is for QPS measurement. (default: 0) ---concurrency CONCURRENCY -Choose concurrency mechanism, one of: mp - multiprocessing(recommended), mt - multithreading. (default: mp) ---engine ENGINE Choose database engine to benchmark, one of: infinity, qdrant, elasticsearch (default: infinity) ---dataset DATASET Choose dataset to benchmark, one of: gist, sift, geonames, enwiki (default: enwiki) + -h, --help show this help message and exit + --generate Generate fulltext query set based on the dataset (default: False) + --import Import dataset into database engine (default: False) + --query QUERY Run the query set only once using given number of clients with recording the result and latency. This is for result validation and latency analysis (default: 0) + --query-express QUERY_EXPRESS + Run the query set randomly using given number of clients without recording the result and latency. This is for QPS measurement. (default: 0) + --concurrency CONCURRENCY + Choose concurrency mechanism, one of: mp - multiprocessing(recommended), mt - multithreading. (default: mp) + --engine ENGINE Choose database engine to benchmark, one of: infinity, qdrant, elasticsearch, quickwit (default: infinity) + --dataset DATASET Choose dataset to benchmark, one of: gist, sift, geonames, enwiki, tantivy (default: enwiki) ``` Following are commands for engine `infinity` and dataset `enwiki`: @@ -114,7 +114,9 @@ Following are commands to issue a single query so that you can compare results a ```base curl -X GET "http://localhost:9200/elasticsearch_enwiki/_search" -H 'Content-Type: application/json' -d'{"size":10,"_source":"doctitle","query": {"match": { "body": "wraysbury istorijos" }}}' -psql -h 0.0.0.0 -p 5432 -c "SELECT doctitle, ROW_ID(), SCORE() FROM infinity_enwiki SEARCH MATCH TEXT ('body', 'wraysbury istorijos', 'topn=10;block_max=true');" +curl -X GET "http://localhost:7280/api/v1/_elastic/qucikwit_enwiki/_search" -H 'Content-Type: application/json' -d'{"query": {"query_string": {"query": "wraysbury istorijos", "fields": [ "body" ] } },"sort": ["_score"]}' + +psql -h 0.0.0.0 -p 5432 -c "SELECT doctitle, ROW_ID(), SCORE() FROM infinity_enwiki SEARCH MATCH TEXT ('body', 'wraysbury istorijos', 'topn=10');" ``` ## Benchmark Results @@ -152,7 +154,8 @@ psql -h 0.0.0.0 -p 5432 -c "SELECT doctitle, ROW_ID(), SCORE() FROM infinity_enw | | Time to insert & build index | Time to import & build index | P95 Latency(ms)| QPS (16 python clients) | Memory | vCPU | | ----------------- | ---------------------------- | ---------------------------- | ---------------| ------------------------| --------| ----- | | **Elasticsearch** | 2289 s | N/A | 14.75 | 1340 | 21.0GB | 10.6 | -| **Infinity** | 1562 s | 2244 s | 1.86 | 12328 | 10.0GB | 11.0 | +| **Quickwith** | 3962 s | N/A | 65.55 | 179 | 1.2GB | 11.3 | +| **Infinity** | 1562 s | 2244 s | 1.37 | 13731 | 10.0GB | 11.0 | ---