Skip to content

Commit

Permalink
modify ground truth paths
Browse files: browse the repository at this point in the history
  • Loading branch information
marevol committed May 18, 2024
1 parent cdaea45 commit 9d0ddae
Show file tree
Hide file tree
Showing 9 changed files with 29 additions and 19 deletions.
5 changes: 3 additions & 2 deletions run-chroma.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"import json\n",
"import os\n",
"import pprint\n",
"import re\n",
"import subprocess\n",
"import time\n",
"from datetime import timedelta, datetime\n",
Expand Down Expand Up @@ -787,7 +788,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size)\n",
" stop_update()\n",
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")"
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}.jsonl.gz\")"
]
},
{
Expand Down Expand Up @@ -817,7 +818,7 @@
"# search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n",
"# search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, pre_filter=pre_filter_generator())\n",
"# stop_update()\n",
"# results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")"
"# results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}_filtered.jsonl.gz\")"
]
},
{
Expand Down
5 changes: 3 additions & 2 deletions run-elasticsearch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"import json\n",
"import os\n",
"import pprint\n",
"import re\n",
"import subprocess\n",
"import time\n",
"from datetime import timedelta, datetime\n",
Expand Down Expand Up @@ -965,7 +966,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, explain=False, track_total_hits=False, offset=dataset_config.index_size)\n",
" stop_update()\n",
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")"
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}.jsonl.gz\")"
]
},
{
Expand Down Expand Up @@ -994,7 +995,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, explain=False, track_total_hits=False, offset=dataset_config.index_size, pre_filter=pre_filter_generator())\n",
" stop_update()\n",
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")"
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}_filtered.jsonl.gz\")"
]
},
{
Expand Down
5 changes: 3 additions & 2 deletions run-milvus.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"import json\n",
"import os\n",
"import pprint\n",
"import re\n",
"import subprocess\n",
"import time\n",
"from datetime import timedelta, datetime\n",
Expand Down Expand Up @@ -905,7 +906,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size)\n",
" stop_update()\n",
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")"
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}.jsonl.gz\")"
]
},
{
Expand All @@ -930,7 +931,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, pre_filter=pre_filter_generator())\n",
" stop_update()\n",
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")"
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}_filtered.jsonl.gz\")"
]
},
{
Expand Down
7 changes: 4 additions & 3 deletions run-opensearch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"import json\n",
"import os\n",
"import pprint\n",
"import re\n",
"import subprocess\n",
"import time\n",
"from datetime import timedelta, datetime\n",
Expand Down Expand Up @@ -50,7 +51,7 @@
" opensearch_name: str = \"benchmark_opensearch\"\n",
" opensearch_host: str = \"localhost\"\n",
" opensearch_port: int = 9212\n",
" opensearch_version: str = \"2.13.0\"\n",
" opensearch_version: str = \"2.14.0\"\n",
" opensearch_heap: str = \"2g\" # \"4g\"\n",
"\n",
"\n",
Expand Down Expand Up @@ -955,7 +956,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, explain=False, track_total_hits=False, offset=dataset_config.index_size)\n",
" stop_update()\n",
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")"
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}.jsonl.gz\")"
]
},
{
Expand Down Expand Up @@ -984,7 +985,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, explain=False, track_total_hits=False, offset=dataset_config.index_size, pre_filter=pre_filter_generator())\n",
" stop_update()\n",
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")"
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}_filtered.jsonl.gz\")"
]
},
{
Expand Down
5 changes: 3 additions & 2 deletions run-pgvector.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"import json\n",
"import os\n",
"import pprint\n",
"import re\n",
"import subprocess\n",
"import time\n",
"from datetime import timedelta, datetime\n",
Expand Down Expand Up @@ -814,7 +815,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size)\n",
" stop_update()\n",
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")"
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}.jsonl.gz\")"
]
},
{
Expand All @@ -839,7 +840,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, pre_filter=pre_filter_generator())\n",
" stop_update()\n",
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")"
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}_filtered.jsonl.gz\")"
]
},
{
Expand Down
5 changes: 3 additions & 2 deletions run-qdrant.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"import json\n",
"import os\n",
"import pprint\n",
"import re\n",
"import subprocess\n",
"import time\n",
"from datetime import timedelta, datetime\n",
Expand Down Expand Up @@ -827,7 +828,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, exact=dataset_config.exact)\n",
" stop_update()\n",
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")"
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}.jsonl.gz\")"
]
},
{
Expand Down Expand Up @@ -861,7 +862,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, pre_filter=pre_filter_generator(), exact=dataset_config.exact)\n",
" stop_update()\n",
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")"
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}_filtered.jsonl.gz\")"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions run-vespa.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, exact=dataset_config.exact)\n",
" stop_update()\n",
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")"
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}.jsonl.gz\")"
]
},
{
Expand All @@ -937,7 +937,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, pre_filter=pre_filter_generator(), exact=dataset_config.exact)\n",
" stop_update()\n",
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")"
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}_filtered.jsonl.gz\")"
]
},
{
Expand Down
5 changes: 3 additions & 2 deletions run-weaviate.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"import json\n",
"import os\n",
"import pprint\n",
"import re\n",
"import subprocess\n",
"import time\n",
"from datetime import timedelta, datetime\n",
Expand Down Expand Up @@ -829,7 +830,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size)\n",
" stop_update()\n",
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")"
" results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}.jsonl.gz\")"
]
},
{
Expand Down Expand Up @@ -860,7 +861,7 @@
" search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n",
" search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, pre_filter=pre_filter_generator())\n",
" stop_update()\n",
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")"
" results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{re.sub(r'-m.*', '', target_config)}/knn_{page_size}_filtered.jsonl.gz\")"
]
},
{
Expand Down
7 changes: 5 additions & 2 deletions scripts/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ setting_type=100k-768-m32-efc200-ef100-ip

data_dir="${base_dir}/../dataset/${data_type}"
output_dir="${base_dir}/../output"
truth_dir="${base_dir}/../dataset/ground_truth/${setting_type}"

mkdir -p "${data_dir}" "${output_dir}"

Expand All @@ -57,6 +56,10 @@ while [[ ${count} -lt ${num_of_docs} ]] ; do
count=$((count + 100000))
done


truth_type=$(echo ${setting_type} | sed -e "s/-m.*//")
truth_dir="${base_dir}/../dataset/ground_truth/${truth_type}"

mkdir -p "${truth_dir}"

truth_files="
Expand All @@ -72,7 +75,7 @@ for truth_file in ${truth_files} ; do
if [[ ! -f "${truth_dir}/${truth_file}" ]] ; then
echo -n "Downloading ${truth_file}... "
curl -sL -o "${truth_dir}/${truth_file}" \
"https://codelibs.co/download/ann/benchmark/${setting_type}/${truth_file}" || exit 1
"https://codelibs.co/download/ann/benchmark/${truth_type}/${truth_file}" || exit 1
echo "[OK]"
fi
done
Expand Down

0 comments on commit 9d0ddae

Please sign in to comment.