Skip to content

Commit

Permalink
update target_config names
Browse files: browse the repository at this point in the history
  • Loading branch information
marevol committed May 17, 2024
1 parent 8506a42 commit cdaea45
Show file tree
Hide file tree
Showing 9 changed files with 39 additions and 37 deletions.
8 changes: 4 additions & 4 deletions run-chroma.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
"\n",
"def get_dataset_config(target_name):\n",
" setting = {\n",
" \"100k-768-m32-ef100-ip\": {\n",
" \"100k-768-m32-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -85,7 +85,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"\",\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"1m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -101,7 +101,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"\",\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"5m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand Down Expand Up @@ -679,7 +679,7 @@
"source": [
"query_data = {\"section_values\": []}\n",
"results = {}\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-ef100-ip\")\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-efc200-ef100-ip\")\n",
"dataset_config = get_dataset_config(target_config)\n",
"pprint.pprint(dataset_config)"
]
Expand Down
8 changes: 4 additions & 4 deletions run-elasticsearch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
"\n",
"def get_dataset_config(target_name):\n",
" setting = {\n",
" \"100k-768-m32-ef100-ip\": {\n",
" \"100k-768-m32-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -72,7 +72,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"int8\",\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"1m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -88,7 +88,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"int8\",\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"5m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand Down Expand Up @@ -842,7 +842,7 @@
"source": [
"query_data = {\"section_values\": []}\n",
"results = {}\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-ef100-ip\")\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-efc200-ef100-ip\")\n",
"dataset_config = get_dataset_config(target_config)\n",
"pprint.pprint(dataset_config)"
]
Expand Down
8 changes: 4 additions & 4 deletions run-milvus.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
"\n",
"def get_dataset_config(target_name):\n",
" setting = {\n",
" \"100k-768-m32-ef100-ip\": {\n",
" \"100k-768-m32-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -83,7 +83,7 @@
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"1m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -97,7 +97,7 @@
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"5m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand Down Expand Up @@ -809,7 +809,7 @@
"source": [
"query_data = {\"section_values\": []}\n",
"results = {}\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-ef100-ip\")\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-efc200-ef100-ip\")\n",
"dataset_config = get_dataset_config(target_config)\n",
"pprint.pprint(dataset_config)"
]
Expand Down
8 changes: 4 additions & 4 deletions run-opensearch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
"\n",
"def get_dataset_config(target_name):\n",
" setting = {\n",
" \"100k-768-m32-ef100-ip\": {\n",
" \"100k-768-m32-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -71,7 +71,7 @@
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"1m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -86,7 +86,7 @@
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"5m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand Down Expand Up @@ -836,7 +836,7 @@
"source": [
"query_data = {\"section_values\": []}\n",
"results = {}\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-ef100-ip\")\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-efc200-ef100-ip\")\n",
"dataset_config = get_dataset_config(target_config)\n",
"pprint.pprint(dataset_config)"
]
Expand Down
8 changes: 4 additions & 4 deletions run-pgvector.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
"\n",
"def get_dataset_config(target_name):\n",
" setting = {\n",
" \"100k-768-m32-ef100-ip\": {\n",
" \"100k-768-m32-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -90,7 +90,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"halfvec\", # \"vector\",\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"1m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -105,7 +105,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"halfvec\", # \"vector\",\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"5m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand Down Expand Up @@ -723,7 +723,7 @@
"source": [
"query_data = {\"section_values\": []}\n",
"results = {}\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-ef100-ip\")\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-efc200-ef100-ip\")\n",
"dataset_config = get_dataset_config(target_config)\n",
"pprint.pprint(dataset_config)"
]
Expand Down
18 changes: 10 additions & 8 deletions run-qdrant.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
"\n",
"def get_dataset_config(target_name):\n",
" setting = {\n",
" \"100k-768-m32-ef100-ip\": {\n",
" \"100k-768-m32-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -71,7 +71,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"int8\",\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"1m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -87,7 +87,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"int8\",\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"5m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand Down Expand Up @@ -255,8 +255,12 @@
" \"distance\": config.distance,\n",
" \"hnsw_config\": {\n",
" \"m\": config.hnsw_m,\n",
" \"ef_construction\": config.hnsw_ef_construction,\n",
" \"ef_construct\": config.hnsw_ef_construction,\n",
" }\n",
" },\n",
" \"hnsw_config\": {\n",
" \"m\": config.hnsw_m,\n",
" \"ef_construct\": config.hnsw_ef_construction,\n",
" }\n",
" }\n",
" if config.quantization == \"int8\":\n",
Expand Down Expand Up @@ -729,7 +733,7 @@
"source": [
"query_data = {\"section_values\": []}\n",
"results = {}\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-ef100-ip\")\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-efc200-ef100-ip\")\n",
"dataset_config = get_dataset_config(target_config)\n",
"pprint.pprint(dataset_config)"
]
Expand Down Expand Up @@ -776,9 +780,7 @@
"cell_type": "code",
"execution_count": null,
"id": "bcbdfa51-5bdf-494f-a9aa-3f265b8ec20d",
"metadata": {
"scrolled": true
},
"metadata": {},
"outputs": [],
"source": [
"print_docker_container_stats(dataset_config)\n",
Expand Down
8 changes: 4 additions & 4 deletions run-vespa.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
"\n",
"def get_dataset_config(target_name):\n",
" setting = {\n",
" \"100k-768-m32-ef100-ip\": {\n",
" \"100k-768-m32-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -83,7 +83,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"float\", # \"bfloat16\",\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"1m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -99,7 +99,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"float\", # \"bfloat16\",\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"5m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand Down Expand Up @@ -819,7 +819,7 @@
"source": [
"query_data = {\"section_values\": []}\n",
"results = {}\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-ef100-ip\")\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-efc200-ef100-ip\")\n",
"dataset_config = get_dataset_config(target_config)\n",
"pprint.pprint(dataset_config)"
]
Expand Down
8 changes: 4 additions & 4 deletions run-weaviate.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"\n",
"def get_dataset_config(target_name):\n",
" setting = {\n",
" \"100k-768-m32-ef100-ip\": {\n",
" \"100k-768-m32-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -69,7 +69,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"none\", # \"pq\",\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"1m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand All @@ -84,7 +84,7 @@
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"none\", # \"pq\",\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"5m-768-m48-efc200-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
" \"embedding_path\": Path(\"dataset/passages-c400-jawiki-20230403/multilingual-e5-base-passage\"),\n",
" \"num_of_docs\": 5555583,\n",
Expand Down Expand Up @@ -737,7 +737,7 @@
"source": [
"query_data = {\"section_values\": []}\n",
"results = {}\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-ef100-ip\")\n",
"target_config = os.getenv(\"TARGET_CONFIG\", \"100k-768-m32-efc200-ef100-ip\")\n",
"dataset_config = get_dataset_config(target_config)\n",
"pprint.pprint(dataset_config)"
]
Expand Down
2 changes: 1 addition & 1 deletion scripts/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ fi
# wikipedia contents
data_type=passages-c400-jawiki-20230403
model_type=multilingual-e5-base-passage
setting_type=100k-768-m32-ef100-ip
setting_type=100k-768-m32-efc200-ef100-ip

data_dir="${base_dir}/../dataset/${data_type}"
output_dir="${base_dir}/../output"
Expand Down

0 comments on commit cdaea45

Please sign in to comment.