Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 71 additions & 28 deletions notebooks/search/03-ELSER.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
Expand Down Expand Up @@ -129,7 +129,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
Expand Down Expand Up @@ -173,9 +173,35 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 39,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model deleted successfully, We will proceed with creating one\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/_8/2nxt7jjx27bd8bm5lw63ht340000gn/T/ipykernel_34494/2089429255.py:3: ElasticsearchWarning: The default [remove_binary] value of 'false' is deprecated and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true' or 'false' to ensure no behavior change.\n",
" client.ml.delete_trained_model(model_id=\".elser_model_2\",force=True)\n"
]
},
{
"data": {
"text/plain": [
"ObjectApiResponse({'model_id': '.elser_model_2', 'model_type': 'pytorch', 'model_package': {'packaged_model_id': 'elser_model_2', 'model_repository': 'https://ml-models.elastic.co', 'minimum_version': '11.0.0', 'size': 438123914, 'sha256': '2e0450a1c598221a919917cbb05d8672aed6c613c028008fedcd696462c81af0', 'metadata': {}, 'tags': [], 'vocabulary_file': 'elser_model_2.vocab.json'}, 'created_by': 'api_user', 'version': '11.0.0', 'create_time': 1701689920521, 'model_size_bytes': 0, 'estimated_operations': 0, 'license_level': 'platinum', 'description': 'Elastic Learned Sparse EncodeR v2', 'tags': ['elastic'], 'metadata': {}, 'input': {'field_names': ['text_field']}, 'inference_config': {'text_expansion': {'vocabulary': {'index': '.ml-inference-native-000002'}, 'tokenization': {'bert': {'do_lower_case': True, 'with_special_tokens': True, 'max_sequence_length': 512, 'truncate': 'first', 'span': -1}}}}, 'location': {'index': {'name': '.ml-inference-native-000002'}}})"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# delete model if already downloaded and deployed\n",
"try:\n",
Expand All @@ -202,7 +228,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 40,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -237,31 +263,39 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 41,
"metadata": {},
"outputs": [
{
"ename": "BadRequestError",
"evalue": "BadRequestError(400, 'status_exception', 'Could not start model deployment because an existing deployment with the same id [.elser_model_2] exist')",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mBadRequestError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb Cell 16\u001b[0m line \u001b[0;36m2\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb#X21sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39m# Start trained model deployment if not already deployed\u001b[39;00m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb#X21sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m client\u001b[39m.\u001b[39;49mml\u001b[39m.\u001b[39;49mstart_trained_model_deployment(\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb#X21sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m model_id\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m.elser_model_2\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb#X21sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m number_of_allocations\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb#X21sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m )\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/elasticsearch/_sync/client/utils.py:402\u001b[0m, in \u001b[0;36m_rewrite_parameters.<locals>.wrapper.<locals>.wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[1;32m 400\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 402\u001b[0m \u001b[39mreturn\u001b[39;00m api(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/elasticsearch/_sync/client/ml.py:3655\u001b[0m, in \u001b[0;36mMlClient.start_trained_model_deployment\u001b[0;34m(self, model_id, cache_size, error_trace, filter_path, human, number_of_allocations, pretty, priority, queue_capacity, threads_per_allocation, timeout, wait_for)\u001b[0m\n\u001b[1;32m 3653\u001b[0m __query[\u001b[39m\"\u001b[39m\u001b[39mwait_for\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m wait_for\n\u001b[1;32m 3654\u001b[0m __headers \u001b[39m=\u001b[39m {\u001b[39m\"\u001b[39m\u001b[39maccept\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mapplication/json\u001b[39m\u001b[39m\"\u001b[39m}\n\u001b[0;32m-> 3655\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mperform_request( \u001b[39m# type: ignore[return-value]\u001b[39;49;00m\n\u001b[1;32m 3656\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39mPOST\u001b[39;49m\u001b[39m\"\u001b[39;49m, __path, params\u001b[39m=\u001b[39;49m__query, headers\u001b[39m=\u001b[39;49m__headers\n\u001b[1;32m 3657\u001b[0m )\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/elasticsearch/_sync/client/_base.py:389\u001b[0m, in \u001b[0;36mNamespacedClient.perform_request\u001b[0;34m(self, method, path, params, headers, body)\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mperform_request\u001b[39m(\n\u001b[1;32m 379\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 380\u001b[0m method: \u001b[39mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[39m# Use the internal clients .perform_request() implementation\u001b[39;00m\n\u001b[1;32m 388\u001b[0m \u001b[39m# so we take advantage of their transport options.\u001b[39;00m\n\u001b[0;32m--> 389\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49mperform_request(\n\u001b[1;32m 390\u001b[0m method, path, params\u001b[39m=\u001b[39;49mparams, headers\u001b[39m=\u001b[39;49mheaders, body\u001b[39m=\u001b[39;49mbody\n\u001b[1;32m 391\u001b[0m )\n",
"File \u001b[0;32m~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/elasticsearch/_sync/client/_base.py:320\u001b[0m, in \u001b[0;36mBaseClient.perform_request\u001b[0;34m(self, method, path, params, headers, body)\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mValueError\u001b[39;00m, \u001b[39mKeyError\u001b[39;00m, \u001b[39mTypeError\u001b[39;00m):\n\u001b[1;32m 318\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 320\u001b[0m \u001b[39mraise\u001b[39;00m HTTP_EXCEPTIONS\u001b[39m.\u001b[39mget(meta\u001b[39m.\u001b[39mstatus, ApiError)(\n\u001b[1;32m 321\u001b[0m message\u001b[39m=\u001b[39mmessage, meta\u001b[39m=\u001b[39mmeta, body\u001b[39m=\u001b[39mresp_body\n\u001b[1;32m 322\u001b[0m )\n\u001b[1;32m 324\u001b[0m \u001b[39m# 'X-Elastic-Product: Elasticsearch' should be on every 2XX response.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_verified_elasticsearch:\n\u001b[1;32m 326\u001b[0m \u001b[39m# If the header is set we mark the server as verified.\u001b[39;00m\n",
"\u001b[0;31mBadRequestError\u001b[0m: BadRequestError(400, 'status_exception', 'Could not start model deployment because an existing deployment with the same id [.elser_model_2] exist')"
"name": "stdout",
"output_type": "stream",
"text": [
"ELSER Model is currently being deployed.\n",
"ELSER Model is currently being deployed.\n",
"ELSER Model has been successfully deployed.\n"
]
}
],
"source": [
"# Start the trained model deployment (this call fails with a 400 error if a deployment with the same id already exists)\n",
"client.ml.start_trained_model_deployment(\n",
" model_id=\".elser_model_2\",\n",
" number_of_allocations=1\n",
")\n"
" number_of_allocations=1,\n",
" wait_for=\"starting\"\n",
")\n",
"\n",
"# Poll the deployment until it reports the \"started\" state, but give up after\n",
"# DEPLOY_TIMEOUT_SECONDS so a stuck deployment cannot hang the notebook forever.\n",
"DEPLOY_TIMEOUT_SECONDS = 600\n",
"POLL_INTERVAL_SECONDS = 5\n",
"# Use the monotonic clock so the deadline is unaffected by wall-clock adjustments.\n",
"deadline = time.monotonic() + DEPLOY_TIMEOUT_SECONDS\n",
"\n",
"while True:\n",
"    status = client.ml.get_trained_models_stats(\n",
"        model_id=\".elser_model_2\",\n",
"    )\n",
"    # Tolerate a stats entry without deployment details instead of raising KeyError.\n",
"    deployment_stats = status[\"trained_model_stats\"][0].get(\"deployment_stats\") or {}\n",
"    state = deployment_stats.get(\"state\")\n",
"    if state == \"started\":\n",
"        print(\"ELSER Model has been successfully deployed.\")\n",
"        break\n",
"    if time.monotonic() >= deadline:\n",
"        raise TimeoutError(\n",
"            f\"ELSER model deployment did not start within {DEPLOY_TIMEOUT_SECONDS} seconds \"\n",
"            f\"(last reported state: {state!r}).\"\n",
"        )\n",
"    print(\"ELSER Model is currently being deployed.\")\n",
"    time.sleep(POLL_INTERVAL_SECONDS)\n",
"\n",
"\n"
]
},
{
Expand All @@ -286,15 +320,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 42,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XhRng99KLQsd",
"outputId": "00ea73b5-45a4-472b-f4bc-2c2c790ab94d"
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"ObjectApiResponse({'acknowledged': True})"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.ingest.put_pipeline(\n",
" id=\"elser-ingest-pipeline\", \n",
Expand Down Expand Up @@ -349,7 +394,7 @@
},
{
"cell_type": "code",
"execution_count": 162,
"execution_count": 46,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
Expand All @@ -364,7 +409,7 @@
"ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'elser-example-movies'})"
]
},
"execution_count": 162,
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -375,8 +420,6 @@
" index=\"elser-example-movies\",\n",
" settings={\n",
" \"index\": {\n",
" \"number_of_shards\": 1,\n",
" \"number_of_replicas\": 1,\n",
" \"default_pipeline\": \"elser-ingest-pipeline\"\n",
" }\n",
" },\n",
Expand Down Expand Up @@ -414,7 +457,7 @@
},
{
"cell_type": "code",
"execution_count": 163,
"execution_count": 44,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
Expand Down Expand Up @@ -482,7 +525,7 @@
},
{
"cell_type": "code",
"execution_count": 164,
"execution_count": 45,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
Expand Down