Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions notebooks/document-chunking/.nbtest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Regular expressions listed under `masks`.
# NOTE(review): presumably nbtest hides text matching these patterns in notebook
# output before comparing results — confirm against the nbtest documentation.
masks:
# A quoted 'name' key followed by any quoted value.
- "'name': '[^']+'"
# A quoted 'cluster_name' key followed by any quoted value.
- "'cluster_name': '[^']+'"
# A quoted 'cluster_uuid' key followed by any quoted value.
- "'cluster_uuid': '[^']+'"
# A quoted 'build_flavor' key followed by any quoted value.
- "'build_flavor': '[^']+'"
# Any three dot-separated groups of digits (for example 8.11.1).
- '[0-9]+\.[0-9]+\.[0-9]+'
# A quoted 'build_hash' key followed by any quoted value.
- "'build_hash': '[^']+'"
# A quoted 'build_date' key followed by any quoted value.
- "'build_date': '[^']+'"
11 changes: 7 additions & 4 deletions notebooks/document-chunking/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# Location of nbtest, relative to this directory.
NBTEST = ../../bin/nbtest
# Notebooks to test; each file name is also used as a make target below.
NOTEBOOKS = \
with-index-pipelines.ipynb

.PHONY: all
# The notebook names are declared phony too, so their rule is not skipped just
# because the .ipynb files already exist on disk.
.PHONY: all $(NOTEBOOKS)

# NOTE(review): this listing comes from a diff view, so an `all:` rule that
# passes the notebook directly to nbtest sits next to an `all:` rule that
# depends on $(NOTEBOOKS). Presumably the former is the removed version and the
# latter its replacement — confirm against the repository.
all:
$(NBTEST) \
with-index-pipelines.ipynb
all: $(NOTEBOOKS)

# Run nbtest on one notebook; $@ expands to that notebook's file name.
$(NOTEBOOKS):
$(NBTEST) $@
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1422b7bb-bc8c-42bb-b070-53fce3cf6144",
"metadata": {},
"outputs": [],
"source": [
"from getpass import getpass\n",
"\n",
"from elasticsearch import Elasticsearch\n",
"\n",
"# Credentials are prompted for rather than hard-coded.\n",
"# Cloud ID: https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id\n",
"# API key:  https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n",
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
"\n",
"# Client for the cloud deployment. For local development, pass\n",
"# hosts=[\"http://localhost:9200\"] in place of the two keyword arguments.\n",
"client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e4a89367-d23a-4340-bc92-2dcabd18adcd",
"metadata": {},
"outputs": [],
"source": [
"from elasticsearch import NotFoundError\n",
"\n",
"# Remove the example index. ignore_unavailable=True keeps a missing index\n",
"# from raising, so the pipeline cleanup below still runs.\n",
"client.indices.delete(index=\"chunk_passages_example\", ignore_unavailable=True)\n",
"\n",
"# Remove the ingest pipeline. Only \"not found\" is tolerated; any other\n",
"# failure is still reported.\n",
"try:\n",
"    client.ingest.delete_pipeline(id=\"chunk_text_to_passages\")\n",
"except NotFoundError:\n",
"    pass"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ac37f1b-6122-49fe-a3b8-e8f2025a0961",
"metadata": {},
"outputs": [],
"source": [
"# Best-effort removal of the trained model: a failure here is ignored.\n",
"try:\n",
"    client.ml.delete_trained_model(model_id=\"sentence-transformers__all-minilm-l6-v2\", force=True)\n",
"except Exception:\n",
"    # `Exception` rather than a bare `except`, so KeyboardInterrupt and\n",
"    # SystemExit still propagate.\n",
"    pass"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
13 changes: 1 addition & 12 deletions notebooks/document-chunking/with-index-pipelines.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -523,17 +523,6 @@
"\n",
"pretty_response(response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b269da89",
"metadata": {},
"outputs": [],
"source": [
"client.indices.delete(index=INDEX_NAME)\n",
"client.ingest.delete_pipeline(id=\"chunk_text_to_passages\")\n"
]
}
],
"metadata": {
Expand All @@ -555,7 +544,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.3"
"version": "3.11.6"
}
},
"nbformat": 4,
Expand Down
11 changes: 11 additions & 0 deletions notebooks/search/.nbtest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Regular expressions listed under `masks`.
# NOTE(review): presumably nbtest hides text matching these patterns in notebook
# output before comparing results — confirm against the nbtest documentation.
masks:
# A quoted 'name' key followed by any quoted value.
- "'name': '[^']+'"
# A quoted 'build_flavor' key followed by any quoted value.
- "'build_flavor': '[^']+'"
# Any three dot-separated groups of digits (for example 8.11.1).
- '[0-9]+\.[0-9]+\.[0-9]+'
# A quoted 'cluster_name' key followed by any quoted value.
- "'cluster_name': '[^']+'"
# A quoted 'cluster_uuid' key followed by any quoted value.
- "'cluster_uuid': '[^']+'"
# A quoted 'build_hash' key followed by any quoted value.
- "'build_hash': '[^']+'"
# A quoted 'build_date' key followed by any quoted value.
- "'build_date': '[^']+'"
# A quoted '_version' key followed by an unquoted integer.
- "'_version': [0-9]+"
# A whole line that starts with "ID: ".
- '^ID: .*$'
# A whole line of the form "Score: <digits>.<digits>".
- '^Score: [0-9]+\.[0-9][0-9]*$'
23 changes: 13 additions & 10 deletions notebooks/search/Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
# Location of nbtest, relative to this directory.
NBTEST = ../../bin/nbtest
# Notebooks to test; each file name is also used as a make target below.
NOTEBOOKS = \
00-quick-start.ipynb \
01-keyword-querying-filtering.ipynb \
02-hybrid-search.ipynb \
03-ELSER.ipynb \
04-multilingual.ipynb \
05-query-rules.ipynb \
06-synonyms-api.ipynb

.PHONY: all
# The notebook names are declared phony too, so their rule is not skipped just
# because the .ipynb files already exist on disk.
.PHONY: all $(NOTEBOOKS)

# NOTE(review): this listing comes from a diff view, so an `all:` rule that
# passes every notebook to nbtest directly sits next to an `all:` rule that
# depends on $(NOTEBOOKS). Presumably the former is the removed version and the
# latter its replacement — confirm against the repository.
all:
$(NBTEST) \
00-quick-start.ipynb \
01-keyword-querying-filtering.ipynb \
02-hybrid-search.ipynb \
03-ELSER.ipynb \
04-multilingual.ipynb \
05-query-rules.ipynb \
06-synonyms-api.ipynb
all: $(NOTEBOOKS)

# Run nbtest on one notebook; $@ expands to that notebook's file name.
$(NOTEBOOKS):
$(NBTEST) $@
93 changes: 93 additions & 0 deletions notebooks/search/_nbtest.setup.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e180af3a-3a2c-4186-a577-7051ec6460b1",
"metadata": {},
"outputs": [],
"source": [
"# Quietly install or upgrade the two packages this notebook imports.\n",
"!pip install --quiet --upgrade elasticsearch sentence-transformers"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63d22ea2-ecca-41bb-b08f-de8ad49cda41",
"metadata": {},
"outputs": [],
"source": [
"# Build the Elasticsearch client from interactively supplied credentials.\n",
"from getpass import getpass\n",
"\n",
"from elasticsearch import Elasticsearch\n",
"\n",
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
"\n",
"client = Elasticsearch(\n",
"    cloud_id=ELASTIC_CLOUD_ID,\n",
"    api_key=ELASTIC_API_KEY,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b367acaa-90e6-43d0-b9ae-cf42a0e2c0f1",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from urllib.request import urlopen\n",
"from sentence_transformers import SentenceTransformer\n",
"\n",
"# Recreate and populate book_index, but only for the notebooks listed here.\n",
"if NBTEST[\"notebook\"] in ['01-keyword-querying-filtering.ipynb', '02-hybrid-search.ipynb', '06-synonyms-api.ipynb']:\n",
"    # these tests need book_index to exist ahead of time\n",
"    client.indices.delete(index=\"book_index\", ignore_unavailable=True)\n",
"\n",
"    mappings = {\n",
"        \"properties\": {\n",
"            \"title_vector\": {\n",
"                \"type\": \"dense_vector\",\n",
"                \"dims\": 384,\n",
"                \"index\": \"true\",\n",
"                \"similarity\": \"cosine\"\n",
"            }\n",
"        }\n",
"    }\n",
"    client.indices.create(index='book_index', mappings=mappings)\n",
"\n",
"    url = \"https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/notebooks/search/data.json\"\n",
"    # Use the response as a context manager so the connection is closed\n",
"    # as soon as the payload has been read.\n",
"    with urlopen(url) as response:\n",
"        books = json.loads(response.read())\n",
"\n",
"    model = SentenceTransformer('all-MiniLM-L6-v2')\n",
"    operations = []\n",
"    for book in books:\n",
"        operations.append({\"index\": {\"_index\": \"book_index\"}})\n",
"        # Transforming the title into an embedding using the model\n",
"        book[\"title_vector\"] = model.encode(book[\"title\"]).tolist()\n",
"        operations.append(book)\n",
"    bulk_response = client.bulk(index=\"book_index\", operations=operations, refresh=True)\n",
"    # The bulk API reports per-item failures in the response body instead of\n",
"    # raising, so check the flag to avoid continuing with a partial index.\n",
"    if bulk_response[\"errors\"]:\n",
"        raise RuntimeError(\"bulk indexing into book_index reported errors\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
56 changes: 56 additions & 0 deletions notebooks/search/_nbtest.teardown.03-ELSER.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "7bf006aa-91cf-4c3a-b685-1f8ca5892a33",
"metadata": {},
"outputs": [],
"source": [
"from elasticsearch import Elasticsearch\n",
"from getpass import getpass\n",
"\n",
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
"\n",
"client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)\n",
"\n",
"# delete the notebook's index\n",
"client.indices.delete(index=\"elser-example-movies\", ignore_unavailable=True)\n",
"\n",
"# Pipeline and model removal are best-effort: a failure in one step must not\n",
"# stop the rest of the teardown. `except Exception` is used instead of a bare\n",
"# `except` so that KeyboardInterrupt and SystemExit still propagate.\n",
"\n",
"# delete the pipeline\n",
"try:\n",
"    client.ingest.delete_pipeline(id=\"elser-ingest-pipeline\")\n",
"except Exception:\n",
"    pass\n",
"\n",
"# delete the model\n",
"try:\n",
"    client.ml.delete_trained_model(model_id=\".elser_model_2\", force=True)\n",
"except Exception:\n",
"    pass"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
73 changes: 73 additions & 0 deletions notebooks/search/_nbtest.teardown.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "7bcf0f81-aec8-4f49-918c-3163917885ec",
"metadata": {},
"outputs": [],
"source": [
"# Maps a notebook file name to the index name looked up for it.\n",
"# 03-ELSER.ipynb is left out on purpose: it has its own teardown notebook.\n",
"indexes = dict(\n",
"    [\n",
"        (\"00-quick-start.ipynb\", \"book_index\"),\n",
"        (\"01-keyword-querying-filtering.ipynb\", \"book_index\"),\n",
"        (\"02-hybrid-search.ipynb\", \"book_index\"),\n",
"        (\"04-multilingual.ipynb\", \"articles\"),\n",
"        (\"05-query-rules.ipynb\", \"products_index\"),\n",
"        (\"06-synonyms-api.ipynb\", \"book_index\"),\n",
"    ]\n",
")\n",
"\n",
"# None when the current notebook has no entry in the table.\n",
"INDEX_NAME = indexes.get(NBTEST[\"notebook\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fcd17ce3-ece3-4268-b37b-bbf47c2437c8",
"metadata": {},
"outputs": [],
"source": [
"# Create the Elasticsearch client; both credentials are read with getpass.\n",
"from getpass import getpass\n",
"\n",
"from elasticsearch import Elasticsearch\n",
"\n",
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
"\n",
"client = Elasticsearch(\n",
"    cloud_id=ELASTIC_CLOUD_ID,\n",
"    api_key=ELASTIC_API_KEY,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abf51067-61f8-4cf3-b950-464805ea0e8d",
"metadata": {},
"outputs": [],
"source": [
"# Drop the index recorded in INDEX_NAME; nothing is deleted when it is\n",
"# empty or None. A missing index is not treated as an error.\n",
"if INDEX_NAME:\n",
"    client.indices.delete(\n",
"        index=INDEX_NAME,\n",
"        ignore_unavailable=True,\n",
"    )"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading