From b5f161f656b133ca1302fd5d561b194d03d7819d Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 3 Jul 2023 20:32:57 +0100 Subject: [PATCH 01/54] skeleton of notebooks --- .../generative-ai/chatbot.ipynb | 0 .../generative-ai/question-answering.ipynb | 24 +++++++++++++++++++ ...summarise-search-results-with-openai.ipynb | 0 colab-notebooks-examples/index.md | 0 .../integrations/cohere/cohere-intro.ipynb | 0 .../hugging-face/hugging-face-endpoints.ipynb | 0 .../loading-model-from-hugging-face.ipynb | 0 .../integrations/llama-index/intro.ipynb | 0 .../integrations/openai/intro.ipynb | 0 .../langchain-elasticsearch-embeddings.ipynb | 18 ++++++++++++++ .../langchain/langchain-vector-store.ipynb | 0 .../search/00-quick-start.ipynb | 0 .../search/01-vector-search.ipynb | 0 .../02-keyword-querying-filtering.ipynb | 0 .../search/03-hybrid-search-with-rrf.ipynb | 0 .../search/04-ELSER.ipynb | 0 .../search/05-aggregations.ipynb | 0 17 files changed, 42 insertions(+) create mode 100644 colab-notebooks-examples/generative-ai/chatbot.ipynb create mode 100644 colab-notebooks-examples/generative-ai/question-answering.ipynb create mode 100644 colab-notebooks-examples/generative-ai/summarise-search-results-with-openai.ipynb create mode 100644 colab-notebooks-examples/index.md create mode 100644 colab-notebooks-examples/integrations/cohere/cohere-intro.ipynb create mode 100644 colab-notebooks-examples/integrations/hugging-face/hugging-face-endpoints.ipynb create mode 100644 colab-notebooks-examples/integrations/hugging-face/loading-model-from-hugging-face.ipynb create mode 100644 colab-notebooks-examples/integrations/llama-index/intro.ipynb create mode 100644 colab-notebooks-examples/integrations/openai/intro.ipynb create mode 100644 colab-notebooks-examples/langchain/langchain-elasticsearch-embeddings.ipynb create mode 100644 colab-notebooks-examples/langchain/langchain-vector-store.ipynb create mode 100644 colab-notebooks-examples/search/00-quick-start.ipynb create mode 
100644 colab-notebooks-examples/search/01-vector-search.ipynb create mode 100644 colab-notebooks-examples/search/02-keyword-querying-filtering.ipynb create mode 100644 colab-notebooks-examples/search/03-hybrid-search-with-rrf.ipynb create mode 100644 colab-notebooks-examples/search/04-ELSER.ipynb create mode 100644 colab-notebooks-examples/search/05-aggregations.ipynb diff --git a/colab-notebooks-examples/generative-ai/chatbot.ipynb b/colab-notebooks-examples/generative-ai/chatbot.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/generative-ai/question-answering.ipynb b/colab-notebooks-examples/generative-ai/question-answering.ipynb new file mode 100644 index 00000000..5750b9d7 --- /dev/null +++ b/colab-notebooks-examples/generative-ai/question-answering.ipynb @@ -0,0 +1,24 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "question answering example doing the following:\n", + "\n", + "1. load the deepset model stored on huggingface via eland into elasticsearch\n", + "2. setup a pipeline + ingest data (wikipedia) to enrich\n", + "3. 
ask questions on dataset\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colab-notebooks-examples/generative-ai/summarise-search-results-with-openai.ipynb b/colab-notebooks-examples/generative-ai/summarise-search-results-with-openai.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/index.md b/colab-notebooks-examples/index.md new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/integrations/cohere/cohere-intro.ipynb b/colab-notebooks-examples/integrations/cohere/cohere-intro.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/integrations/hugging-face/hugging-face-endpoints.ipynb b/colab-notebooks-examples/integrations/hugging-face/hugging-face-endpoints.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/integrations/hugging-face/loading-model-from-hugging-face.ipynb b/colab-notebooks-examples/integrations/hugging-face/loading-model-from-hugging-face.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/integrations/llama-index/intro.ipynb b/colab-notebooks-examples/integrations/llama-index/intro.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/integrations/openai/intro.ipynb b/colab-notebooks-examples/integrations/openai/intro.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/langchain/langchain-elasticsearch-embeddings.ipynb b/colab-notebooks-examples/langchain/langchain-elasticsearch-embeddings.ipynb new file mode 100644 index 00000000..264fe34f --- /dev/null +++ b/colab-notebooks-examples/langchain/langchain-elasticsearch-embeddings.ipynb @@ -0,0 +1,18 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + 
"language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colab-notebooks-examples/langchain/langchain-vector-store.ipynb b/colab-notebooks-examples/langchain/langchain-vector-store.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/01-vector-search.ipynb b/colab-notebooks-examples/search/01-vector-search.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/02-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/02-keyword-querying-filtering.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/03-hybrid-search-with-rrf.ipynb b/colab-notebooks-examples/search/03-hybrid-search-with-rrf.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/04-ELSER.ipynb b/colab-notebooks-examples/search/04-ELSER.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/05-aggregations.ipynb b/colab-notebooks-examples/search/05-aggregations.ipynb new file mode 100644 index 00000000..e69de29b From 7f28b239ad1601ff20affde2ba2112d46b138b04 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 3 Jul 2023 20:32:57 +0100 Subject: [PATCH 02/54] skeleton of notebooks --- .../generative-ai/chatbot.ipynb | 0 .../generative-ai/question-answering.ipynb | 24 +++++++++++++++++++ ...summarise-search-results-with-openai.ipynb | 0 colab-notebooks-examples/index.md | 0 .../integrations/cohere/cohere-intro.ipynb | 0 .../hugging-face/hugging-face-endpoints.ipynb | 0 .../loading-model-from-hugging-face.ipynb | 0 .../integrations/llama-index/intro.ipynb | 0 .../integrations/openai/intro.ipynb | 0 .../langchain-elasticsearch-embeddings.ipynb | 18 
++++++++++++++ .../langchain/langchain-vector-store.ipynb | 0 .../search/00-quick-start.ipynb | 0 .../search/01-vector-search.ipynb | 0 .../02-keyword-querying-filtering.ipynb | 0 .../search/03-hybrid-search-with-rrf.ipynb | 0 .../search/04-ELSER.ipynb | 0 .../search/05-aggregations.ipynb | 0 17 files changed, 42 insertions(+) create mode 100644 colab-notebooks-examples/generative-ai/chatbot.ipynb create mode 100644 colab-notebooks-examples/generative-ai/question-answering.ipynb create mode 100644 colab-notebooks-examples/generative-ai/summarise-search-results-with-openai.ipynb create mode 100644 colab-notebooks-examples/index.md create mode 100644 colab-notebooks-examples/integrations/cohere/cohere-intro.ipynb create mode 100644 colab-notebooks-examples/integrations/hugging-face/hugging-face-endpoints.ipynb create mode 100644 colab-notebooks-examples/integrations/hugging-face/loading-model-from-hugging-face.ipynb create mode 100644 colab-notebooks-examples/integrations/llama-index/intro.ipynb create mode 100644 colab-notebooks-examples/integrations/openai/intro.ipynb create mode 100644 colab-notebooks-examples/langchain/langchain-elasticsearch-embeddings.ipynb create mode 100644 colab-notebooks-examples/langchain/langchain-vector-store.ipynb create mode 100644 colab-notebooks-examples/search/00-quick-start.ipynb create mode 100644 colab-notebooks-examples/search/01-vector-search.ipynb create mode 100644 colab-notebooks-examples/search/02-keyword-querying-filtering.ipynb create mode 100644 colab-notebooks-examples/search/03-hybrid-search-with-rrf.ipynb create mode 100644 colab-notebooks-examples/search/04-ELSER.ipynb create mode 100644 colab-notebooks-examples/search/05-aggregations.ipynb diff --git a/colab-notebooks-examples/generative-ai/chatbot.ipynb b/colab-notebooks-examples/generative-ai/chatbot.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/generative-ai/question-answering.ipynb 
b/colab-notebooks-examples/generative-ai/question-answering.ipynb new file mode 100644 index 00000000..5750b9d7 --- /dev/null +++ b/colab-notebooks-examples/generative-ai/question-answering.ipynb @@ -0,0 +1,24 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "question answering example doing the following:\n", + "\n", + "1. load the deepset model stored on huggingface via eland into elasticsearch\n", + "2. setup a pipeline + ingest data (wikipedia) to enrich\n", + "3. ask questions on dataset\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colab-notebooks-examples/generative-ai/summarise-search-results-with-openai.ipynb b/colab-notebooks-examples/generative-ai/summarise-search-results-with-openai.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/index.md b/colab-notebooks-examples/index.md new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/integrations/cohere/cohere-intro.ipynb b/colab-notebooks-examples/integrations/cohere/cohere-intro.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/integrations/hugging-face/hugging-face-endpoints.ipynb b/colab-notebooks-examples/integrations/hugging-face/hugging-face-endpoints.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/integrations/hugging-face/loading-model-from-hugging-face.ipynb b/colab-notebooks-examples/integrations/hugging-face/loading-model-from-hugging-face.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/integrations/llama-index/intro.ipynb b/colab-notebooks-examples/integrations/llama-index/intro.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/integrations/openai/intro.ipynb 
b/colab-notebooks-examples/integrations/openai/intro.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/langchain/langchain-elasticsearch-embeddings.ipynb b/colab-notebooks-examples/langchain/langchain-elasticsearch-embeddings.ipynb new file mode 100644 index 00000000..264fe34f --- /dev/null +++ b/colab-notebooks-examples/langchain/langchain-elasticsearch-embeddings.ipynb @@ -0,0 +1,18 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colab-notebooks-examples/langchain/langchain-vector-store.ipynb b/colab-notebooks-examples/langchain/langchain-vector-store.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/01-vector-search.ipynb b/colab-notebooks-examples/search/01-vector-search.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/02-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/02-keyword-querying-filtering.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/03-hybrid-search-with-rrf.ipynb b/colab-notebooks-examples/search/03-hybrid-search-with-rrf.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/04-ELSER.ipynb b/colab-notebooks-examples/search/04-ELSER.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/colab-notebooks-examples/search/05-aggregations.ipynb b/colab-notebooks-examples/search/05-aggregations.ipynb new file mode 100644 index 00000000..e69de29b From a31884f59b6af11d94c830ca693a26cec86e02b2 Mon Sep 17 00:00:00 2001 From: Joseph McElroy 
Date: Mon, 3 Jul 2023 21:34:30 +0100 Subject: [PATCH 03/54] simple notebook --- .../search/00-quick-start.ipynb | 534 ++++++++++++++++++ 1 file changed, 534 insertions(+) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index e69de29b..82f3271c 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -0,0 +1,534 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "87773ce7", + "metadata": { + "id": "87773ce7" + }, + "source": [ + "# Elasticsearch Quick Start\n", + "\n", + "\"Open\n", + "\n", + "This interactive notebook will introduce you to the very basics of getting started with simple Elasticsearch queries, using the official [Elasticsearch Python client](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html).\n", + "We'll run through getting the client up and running, indexing a small data set into Elasticsearch, and performing basic searches against your data." 
+ ] + }, + { + "cell_type": "markdown", + "id": "a32202e2", + "metadata": { + "id": "a32202e2" + }, + "source": [ + "## Create Elastic Cloud deployment\n", + "\n", + "If you don't have an Elastic Cloud deployment, sign up [here](https://cloud.elastic.co/registration?fromURI=%2Fhome) for a free trial.\n", + "\n", + "- Go to the [Create deployment](https://cloud.elastic.co/deployments/create) page\n", + " - Select **Create deployment**" + ] + }, + { + "cell_type": "markdown", + "id": "52a6a607", + "metadata": { + "id": "52a6a607" + }, + "source": [ + "## Install packages and import modules\n", + "\n", + "To get started, we'll need to connect to our Elastic deployment using the Python client.\n", + "Because we're using an Elastic Cloud deployment, we'll use the **Cloud ID** to identify our deployment.\n", + "\n", + "First we need to `pip` install the following packages:\n", + "\n", + "- `elasticsearch`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffc5fa6f", + "metadata": { + "scrolled": false, + "id": "ffc5fa6f" + }, + "outputs": [], + "source": [ + "!pip install elasticsearch" + ] + }, + { + "cell_type": "markdown", + "id": "d9cb4609", + "metadata": { + "id": "d9cb4609" + }, + "source": [ + "Next we need to import the modules we need." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "099415ba", + "metadata": { + "id": "099415ba" + }, + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch, helpers\n", + "from urllib.request import urlopen" + ] + }, + { + "cell_type": "markdown", + "id": "0241694c", + "metadata": { + "id": "0241694c" + }, + "source": [ + "## Initialize the Elasticsearch client\n", + "\n", + "Now we can instantiate the [Elasticsearch python client](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/index.html), providing the cloud id and password in your deployment." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f38e0397", + "metadata": { + "id": "f38e0397" + }, + "outputs": [], + "source": [ + "# Create the client instance\n", + "client = Elasticsearch(\n", + " cloud_id=\"\",\n", + " basic_auth=(\"elastic\", \"\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "fcd165fa", + "metadata": { + "id": "fcd165fa" + }, + "source": [ + "If you're running Elasticsearch locally or self-managed, you can pass in the Elasticsearch host instead. [Read more](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html#_verifying_https_with_certificate_fingerprints_python_3_10_or_later) on how to connect to Elasticsearch locally" + ] + }, + { + "cell_type": "markdown", + "id": "1462ebd8", + "metadata": { + "id": "1462ebd8" + }, + "source": [ + "Confirm that the client has connected with this test." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25c618eb", + "metadata": { + "id": "25c618eb" + }, + "outputs": [], + "source": [ + "print(client.info())" + ] + }, + { + "cell_type": "markdown", + "id": "61e1e6d8", + "metadata": { + "id": "61e1e6d8" + }, + "source": [ + "## Index some test data\n", + "\n", + "Our client is set up and connected to our Elastic deployment.\n", + "Now we need some data to test out the basics of Elasticsearch queries.\n", + "We'll use a small index of books with the following fields:\n", + "\n", + "- `title`\n", + "- `authors`\n", + "- `publish_date`\n", + "- `num_reviews`\n", + "- `publisher`\n", + "\n", + "### Create index\n", + "\n", + "Let's create an Elasticsearch index with the correct mappings for our test data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bc95238", + "metadata": { + "id": "6bc95238" + }, + "outputs": [], + "source": [ + "# Define the mapping\n", + "mapping = {\n", + " \"mappings\": {\n", + " \"properties\": {\n", + " \"title\": {\"type\": \"text\"},\n", + " \"authors\": {\"type\": \"keyword\"},\n", + " \"summary\": {\"type\": \"text\"},\n", + " \"publish_date\": {\"type\": \"date\"},\n", + " \"num_reviews\": {\"type\": \"integer\"},\n", + " \"publisher\": {\"type\": \"keyword\"}\n", + " }\n", + " }\n", + "}\n", + "\n", + "# Create the index\n", + "client.indices.create(index='book_index', body=mapping)\n" + ] + }, + { + "cell_type": "markdown", + "id": "075f5eb6", + "metadata": { + "id": "075f5eb6" + }, + "source": [ + "### Index test data\n", + "\n", + "Run the following command to upload some test data, containing information about 10 popular programming books.\n", + "\n", + "ℹ️ If you'd like to upload your own data from a URL, refer to the following [notebook](https://github.com/leemthompo/notebook-tests/blob/main/load-data-from-url.ipynb) for an example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "008d723e", + "metadata": { + "id": "008d723e" + }, + "outputs": [], + "source": [ + "books = [\n", + " {\n", + " \"title\": \"The Pragmatic Programmer: Your Journey to Mastery\",\n", + " \"authors\": [\"andrew hunt\", \"david thomas\"],\n", + " \"summary\": \"A guide to pragmatic programming for software engineers and developers\",\n", + " \"publish_date\": \"2019-10-29\",\n", + " \"num_reviews\": 30,\n", + " \"publisher\": \"addison-wesley\"\n", + " },\n", + " {\n", + " \"title\": \"Python Crash Course\",\n", + " \"authors\": [\"eric matthes\"],\n", + " \"summary\": \"A fast-paced, no-nonsense guide to programming in Python\",\n", + " \"publish_date\": \"2019-05-03\",\n", + " \"num_reviews\": 42,\n", + " \"publisher\": \"no starch press\"\n", + " },\n", + " {\n", + " \"title\": \"Artificial Intelligence: A Modern Approach\",\n", + " \"authors\": [\"stuart russell\", \"peter norvig\"],\n", + " \"summary\": \"Comprehensive introduction to the theory and practice of artificial intelligence\",\n", + " \"publish_date\": \"2020-04-06\",\n", + " \"num_reviews\": 39,\n", + " \"publisher\": \"pearson\"\n", + " },\n", + " {\n", + " \"title\": \"Clean Code: A Handbook of Agile Software Craftsmanship\",\n", + " \"authors\": [\"robert c. 
martin\"],\n", + " \"summary\": \"A guide to writing code that is easy to read, understand and maintain\",\n", + " \"publish_date\": \"2008-08-11\",\n", + " \"num_reviews\": 55,\n", + " \"publisher\": \"prentice hall\"\n", + " },\n", + " {\n", + " \"title\": \"You Don't Know JS: Up & Going\",\n", + " \"authors\": [\"kyle simpson\"],\n", + " \"summary\": \"Introduction to JavaScript and programming as a whole\",\n", + " \"publish_date\": \"2015-03-27\",\n", + " \"num_reviews\": 36,\n", + " \"publisher\": \"oreilly\"\n", + " },\n", + " {\n", + " \"title\": \"Eloquent JavaScript\",\n", + " \"authors\": [\"marijn haverbeke\"],\n", + " \"summary\": \"A modern introduction to programming\",\n", + " \"publish_date\": \"2018-12-04\",\n", + " \"num_reviews\": 38,\n", + " \"publisher\": \"no starch press\"\n", + " },\n", + " {\n", + " \"title\": \"Design Patterns: Elements of Reusable Object-Oriented Software\",\n", + " \"authors\": [\"erich gamma\", \"richard helm\", \"ralph johnson\", \"john vlissides\"],\n", + " \"summary\": \"Guide to design patterns that can be used in any object-oriented language\",\n", + " \"publish_date\": \"1994-10-31\",\n", + " \"num_reviews\": 45,\n", + " \"publisher\": \"addison-wesley\"\n", + " },\n", + " {\n", + " \"title\": \"The Clean Coder: A Code of Conduct for Professional Programmers\",\n", + " \"authors\": [\"robert c. 
martin\"],\n", + " \"summary\": \"A guide to professional conduct in the field of software engineering\",\n", + " \"publish_date\": \"2011-05-13\",\n", + " \"num_reviews\": 20,\n", + " \"publisher\": \"prentice hall\"\n", + " },\n", + " {\n", + " \"title\": \"JavaScript: The Good Parts\",\n", + " \"authors\": [\"douglas crockford\"],\n", + " \"summary\": \"A deep dive into the parts of JavaScript that are essential to writing maintainable code\",\n", + " \"publish_date\": \"2008-05-15\",\n", + " \"num_reviews\": 51,\n", + " \"publisher\": \"oreilly\"\n", + " },\n", + " {\n", + " \"title\": \"Introduction to the Theory of Computation\",\n", + " \"authors\": [\"michael sipser\"],\n", + " \"summary\": \"Introduction to the theory of computation and complexity theory\",\n", + " \"publish_date\": \"2012-06-27\",\n", + " \"num_reviews\": 33,\n", + " \"publisher\": \"cengage learning\"\n", + " },\n", + "]\n", + "\n", + " actions = []\n", + " for book in books:\n", + " actions.append({\"index\": {\"_index\": \"book_index\"})\n", + " actions.append(doc)\n", + "\n", + " client.bulk(index=\"book_index\", operations=actions)\n" + ] + }, + { + "cell_type": "markdown", + "id": "cd8b03e0", + "metadata": { + "id": "cd8b03e0" + }, + "source": [ + "## Aside: Pretty printing Elasticsearch responses\n", + "\n", + "Your API calls will return hard-to-read nested JSON.\n", + "We'll create a little function called `pretty_response` to return nice, human-readable outputs from our examples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f12ce2c9", + "metadata": { + "id": "f12ce2c9" + }, + "outputs": [], + "source": [ + "def pretty_response(response):\n", + " for hit in response['hits']['hits']:\n", + " id = hit['_id']\n", + " publication_date = hit['_source']['publish_date']\n", + " score = hit['_score']\n", + " title = hit['_source']['title']\n", + " summary = hit['_source']['summary']\n", + " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nScore: {score}\")\n", + " print(pretty_output)" + ] + }, + { + "cell_type": "markdown", + "id": "39bdefe0", + "metadata": { + "id": "39bdefe0" + }, + "source": [ + "## Simple queries\n", + "\n", + "Let's start by looking at simple queries which search for a particular value in a particular field." + ] + }, + { + "cell_type": "markdown", + "id": "a4bf32e8", + "metadata": { + "id": "a4bf32e8" + }, + "source": [ + "### `match` query\n", + "\n", + "The match query is the standard query for performing a full-text search.\n", + "Returns documents that match a provided text, number, date or boolean value.\n", + "The provided text is [analyzed](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-overview.html#analysis-overview) before matching.\n", + "\n", + "For multiple word queries, Elasticsearch searches for each word using OR logic.\n", + "So 'deep dive' and 'deep drive' will still return a match in our index." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd9a164c", + "metadata": { + "scrolled": true, + "id": "cd9a164c" + }, + "outputs": [], + "source": [ + "response = client.search(index=\"book_index\",\n", + " query={\n", + " 'match': {\n", + " 'summary': 'deep dive'\n", + " }})\n", + "\n", + "pretty_response(response)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "if you need to match the query on multiple fields, you can use multi_match" + ], + "metadata": { + "id": "LdAZE1M7cMMi" + }, + "id": "LdAZE1M7cMMi" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69d78b71", + "metadata": { + "id": "69d78b71" + }, + "outputs": [], + "source": [ + "response = client.search(\n", + " query= {\n", + " \"multi_match\": {\n", + " \"query\": \"javascript\",\n", + " \"fields\": [\"summary\", \"title\"],\n", + " }\n", + " }\n", + ")\n", + "pretty_response(response)" + ] + }, + { + "cell_type": "markdown", + "id": "c7234ecb", + "metadata": { + "id": "c7234ecb" + }, + "source": [ + "### `term` query\n", + "\n", + "Returns documents that contain an exact term in a provided field.\n", + "\n", + "You can use the term query to find documents based on a precise value such as a price, a product ID, or a username.\n", + "\n", + "⚠️ Avoid using the `term` query for [text](https://www.elastic.co/guide/en/elasticsearch/reference/current/text.html) fields, because these fields are modified by analysis.\n", + "To search text field values, use the `match` query instead.\n", + "\n", + "Let's say we want to search for an exact book ID:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbb4972e", + "metadata": { + "id": "cbb4972e" + }, + "outputs": [], + "source": [ + "query = {\n", + " \"term\": {\n", + " \"_id\": {\n", + " \"value\": \"_eTh_IgBM7k8MFjP03ir\",\n", + " \"boost\": 1.0\n", + " }\n", + " }\n", + " }\n", + "\n", + "response = client.search(query=query, index=\"book_index\")\n", + "pretty_response(response)" + 
] + }, + { + "cell_type": "markdown", + "id": "dd081dd7", + "metadata": { + "id": "dd081dd7" + }, + "source": [ + "Note that if you just searched for a substring like `gBM7k8` here, you would not get a match!\n", + "Use `term` queries to match full, exact values on non-`text` fields." + ] + }, + { + "cell_type": "markdown", + "id": "f7861ad8", + "metadata": { + "id": "f7861ad8" + }, + "source": [ + "### `range` query\n", + "\n", + "Returns documents that contain terms within a provided range.\n", + "Let's say we want to find all books published between January 1st 2019 and January 1st 2020." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "851561c1", + "metadata": { + "id": "851561c1" + }, + "outputs": [], + "source": [ + "query = {\n", + " \"range\": {\n", + " \"publish_date\": {\n", + " \"gte\": \"2019-01-01\", # gte = greater than or equal to\n", + " \"lte\": \"2020-01-01\", # lte = less than or equal to\n", + " }\n", + " }\n", + " }\n", + "\n", + "response = client.search(query=query, index=\"book_index\")\n", + "pretty_response(response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file From f21efb85e41863025110e84d2e6ec2555412f5d5 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 3 Jul 2023 21:36:25 +0100 Subject: [PATCH 04/54] updates --- .../search/00-quick-start.ipynb | 43 ++++++++++++------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index 
82f3271c..ab9dc49f 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "87773ce7", "metadata": { @@ -16,6 +17,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a32202e2", "metadata": { @@ -42,9 +44,7 @@ "To get started, we'll need to connect to our Elastic deployment using the Python client.\n", "Because we're using an Elastic Cloud deployment, we'll use the **Cloud ID** to identify our deployment.\n", "\n", - "First we need to `pip` install the following packages:\n", - "\n", - "- `elasticsearch`\n" + "First we need to install the `elasticsearch` Python client." ] }, { @@ -52,8 +52,8 @@ "execution_count": null, "id": "ffc5fa6f", "metadata": { - "scrolled": false, - "id": "ffc5fa6f" + "id": "ffc5fa6f", + "scrolled": false }, "outputs": [], "source": [ @@ -61,6 +61,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "d9cb4609", "metadata": { @@ -84,6 +85,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0241694c", "metadata": { @@ -112,6 +114,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "fcd165fa", "metadata": { @@ -122,6 +125,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1462ebd8", "metadata": { @@ -144,6 +148,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "61e1e6d8", "metadata": { @@ -195,6 +200,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "075f5eb6", "metadata": { @@ -309,6 +315,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "cd8b03e0", "metadata": { @@ -342,6 +349,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "39bdefe0", "metadata": { @@ -354,6 +362,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a4bf32e8", "metadata": { @@ -375,8 +384,8 @@ "execution_count": null, "id": "cd9a164c", "metadata": { - "scrolled": true, - "id": "cd9a164c" + "id": 
"cd9a164c", + "scrolled": true }, "outputs": [], "source": [ @@ -390,14 +399,15 @@ ] }, { + "attachments": {}, "cell_type": "markdown", - "source": [ - "if you need to match the query on multiple fields, you can use multi_match" - ], + "id": "LdAZE1M7cMMi", "metadata": { "id": "LdAZE1M7cMMi" }, - "id": "LdAZE1M7cMMi" + "source": [ + "if you need to match the query on multiple fields, you can use multi_match" + ] }, { "cell_type": "code", @@ -420,6 +430,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "c7234ecb", "metadata": { @@ -461,6 +472,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "dd081dd7", "metadata": { @@ -472,6 +484,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f7861ad8", "metadata": { @@ -508,6 +521,9 @@ } ], "metadata": { + "colab": { + "provenance": [] + }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", @@ -524,11 +540,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" - }, - "colab": { - "provenance": [] } }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} From 95f4feb95ecaf38f6556b181024416970179a876 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Tue, 4 Jul 2023 11:23:35 +0100 Subject: [PATCH 05/54] basics --- .../search/00-quick-start.ipynb | 5591 ++++++++++++++++- 1 file changed, 5414 insertions(+), 177 deletions(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index ab9dc49f..0722421e 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "87773ce7", "metadata": { @@ -17,7 +16,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "a32202e2", "metadata": { @@ -49,19 +47,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "ffc5fa6f", 
"metadata": { "id": "ffc5fa6f", - "scrolled": false + "scrolled": false, + "outputId": "d9693f20-1482-4a4d-b74d-a876885f92fa", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m393.8/393.8 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m68.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m46.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m89.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m63.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for sentence-transformers (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], "source": [ - "!pip install elasticsearch" + "!pip install -qU elasticsearch sentence-transformers==2.2.2" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "d9cb4609", "metadata": { @@ -73,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "099415ba", "metadata": { "id": "099415ba" @@ -85,7 +103,406 @@ ] }, { - "attachments": {}, + "cell_type": "markdown", + "source": [ + "and add the sentence transformer" + ], + "metadata": { + "id": "28AH8LhI-0UD" + }, + "id": "28AH8LhI-0UD" + }, + { + "cell_type": "code", + "source": [ + "from sentence_transformers import SentenceTransformer\n", + "import torch\n", + "\n", + "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", + "\n", + "model = SentenceTransformer('all-MiniLM-L6-v2', device=device)\n", + "model" + ], + "metadata": { + "id": "WHC3hHGW-wbI", + "outputId": "e1afa0b3-6f39-47cd-da21-c6adadc7b7de", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 552, + "referenced_widgets": [ + "8a47d05dd32e452b89c1c62f15cb57a3", + "34d9035bbca449fba7a64a7353ce9200", + "230c71ac97db42aeb048c392154c7ad8", + "327fdab26f534c918c41f9b6eb8da61e", + "6a7bbaad9509474b8b77bfcd216c2e4a", + "cca1ec7ef9174e37a5e17b2a77b39aad", + "e5616536b1824a388fa9e1ed826307f9", + "254086255be24f2b8e0ccfcfb072f379", + "d7b8af67a3a8454c97d9a68caff040c7", + "22d2c55179744b4f8d57c15aa60bd0ef", + "face41511e7c4c748e123d176b219f13", + "b27dfc2df0f94537b977baa0211165f7", + "27a5814b873b463d8ec884fce5b3002b", + "d7e199cec93c42308436a13f0101798d", + "51023e9b26e348278fb5788c4b1d3cf1", + "e37dae5f929042eea3b021d81c7e89f9", + "f41fc8e604524a118ea2b863a01d5201", + "a407443370464f68b4f503a5df4b2fd4", + "a09c50472f244d15875780d53e4cb680", + "e1623d2abb5244708dc2d5ae9bce9e6f", + "9bac318fe82342c4a9a489034d54b85c", + "6aa05f91d1a24d8fbe6bd0083649c9d9", + "70e02ff9e19b4a9f991e0777b42b22c4", + "84727c7020574c809ca155efa0996217", + 
"0e0a2c012cdf402a8c7275d42fb47d16", + "fe235613377d4443815ad548d7b19a2c", + "30ad2a5963c04d188b64448513352576", + "0229a8c31ca04b5aa8f36f049b6333c0", + "775697558ad74bd0b01b8728617395df", + "fd06e080368345679c280ae286dcf118", + "c385ab3c1ffb4a9391feaf8017ac2124", + "2a76d9c1c37a45ab8f4d7da6541c11c3", + "3897a135ce4d413f95305560e9d5f51d", + "e41fb5a2fbcf485899df455800c29f66", + "637b076d35c247348d61e924f86c7509", + "fb5eba47d398448a991aaca68e4c33f0", + "5a549312926048079d979df5f31f6668", + "c7f8a26c31154c8f9bf65dddb4095c04", + "b999f7e8580d47bf956607b2dbf59604", + "835c285bb5bf41999c6e845d485ab845", + "2083d409adf345e3b6209bf9b4e8d5df", + "4d1b0ec8de0f466eba688a33a95b6dc8", + "8ae23bf62ca44f3e9953ec0b89705a42", + "6c57b2b268a44ad09c188941cc1fa47a", + "0a566ba9f07d4a56a69dcfc467c8cfa8", + "c825476eb2a24cc9b8707d23bd0589ed", + "d290678ad04e431ba4923c0dba777399", + "b105661ae91240a9ac645a531f87db35", + "44cd00539c8c487ba2b58819b96d90b8", + "00f331e549504656ac4d8f1f07e95768", + "e6864110fe5b4caf8e1b869162a6c45a", + "cafef97c29564483b15c21b647e2e0d1", + "6a664e104c2f462f838738ea966a0115", + "c987b5a057dc475bb7cb702afe198f74", + "b571138b82b146f6add4079c7c4a3fea", + "2097ee5afa034d5fa208191458925956", + "0c0e6363c3d44ad992727ad3031b685a", + "85667ca054aa431eb50fa26233244e65", + "dceed128f53f4726afe578574c19815d", + "862c408691204fb2902f588ea8a2f614", + "57f9e08a916f4c7584d1e5c625adf509", + "98f9d91b220d4c86aaa4a20994e858a8", + "25081b6df89c4f6890d7e359905b3b8a", + "6fad6161615f45688f3c44d6179ba204", + "7eed45dcf5004dccb734eb315415a535", + "a5cd7c8a692f43f7859b4df8334c3f0f", + "3d4d613296054330bedd8eac39978a18", + "b11f3b77d8604aed8051678c6f12b172", + "f7f3cae2779d41b9b08d299596c6026c", + "9db4b498a78e435db8afd06c6632a522", + "9513eacff50642e7b0579c7de57bdacf", + "6edf7e9504fe48be802d443b3a0dd3a5", + "22a5d61c70c545d095c54b4b066d4b87", + "b8769ba773c144428df49ed117a4ba2e", + "914892930f74452d96cd787c128c955b", + "d10b85816ca540c0a5456e4126dc99c6", + 
"c686e95be6db4030b77cfcc170aaf722", + "09af9df668ad43b4a8bd2c10d36a2aab", + "eb1d39f0a2254be7857baa26432a5e56", + "73cb794c915e493e9623d5f598a71029", + "4d21a6cce39141f594d5b5d71caa5a61", + "753703fac8c54e268e0a4eb373580a0b", + "2787caea3cbc4f029e766b35ccaa6013", + "ae251276d3c8482ab768130bde5b9de3", + "440fcc9ad14241dba8e724aac69803e9", + "b5608a8229a34e8e8405bf0420104ae4", + "8c4e637876534783959a3daa9254a65a", + "1181941459e04ce29dfaf48ae1e6dff7", + "ee53579d441b4b2a97176d161b479cd1", + "ae834f736104436593dd069cf3bdd5a2", + "3418257fe9114669b00bc242c620bfb3", + "24f54307ee3b499ab7519f713ce994e1", + "e0d94012952e43549dd4d158c30c91d6", + "fa8d7e78724341ca84f0f829e33d756d", + "4cbf54c836d74602b94388254b5bf8eb", + "640450c5f17646a5b42a1ab13218db23", + "8a280b8dae964aaea3be0a299e354b17", + "f22a4dfe0f6e44c2a0bc37b3fe488439", + "5c26c4be0a0240499d427e67d89cd869", + "1fe019c07e0c4f62adfc826f89eb2dbd", + "3b2a66e66c5c4e8a8bafd6d340a779b9", + "6c2891e1aae043afbba8430ae097b882", + "7743a69d92f94e8fa358b458fe8aa4a5", + "a76ea0a19ea74594a843c62aa6fb03e1", + "8a909360e5704220987b703ba4cb3dff", + "cc4c4534d0b54d0187d5a7ed82a0376e", + "8ff8b86348424eda9678a932fbbad132", + "d43e683a46ef4d8892635fc1da1bd7c0", + "3e4fbf2956b343218d2aec4d867e1c94", + "4bde08a13c41477ba6c7a4697ab599a0", + "54062dca2e8743a68da565014c44304c", + "79efd9353e4f4ee690de5d38bd98c7aa", + "28451c97f4fa4b8991186907a50738e8", + "50d576d87fae494182632cbed50372ac", + "a57b74c1c4b6497ba8e0dafad0af4ddf", + "08a1df059a4b4c26a87ccea8c5d45bbd", + "8a37a86d8ed44c1cb70eca68215c913c", + "c3b2c605e7e94299a5dadf771baf6f0a", + "69fa56f6dfde42f883b24069f65ed745", + "bff335481e214de5b02478938672df38", + "4e25e350551e49a2bf50e081fcb0184a", + "ac4fdd9c828d473384662742dca85aa7", + "c4f9e573288a46d19efae3d101dbbdef", + "9adc61965b944edc9abff9d0de7e745a", + "483b1513ebcd4e718e69202b30163973", + "04e89773ebeb42db87c64d7b9ec7cb26", + "3f50cc3742724ab2bf8434c341eee0bd", + "449d71ebd4df4ddbb755d30fdff25358", + 
"530177663d7f47d39c43beb3ce7fab78", + "1ad860cd423c4fd893066e6925579eb7", + "7c1fc9a54be14954acd59abde2fca97f", + "92790cf373b14a058b31fc0d1fcc5c94", + "b889cd1323134a9cb0b2eb81ceb3cde1", + "7e85d153ec15430689bc77ef6bf2b3c8", + "e706e32cd2a642eb9618dd0dd75d7664", + "7c252184b6c449d1bbba4807cde4672a", + "df86bc204d014d13990917c348b09769", + "eb16cb52b8cb438984c4c75c3074de84", + "9ea920d0083f46e2add318c845b3fddf", + "cb4add21dd514bf7a3d2e51726603e36", + "ac2387223ca04438a3e4bb59cd2f7019", + "2450d6be578e4eb6960111c26c55fa43", + "8d97796bc9964114853ce5803584d159", + "05c5fdaa4d624b44857f6b7d08426e6c", + "71e2f0ea4906451f9555a638f7e713b6", + "fdb9c374bc1e48688e439dc82103c545", + "f14f96faaf3444cda3da447b10d57cee", + "95007200cc10458882a9e81d055978b9", + "530c9177c7024e98a24d8db80651ab01", + "a5abdec4f10f4b5385f01722480139e3", + "6f25e1c5d19e474aaa3f4d5806a53445", + "4fd8f1d3cb944c2e8a0a806e8c89a26f", + "143c8f9e754240e6a5a8386da714cb1b", + "3d3d1beb5ed346209eac8f4b38325ae1" + ] + } + }, + "id": "WHC3hHGW-wbI", + "execution_count": 3, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading (…)e9125/.gitattributes: 0%| | 0.00/1.18k [00:00\",\n", - " basic_auth=(\"elastic\", \"\")\n", + " cloud_id=\"YOUR_CLOUD_ID\",\n", + " basic_auth=(\"elastic\", \"YOUR_PASSWORD\")\n", ")" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "fcd165fa", "metadata": { @@ -125,7 +541,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "1462ebd8", "metadata": { @@ -137,18 +552,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "25c618eb", "metadata": { - "id": "25c618eb" + "id": "25c618eb", + "outputId": "f3ea04dc-a30a-4ea1-a393-addb35d29fac", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], + "outputs": [ + { +
"output_type": "stream", + "name": "stdout", + "text": [ + "{'name': 'instance-0000000000', 'cluster_name': '1a56ad21587c44d3930932eb9fa1d8e8', 'cluster_uuid': 'gX4zlwtlR4qhZpp1SPm4Yg', 'version': {'number': '8.8.1', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'f8edfccba429b6477927a7c1ce1bc6729521305e', 'build_date': '2023-06-05T21:32:25.188464208Z', 'build_snapshot': False, 'lucene_version': '9.6.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n" + ] + } + ], "source": [ "print(client.info())" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "61e1e6d8", "metadata": { @@ -174,12 +600,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "6bc95238", "metadata": { - "id": "6bc95238" + "id": "6bc95238", + "outputId": "2b6d0360-43e8-4cbe-fada-80bf475510ed", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":17: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", + " client.indices.create(index='book_index', body=mapping)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'book_index'})" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ], "source": [ "# Define the mapping\n", "mapping = {\n", @@ -190,7 +639,8 @@ " \"summary\": {\"type\": \"text\"},\n", " \"publish_date\": {\"type\": \"date\"},\n", " \"num_reviews\": {\"type\": \"integer\"},\n", - " \"publisher\": {\"type\": \"keyword\"}\n", + " \"publisher\": {\"type\": \"keyword\"},\n", + " \"title_vector\": { \"type\": \"dense_vector\", \"dims\": 384, \"index\": \"true\", \"similarity\": \"dot_product\" }\n", " }\n", " }\n", "}\n", @@ -200,7 +650,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "075f5eb6", "metadata": { @@ -216,12 +665,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "008d723e", "metadata": { - "id": "008d723e" + "id": "008d723e", + "outputId": "430bbad4-404d-4f5d-dc23-d4e462b931fa", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "ObjectApiResponse({'took': 47, 'errors': False, 'items': [{'index': {'_index': 'book_index', '_id': 'eWsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'emsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'e2sDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 
'book_index', '_id': 'fGsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'fWsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 4, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'fmsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 5, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'f2sDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'gGsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'gWsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 8, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'gmsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 9, '_primary_term': 1, 'status': 201}}]})" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ], "source": [ "books = [\n", " {\n", @@ -306,16 +770,17 @@ " },\n", "]\n", "\n", - " actions = []\n", - " for book in books:\n", - " actions.append({\"index\": {\"_index\": \"book_index\"})\n", - " actions.append(doc)\n", + "actions = []\n", + "for book in books:\n", + " actions.append({\"index\": {\"_index\": \"book_index\"}})\n", + " titleEmbedding = model.encode(book[\"title\"]).tolist()\n", + " book[\"title_vector\"] = titleEmbedding\n", + " actions.append(book)\n", "\n", - " 
client.bulk(index=\"book_index\", operations=actions)\n" + "client.bulk(index=\"book_index\", operations=actions)\n" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "cd8b03e0", "metadata": { @@ -349,174 +814,154 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "39bdefe0", "metadata": { "id": "39bdefe0" }, "source": [ - "## Simple queries\n", + "## Querying\n", "\n", "Let's start by looking at simple queries which search for a particular value in a particular field." ] }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a4bf32e8", - "metadata": { - "id": "a4bf32e8" - }, - "source": [ - "### `match` query\n", - "\n", - "The match query is the standard query for performing a full-text search.\n", - "Returns documents that match a provided text, number, date or boolean value.\n", - "The provided text is [analyzed](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-overview.html#analysis-overview) before matching.\n", - "\n", - "For multiple word queries, Elasticsearch searches for each word using OR logic.\n", - "So 'deep dive' and 'deep drive' will still return a match in our index."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cd9a164c", - "metadata": { - "id": "cd9a164c", - "scrolled": true - }, - "outputs": [], - "source": [ - "response = client.search(index=\"book_index\",\n", - " query={\n", - " 'match': {\n", - " 'summary': 'deep dive'\n", - " }})\n", - "\n", - "pretty_response(response)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "LdAZE1M7cMMi", - "metadata": { - "id": "LdAZE1M7cMMi" - }, - "source": [ - "if you need to match the query on multiple fields, you can use multi_match" - ] - }, { "cell_type": "code", - "execution_count": null, - "id": "69d78b71", - "metadata": { - "id": "69d78b71" - }, - "outputs": [], - "source": [ - "response = client.search(\n", - " query= {\n", - " \"multi_match\": {\n", - " \"query\": \"javascript\",\n", - " \"fields\": [\"summary\", \"title\"],\n", - " }\n", - " }\n", - ")\n", - "pretty_response(response)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c7234ecb", - "metadata": { - "id": "c7234ecb" - }, "source": [ - "### `term` query\n", - "\n", - "Returns documents that contain an exact term in a provided field.\n", - "\n", - "You can use the term query to find documents based on a precise value such as a price, a product ID, or a username.\n", - "\n", - "⚠️ Avoid using the `term` query for [text](https://www.elastic.co/guide/en/elasticsearch/reference/current/text.html) fields, because these fields are modified by analysis.\n", - "To search text field values, use the `match` query instead.\n", + "def pretty_response(response):\n", + " for hit in response['hits']['hits']:\n", + " id = hit['_id']\n", + " publication_date = hit['_source']['publish_date']\n", + " score = hit['_score']\n", + " title = hit['_source']['title']\n", + " summary = hit['_source']['summary']\n", + " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nScore: {score}\")\n", + " 
print(pretty_output)\n", "\n", - "Let's say we want to search for an exact book ID:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cbb4972e", - "metadata": { - "id": "cbb4972e" - }, - "outputs": [], - "source": [ - "query = {\n", - " \"term\": {\n", - " \"_id\": {\n", - " \"value\": \"_eTh_IgBM7k8MFjP03ir\",\n", - " \"boost\": 1.0\n", - " }\n", - " }\n", + "response = client.search(index=\"book_index\", body={\n", + " \"knn\": {\n", + " \"field\": \"title_vector\",\n", + " \"query_vector\": model.encode(\"Best Project management books?\"),\n", + " \"k\": 10,\n", + " \"num_candidates\": 100\n", " }\n", + "})\n", "\n", - "response = client.search(query=query, index=\"book_index\")\n", - "pretty_response(response)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "dd081dd7", - "metadata": { - "id": "dd081dd7" - }, - "source": [ - "Note that if you just searched for a substring like `gBM7k8` here, you would not get a match!\n", - "Use `term` queries to match full, exact values on non-`text` fields." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f7861ad8", + "pretty_response(response)\n", + "\n", + "\n" + ], "metadata": { - "id": "f7861ad8" + "id": "Df7hwcIjYwMT", + "outputId": "a5569fa1-163a-45a0-d9d6-bc779feb59db", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "source": [ - "### `range` query\n", - "\n", - "Returns documents that contain terms within a provided range.\n", - "Let's say we want to find all books published between January 1st 2019 and January 1st 2020." + "id": "Df7hwcIjYwMT", + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":11: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", + " response = client.search(index=\"book_index\", body={\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "ID: fGsDIIkB6SgI-NN4Uquf\n", + "Publication date: 2008-08-11\n", + "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", + "Summary: A guide to writing code that is easy to read, understand and maintain\n", + "Score: 0.6285683\n", + "\n", + "ID: eWsDIIkB6SgI-NN4Uquf\n", + "Publication date: 2019-10-29\n", + "Title: The Pragmatic Programmer: Your Journey to Mastery\n", + "Summary: A guide to pragmatic programming for software engineers and developers\n", + "Score: 0.62295747\n", + "\n", + "ID: gGsDIIkB6SgI-NN4Uquf\n", + "Publication date: 2011-05-13\n", + "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", + "Summary: A guide to professional conduct in the field of software engineering\n", + "Score: 0.5932041\n", + "\n", + "ID: f2sDIIkB6SgI-NN4Uquf\n", + "Publication date: 1994-10-31\n", + "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", + "Summary: Guide to design patterns that can be used in any object-oriented language\n", + "Score: 0.5909667\n", + "\n", + "ID: gmsDIIkB6SgI-NN4Uquf\n", + "Publication date: 2012-06-27\n", + "Title: Introduction to the Theory of Computation\n", + "Summary: Introduction to the theory of computation and complexity theory\n", + "Score: 0.5843217\n", + "\n", + "ID: gWsDIIkB6SgI-NN4Uquf\n", + "Publication date: 2008-05-15\n", + "Title: JavaScript: The Good Parts\n", + "Summary: A deep dive into the parts of JavaScript that are essential to writing maintainable code\n", + "Score: 0.5775348\n", + "\n", + "ID: e2sDIIkB6SgI-NN4Uquf\n", + "Publication date: 2020-04-06\n", + "Title: Artificial Intelligence: A Modern Approach\n", + "Summary: Comprehensive introduction to the theory and practice of artificial intelligence\n", + "Score: 0.5705365\n", + "\n", + "ID: emsDIIkB6SgI-NN4Uquf\n", + "Publication 
date: 2019-05-03\n", + "Title: Python Crash Course\n", + "Summary: A fast-paced, no-nonsense guide to programming in Python\n", + "Score: 0.55375147\n", + "\n", + "ID: fmsDIIkB6SgI-NN4Uquf\n", + "Publication date: 2018-12-04\n", + "Title: Eloquent JavaScript\n", + "Summary: A modern introduction to programming\n", + "Score: 0.531436\n", + "\n", + "ID: fWsDIIkB6SgI-NN4Uquf\n", + "Publication date: 2015-03-27\n", + "Title: You Don't Know JS: Up & Going\n", + "Summary: Introduction to JavaScript and programming as a whole\n", + "Score: 0.52609706\n" + ] + } ] }, { "cell_type": "code", - "execution_count": null, - "id": "851561c1", + "source": [ + "client.indices.delete(index=\"book_index\")" + ], "metadata": { - "id": "851561c1" + "id": "pWYkmofeaSk8", + "outputId": "c8aa61e0-c33f-4a9b-e05d-09d2d5e75ef3", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], - "source": [ - "query = {\n", - " \"range\": {\n", - " \"publish_date\": {\n", - " \"gte\": \"2019-01-01\", # gte = greater than or equal to\n", - " \"lte\": \"2020-01-01\", # lte = less than or equal to\n", - " }\n", - " }\n", - " }\n", - "\n", - "response = client.search(query=query, index=\"book_index\")\n", - "pretty_response(response)" + "id": "pWYkmofeaSk8", + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True})" + ] + }, + "metadata": {}, + "execution_count": 13 + } ] } ], @@ -540,8 +985,4800 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "8a47d05dd32e452b89c1c62f15cb57a3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_34d9035bbca449fba7a64a7353ce9200", + "IPY_MODEL_230c71ac97db42aeb048c392154c7ad8", + "IPY_MODEL_327fdab26f534c918c41f9b6eb8da61e" + ], + "layout": "IPY_MODEL_6a7bbaad9509474b8b77bfcd216c2e4a" + } + }, + "34d9035bbca449fba7a64a7353ce9200": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cca1ec7ef9174e37a5e17b2a77b39aad", + "placeholder": "​", + "style": "IPY_MODEL_e5616536b1824a388fa9e1ed826307f9", + "value": "Downloading (…)e9125/.gitattributes: 100%" + } + }, + "230c71ac97db42aeb048c392154c7ad8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_254086255be24f2b8e0ccfcfb072f379", + "max": 1175, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d7b8af67a3a8454c97d9a68caff040c7", + "value": 1175 + } + }, + "327fdab26f534c918c41f9b6eb8da61e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_22d2c55179744b4f8d57c15aa60bd0ef", + "placeholder": "​", + "style": "IPY_MODEL_face41511e7c4c748e123d176b219f13", + "value": " 1.18k/1.18k [00:00<00:00, 54.2kB/s]" + } + }, + "6a7bbaad9509474b8b77bfcd216c2e4a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cca1ec7ef9174e37a5e17b2a77b39aad": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e5616536b1824a388fa9e1ed826307f9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "254086255be24f2b8e0ccfcfb072f379": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, 
+ "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d7b8af67a3a8454c97d9a68caff040c7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "22d2c55179744b4f8d57c15aa60bd0ef": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "face41511e7c4c748e123d176b219f13": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b27dfc2df0f94537b977baa0211165f7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_27a5814b873b463d8ec884fce5b3002b", + "IPY_MODEL_d7e199cec93c42308436a13f0101798d", + "IPY_MODEL_51023e9b26e348278fb5788c4b1d3cf1" + ], + "layout": "IPY_MODEL_e37dae5f929042eea3b021d81c7e89f9" + } + }, + "27a5814b873b463d8ec884fce5b3002b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": 
"", + "description_tooltip": null, + "layout": "IPY_MODEL_f41fc8e604524a118ea2b863a01d5201", + "placeholder": "​", + "style": "IPY_MODEL_a407443370464f68b4f503a5df4b2fd4", + "value": "Downloading (…)_Pooling/config.json: 100%" + } + }, + "d7e199cec93c42308436a13f0101798d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a09c50472f244d15875780d53e4cb680", + "max": 190, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e1623d2abb5244708dc2d5ae9bce9e6f", + "value": 190 + } + }, + "51023e9b26e348278fb5788c4b1d3cf1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9bac318fe82342c4a9a489034d54b85c", + "placeholder": "​", + "style": "IPY_MODEL_6aa05f91d1a24d8fbe6bd0083649c9d9", + "value": " 190/190 [00:00<00:00, 10.5kB/s]" + } + }, + "e37dae5f929042eea3b021d81c7e89f9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f41fc8e604524a118ea2b863a01d5201": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a407443370464f68b4f503a5df4b2fd4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a09c50472f244d15875780d53e4cb680": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e1623d2abb5244708dc2d5ae9bce9e6f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + 
"model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9bac318fe82342c4a9a489034d54b85c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6aa05f91d1a24d8fbe6bd0083649c9d9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "70e02ff9e19b4a9f991e0777b42b22c4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_84727c7020574c809ca155efa0996217", + "IPY_MODEL_0e0a2c012cdf402a8c7275d42fb47d16", + "IPY_MODEL_fe235613377d4443815ad548d7b19a2c" + ], + "layout": "IPY_MODEL_30ad2a5963c04d188b64448513352576" + } + }, + "84727c7020574c809ca155efa0996217": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0229a8c31ca04b5aa8f36f049b6333c0", + "placeholder": "​", + "style": "IPY_MODEL_775697558ad74bd0b01b8728617395df", + "value": "Downloading (…)7e55de9125/README.md: 100%" + } + }, + "0e0a2c012cdf402a8c7275d42fb47d16": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_fd06e080368345679c280ae286dcf118", + "max": 10610, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c385ab3c1ffb4a9391feaf8017ac2124", + "value": 10610 + } + }, + "fe235613377d4443815ad548d7b19a2c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2a76d9c1c37a45ab8f4d7da6541c11c3", + "placeholder": "​", + "style": "IPY_MODEL_3897a135ce4d413f95305560e9d5f51d", + "value": " 10.6k/10.6k [00:00<00:00, 446kB/s]" + } + }, + "30ad2a5963c04d188b64448513352576": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0229a8c31ca04b5aa8f36f049b6333c0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "775697558ad74bd0b01b8728617395df": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fd06e080368345679c280ae286dcf118": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c385ab3c1ffb4a9391feaf8017ac2124": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2a76d9c1c37a45ab8f4d7da6541c11c3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3897a135ce4d413f95305560e9d5f51d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e41fb5a2fbcf485899df455800c29f66": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_637b076d35c247348d61e924f86c7509", + "IPY_MODEL_fb5eba47d398448a991aaca68e4c33f0", + "IPY_MODEL_5a549312926048079d979df5f31f6668" + ], + "layout": "IPY_MODEL_c7f8a26c31154c8f9bf65dddb4095c04" + } + }, + "637b076d35c247348d61e924f86c7509": 
{ + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b999f7e8580d47bf956607b2dbf59604", + "placeholder": "​", + "style": "IPY_MODEL_835c285bb5bf41999c6e845d485ab845", + "value": "Downloading (…)55de9125/config.json: 100%" + } + }, + "fb5eba47d398448a991aaca68e4c33f0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2083d409adf345e3b6209bf9b4e8d5df", + "max": 612, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4d1b0ec8de0f466eba688a33a95b6dc8", + "value": 612 + } + }, + "5a549312926048079d979df5f31f6668": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8ae23bf62ca44f3e9953ec0b89705a42", + "placeholder": "​", + "style": "IPY_MODEL_6c57b2b268a44ad09c188941cc1fa47a", 
+ "value": " 612/612 [00:00<00:00, 34.3kB/s]" + } + }, + "c7f8a26c31154c8f9bf65dddb4095c04": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b999f7e8580d47bf956607b2dbf59604": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "835c285bb5bf41999c6e845d485ab845": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2083d409adf345e3b6209bf9b4e8d5df": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + 
"min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4d1b0ec8de0f466eba688a33a95b6dc8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8ae23bf62ca44f3e9953ec0b89705a42": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"6c57b2b268a44ad09c188941cc1fa47a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0a566ba9f07d4a56a69dcfc467c8cfa8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c825476eb2a24cc9b8707d23bd0589ed", + "IPY_MODEL_d290678ad04e431ba4923c0dba777399", + "IPY_MODEL_b105661ae91240a9ac645a531f87db35" + ], + "layout": "IPY_MODEL_44cd00539c8c487ba2b58819b96d90b8" + } + }, + "c825476eb2a24cc9b8707d23bd0589ed": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_00f331e549504656ac4d8f1f07e95768", + "placeholder": "​", + "style": "IPY_MODEL_e6864110fe5b4caf8e1b869162a6c45a", + "value": "Downloading (…)ce_transformers.json: 100%" + } + }, + "d290678ad04e431ba4923c0dba777399": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + 
"state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cafef97c29564483b15c21b647e2e0d1", + "max": 116, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6a664e104c2f462f838738ea966a0115", + "value": 116 + } + }, + "b105661ae91240a9ac645a531f87db35": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c987b5a057dc475bb7cb702afe198f74", + "placeholder": "​", + "style": "IPY_MODEL_b571138b82b146f6add4079c7c4a3fea", + "value": " 116/116 [00:00<00:00, 4.55kB/s]" + } + }, + "44cd00539c8c487ba2b58819b96d90b8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "00f331e549504656ac4d8f1f07e95768": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e6864110fe5b4caf8e1b869162a6c45a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cafef97c29564483b15c21b647e2e0d1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6a664e104c2f462f838738ea966a0115": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"c987b5a057dc475bb7cb702afe198f74": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b571138b82b146f6add4079c7c4a3fea": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2097ee5afa034d5fa208191458925956": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0c0e6363c3d44ad992727ad3031b685a", + "IPY_MODEL_85667ca054aa431eb50fa26233244e65", + "IPY_MODEL_dceed128f53f4726afe578574c19815d" + ], + "layout": "IPY_MODEL_862c408691204fb2902f588ea8a2f614" + } + }, + "0c0e6363c3d44ad992727ad3031b685a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_57f9e08a916f4c7584d1e5c625adf509", + "placeholder": "​", + "style": "IPY_MODEL_98f9d91b220d4c86aaa4a20994e858a8", + "value": "Downloading (…)125/data_config.json: 100%" + } + }, + "85667ca054aa431eb50fa26233244e65": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_25081b6df89c4f6890d7e359905b3b8a", + "max": 39265, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6fad6161615f45688f3c44d6179ba204", + "value": 39265 + } + }, + "dceed128f53f4726afe578574c19815d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], 
+ "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7eed45dcf5004dccb734eb315415a535", + "placeholder": "​", + "style": "IPY_MODEL_a5cd7c8a692f43f7859b4df8334c3f0f", + "value": " 39.3k/39.3k [00:00<00:00, 228kB/s]" + } + }, + "862c408691204fb2902f588ea8a2f614": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "57f9e08a916f4c7584d1e5c625adf509": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "98f9d91b220d4c86aaa4a20994e858a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "25081b6df89c4f6890d7e359905b3b8a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + 
"flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6fad6161615f45688f3c44d6179ba204": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7eed45dcf5004dccb734eb315415a535": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": 
null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a5cd7c8a692f43f7859b4df8334c3f0f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3d4d613296054330bedd8eac39978a18": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b11f3b77d8604aed8051678c6f12b172", + "IPY_MODEL_f7f3cae2779d41b9b08d299596c6026c", + "IPY_MODEL_9db4b498a78e435db8afd06c6632a522" + ], + "layout": "IPY_MODEL_9513eacff50642e7b0579c7de57bdacf" + } + }, + "b11f3b77d8604aed8051678c6f12b172": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6edf7e9504fe48be802d443b3a0dd3a5", + "placeholder": "​", + "style": "IPY_MODEL_22a5d61c70c545d095c54b4b066d4b87", + "value": "Downloading pytorch_model.bin: 100%" + } + }, + "f7f3cae2779d41b9b08d299596c6026c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b8769ba773c144428df49ed117a4ba2e", + "max": 90888945, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_914892930f74452d96cd787c128c955b", + "value": 90888945 + } + }, + "9db4b498a78e435db8afd06c6632a522": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d10b85816ca540c0a5456e4126dc99c6", + "placeholder": "​", + "style": "IPY_MODEL_c686e95be6db4030b77cfcc170aaf722", + "value": " 90.9M/90.9M [00:01<00:00, 85.9MB/s]" + } + }, + "9513eacff50642e7b0579c7de57bdacf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6edf7e9504fe48be802d443b3a0dd3a5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "22a5d61c70c545d095c54b4b066d4b87": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b8769ba773c144428df49ed117a4ba2e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "914892930f74452d96cd787c128c955b": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d10b85816ca540c0a5456e4126dc99c6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c686e95be6db4030b77cfcc170aaf722": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "09af9df668ad43b4a8bd2c10d36a2aab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_eb1d39f0a2254be7857baa26432a5e56", + "IPY_MODEL_73cb794c915e493e9623d5f598a71029", + "IPY_MODEL_4d21a6cce39141f594d5b5d71caa5a61" + ], + "layout": "IPY_MODEL_753703fac8c54e268e0a4eb373580a0b" + } + }, + "eb1d39f0a2254be7857baa26432a5e56": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2787caea3cbc4f029e766b35ccaa6013", + "placeholder": "​", + "style": "IPY_MODEL_ae251276d3c8482ab768130bde5b9de3", + "value": "Downloading (…)nce_bert_config.json: 100%" + } + }, + "73cb794c915e493e9623d5f598a71029": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + 
"layout": "IPY_MODEL_440fcc9ad14241dba8e724aac69803e9", + "max": 53, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b5608a8229a34e8e8405bf0420104ae4", + "value": 53 + } + }, + "4d21a6cce39141f594d5b5d71caa5a61": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8c4e637876534783959a3daa9254a65a", + "placeholder": "​", + "style": "IPY_MODEL_1181941459e04ce29dfaf48ae1e6dff7", + "value": " 53.0/53.0 [00:00<00:00, 2.38kB/s]" + } + }, + "753703fac8c54e268e0a4eb373580a0b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2787caea3cbc4f029e766b35ccaa6013": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae251276d3c8482ab768130bde5b9de3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "440fcc9ad14241dba8e724aac69803e9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b5608a8229a34e8e8405bf0420104ae4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8c4e637876534783959a3daa9254a65a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1181941459e04ce29dfaf48ae1e6dff7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ee53579d441b4b2a97176d161b479cd1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ae834f736104436593dd069cf3bdd5a2", + "IPY_MODEL_3418257fe9114669b00bc242c620bfb3", + "IPY_MODEL_24f54307ee3b499ab7519f713ce994e1" + ], + "layout": "IPY_MODEL_e0d94012952e43549dd4d158c30c91d6" + } + }, + "ae834f736104436593dd069cf3bdd5a2": 
{ + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fa8d7e78724341ca84f0f829e33d756d", + "placeholder": "​", + "style": "IPY_MODEL_4cbf54c836d74602b94388254b5bf8eb", + "value": "Downloading (…)cial_tokens_map.json: 100%" + } + }, + "3418257fe9114669b00bc242c620bfb3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_640450c5f17646a5b42a1ab13218db23", + "max": 112, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8a280b8dae964aaea3be0a299e354b17", + "value": 112 + } + }, + "24f54307ee3b499ab7519f713ce994e1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f22a4dfe0f6e44c2a0bc37b3fe488439", + "placeholder": "​", + "style": "IPY_MODEL_5c26c4be0a0240499d427e67d89cd869", 
+ "value": " 112/112 [00:00<00:00, 5.97kB/s]" + } + }, + "e0d94012952e43549dd4d158c30c91d6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fa8d7e78724341ca84f0f829e33d756d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4cbf54c836d74602b94388254b5bf8eb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "640450c5f17646a5b42a1ab13218db23": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + 
"min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8a280b8dae964aaea3be0a299e354b17": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f22a4dfe0f6e44c2a0bc37b3fe488439": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"5c26c4be0a0240499d427e67d89cd869": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1fe019c07e0c4f62adfc826f89eb2dbd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3b2a66e66c5c4e8a8bafd6d340a779b9", + "IPY_MODEL_6c2891e1aae043afbba8430ae097b882", + "IPY_MODEL_7743a69d92f94e8fa358b458fe8aa4a5" + ], + "layout": "IPY_MODEL_a76ea0a19ea74594a843c62aa6fb03e1" + } + }, + "3b2a66e66c5c4e8a8bafd6d340a779b9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8a909360e5704220987b703ba4cb3dff", + "placeholder": "​", + "style": "IPY_MODEL_cc4c4534d0b54d0187d5a7ed82a0376e", + "value": "Downloading (…)e9125/tokenizer.json: 100%" + } + }, + "6c2891e1aae043afbba8430ae097b882": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + 
"state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8ff8b86348424eda9678a932fbbad132", + "max": 466247, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d43e683a46ef4d8892635fc1da1bd7c0", + "value": 466247 + } + }, + "7743a69d92f94e8fa358b458fe8aa4a5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3e4fbf2956b343218d2aec4d867e1c94", + "placeholder": "​", + "style": "IPY_MODEL_4bde08a13c41477ba6c7a4697ab599a0", + "value": " 466k/466k [00:00<00:00, 905kB/s]" + } + }, + "a76ea0a19ea74594a843c62aa6fb03e1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8a909360e5704220987b703ba4cb3dff": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cc4c4534d0b54d0187d5a7ed82a0376e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8ff8b86348424eda9678a932fbbad132": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d43e683a46ef4d8892635fc1da1bd7c0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"3e4fbf2956b343218d2aec4d867e1c94": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4bde08a13c41477ba6c7a4697ab599a0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "54062dca2e8743a68da565014c44304c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_79efd9353e4f4ee690de5d38bd98c7aa", + "IPY_MODEL_28451c97f4fa4b8991186907a50738e8", + "IPY_MODEL_50d576d87fae494182632cbed50372ac" + ], + "layout": "IPY_MODEL_a57b74c1c4b6497ba8e0dafad0af4ddf" + } + }, + "79efd9353e4f4ee690de5d38bd98c7aa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_08a1df059a4b4c26a87ccea8c5d45bbd", + "placeholder": "​", + "style": "IPY_MODEL_8a37a86d8ed44c1cb70eca68215c913c", + "value": "Downloading (…)okenizer_config.json: 100%" + } + }, + "28451c97f4fa4b8991186907a50738e8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c3b2c605e7e94299a5dadf771baf6f0a", + "max": 350, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_69fa56f6dfde42f883b24069f65ed745", + "value": 350 + } + }, + "50d576d87fae494182632cbed50372ac": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bff335481e214de5b02478938672df38", + "placeholder": "​", + "style": "IPY_MODEL_4e25e350551e49a2bf50e081fcb0184a", + "value": " 350/350 [00:00<00:00, 20.4kB/s]" + } + }, + "a57b74c1c4b6497ba8e0dafad0af4ddf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "08a1df059a4b4c26a87ccea8c5d45bbd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8a37a86d8ed44c1cb70eca68215c913c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c3b2c605e7e94299a5dadf771baf6f0a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + 
"flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "69fa56f6dfde42f883b24069f65ed745": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bff335481e214de5b02478938672df38": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": 
null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e25e350551e49a2bf50e081fcb0184a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ac4fdd9c828d473384662742dca85aa7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c4f9e573288a46d19efae3d101dbbdef", + "IPY_MODEL_9adc61965b944edc9abff9d0de7e745a", + "IPY_MODEL_483b1513ebcd4e718e69202b30163973" + ], + "layout": "IPY_MODEL_04e89773ebeb42db87c64d7b9ec7cb26" + } + }, + "c4f9e573288a46d19efae3d101dbbdef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3f50cc3742724ab2bf8434c341eee0bd", + "placeholder": "​", + "style": "IPY_MODEL_449d71ebd4df4ddbb755d30fdff25358", + "value": "Downloading (…)9125/train_script.py: 100%" + } + }, + "9adc61965b944edc9abff9d0de7e745a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_530177663d7f47d39c43beb3ce7fab78", + "max": 13156, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1ad860cd423c4fd893066e6925579eb7", + "value": 13156 + } + }, + "483b1513ebcd4e718e69202b30163973": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7c1fc9a54be14954acd59abde2fca97f", + "placeholder": "​", + "style": "IPY_MODEL_92790cf373b14a058b31fc0d1fcc5c94", + "value": " 13.2k/13.2k [00:00<00:00, 402kB/s]" + } + }, + "04e89773ebeb42db87c64d7b9ec7cb26": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3f50cc3742724ab2bf8434c341eee0bd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "449d71ebd4df4ddbb755d30fdff25358": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "530177663d7f47d39c43beb3ce7fab78": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1ad860cd423c4fd893066e6925579eb7": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7c1fc9a54be14954acd59abde2fca97f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "92790cf373b14a058b31fc0d1fcc5c94": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b889cd1323134a9cb0b2eb81ceb3cde1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7e85d153ec15430689bc77ef6bf2b3c8", + "IPY_MODEL_e706e32cd2a642eb9618dd0dd75d7664", + "IPY_MODEL_7c252184b6c449d1bbba4807cde4672a" + ], + "layout": "IPY_MODEL_df86bc204d014d13990917c348b09769" + } + }, + "7e85d153ec15430689bc77ef6bf2b3c8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_eb16cb52b8cb438984c4c75c3074de84", + "placeholder": "​", + "style": "IPY_MODEL_9ea920d0083f46e2add318c845b3fddf", + "value": "Downloading (…)7e55de9125/vocab.txt: 100%" + } + }, + "e706e32cd2a642eb9618dd0dd75d7664": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + 
"layout": "IPY_MODEL_cb4add21dd514bf7a3d2e51726603e36", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ac2387223ca04438a3e4bb59cd2f7019", + "value": 231508 + } + }, + "7c252184b6c449d1bbba4807cde4672a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2450d6be578e4eb6960111c26c55fa43", + "placeholder": "​", + "style": "IPY_MODEL_8d97796bc9964114853ce5803584d159", + "value": " 232k/232k [00:00<00:00, 1.37MB/s]" + } + }, + "df86bc204d014d13990917c348b09769": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": 
null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eb16cb52b8cb438984c4c75c3074de84": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9ea920d0083f46e2add318c845b3fddf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cb4add21dd514bf7a3d2e51726603e36": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { 
+ "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ac2387223ca04438a3e4bb59cd2f7019": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2450d6be578e4eb6960111c26c55fa43": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8d97796bc9964114853ce5803584d159": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "05c5fdaa4d624b44857f6b7d08426e6c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_71e2f0ea4906451f9555a638f7e713b6", + "IPY_MODEL_fdb9c374bc1e48688e439dc82103c545", + "IPY_MODEL_f14f96faaf3444cda3da447b10d57cee" + ], + "layout": "IPY_MODEL_95007200cc10458882a9e81d055978b9" + } + }, + "71e2f0ea4906451f9555a638f7e713b6": 
{ + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_530c9177c7024e98a24d8db80651ab01", + "placeholder": "​", + "style": "IPY_MODEL_a5abdec4f10f4b5385f01722480139e3", + "value": "Downloading (…)5de9125/modules.json: 100%" + } + }, + "fdb9c374bc1e48688e439dc82103c545": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6f25e1c5d19e474aaa3f4d5806a53445", + "max": 349, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4fd8f1d3cb944c2e8a0a806e8c89a26f", + "value": 349 + } + }, + "f14f96faaf3444cda3da447b10d57cee": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_143c8f9e754240e6a5a8386da714cb1b", + "placeholder": "​", + "style": "IPY_MODEL_3d3d1beb5ed346209eac8f4b38325ae1", 
+ "value": " 349/349 [00:00<00:00, 14.8kB/s]" + } + }, + "95007200cc10458882a9e81d055978b9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "530c9177c7024e98a24d8db80651ab01": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a5abdec4f10f4b5385f01722480139e3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6f25e1c5d19e474aaa3f4d5806a53445": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + 
"min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4fd8f1d3cb944c2e8a0a806e8c89a26f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "143c8f9e754240e6a5a8386da714cb1b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"3d3d1beb5ed346209eac8f4b38325ae1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } } }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file From 69d1a644b295a8bb63e8d730ba78fb2f02569a09 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Tue, 4 Jul 2023 11:29:12 +0100 Subject: [PATCH 06/54] updates --- .../search/00-quick-start.ipynb | 16 ++++++++++++---- ...ipynb => 01-keyword-querying-filtering.ipynb} | 0 ...ing.ipynb => 02-hybrid-search-with-rrf.ipynb} | 0 ...brid-search-with-rrf.ipynb => 03-ELSER.ipynb} | 0 colab-notebooks-examples/search/04-ELSER.ipynb | 0 .../search/05-aggregations.ipynb | 0 6 files changed, 12 insertions(+), 4 deletions(-) rename colab-notebooks-examples/search/{01-vector-search.ipynb => 01-keyword-querying-filtering.ipynb} (100%) rename colab-notebooks-examples/search/{02-keyword-querying-filtering.ipynb => 02-hybrid-search-with-rrf.ipynb} (100%) rename colab-notebooks-examples/search/{03-hybrid-search-with-rrf.ipynb => 03-ELSER.ipynb} (100%) delete mode 100644 colab-notebooks-examples/search/04-ELSER.ipynb delete mode 100644 colab-notebooks-examples/search/05-aggregations.ipynb diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index ab9dc49f..5a153ee4 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -49,15 +49,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "ffc5fa6f", "metadata": { "id": "ffc5fa6f", "scrolled": false }, - "outputs": [], + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "zsh:1: command not found: pip\n" + ] + } + ], "source": [ - "!pip install elasticsearch" + "!pip install -qU elasticsearch sentence-transformers==2.2.2" ] }, { @@ -539,7 +547,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/colab-notebooks-examples/search/01-vector-search.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb similarity index 100% rename from colab-notebooks-examples/search/01-vector-search.ipynb rename to colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb diff --git a/colab-notebooks-examples/search/02-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb similarity index 100% rename from colab-notebooks-examples/search/02-keyword-querying-filtering.ipynb rename to colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb diff --git a/colab-notebooks-examples/search/03-hybrid-search-with-rrf.ipynb b/colab-notebooks-examples/search/03-ELSER.ipynb similarity index 100% rename from colab-notebooks-examples/search/03-hybrid-search-with-rrf.ipynb rename to colab-notebooks-examples/search/03-ELSER.ipynb diff --git a/colab-notebooks-examples/search/04-ELSER.ipynb b/colab-notebooks-examples/search/04-ELSER.ipynb deleted file mode 100644 index e69de29b..00000000 diff --git a/colab-notebooks-examples/search/05-aggregations.ipynb b/colab-notebooks-examples/search/05-aggregations.ipynb deleted file mode 100644 index e69de29b..00000000 From 38fe6ef94190675d2716676614e09a79020c41c4 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Tue, 4 Jul 2023 11:47:55 +0100 Subject: [PATCH 07/54] clean up --- .../search/00-quick-start.ipynb | 3090 ++++++++--------- 1 file changed, 1421 insertions(+), 1669 deletions(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb 
b/colab-notebooks-examples/search/00-quick-start.ipynb index 632174d8..03b14278 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "87773ce7", "metadata": { @@ -16,6 +17,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a32202e2", "metadata": { @@ -31,6 +33,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "52a6a607", "metadata": { @@ -50,17 +53,17 @@ "execution_count": 1, "id": "ffc5fa6f", "metadata": { - "id": "ffc5fa6f", - "scrolled": false, - "outputId": "d9693f20-1482-4a4d-b74d-a876885f92fa", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "ffc5fa6f", + "outputId": "d9693f20-1482-4a4d-b74d-a876885f92fa", + "scrolled": false }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m393.8/393.8 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", @@ -80,6 +83,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "d9cb4609", "metadata": { @@ -98,34 +102,26 @@ }, "outputs": [], "source": [ - "from elasticsearch import Elasticsearch, helpers\n", + "from elasticsearch import Elasticsearch\n", "from urllib.request import urlopen" ] }, { + "attachments": {}, "cell_type": "markdown", - "source": [ - "and add the sentence transformer" - ], + "id": "28AH8LhI-0UD", "metadata": { "id": "28AH8LhI-0UD" }, - "id": "28AH8LhI-0UD" + "source": [ + "and add the sentence transformer" + ] }, { "cell_type": "code", - "source": [ - "from sentence_transformers import SentenceTransformer\n", - "import torch\n", - "\n", - "device = 'cuda' if torch.cuda.is_available() else 
'cpu'\n", - "\n", - "model = SentenceTransformer('all-MiniLM-L6-v2', device=device)\n", - "model" - ], + "execution_count": null, + "id": "WHC3hHGW-wbI", "metadata": { - "id": "WHC3hHGW-wbI", - "outputId": "e1afa0b3-6f39-47cd-da21-c6adadc7b7de", "colab": { "base_uri": "https://localhost:8080/", "height": 552, @@ -285,224 +281,23 @@ "143c8f9e754240e6a5a8386da714cb1b", "3d3d1beb5ed346209eac8f4b38325ae1" ] - } - }, - "id": "WHC3hHGW-wbI", - "execution_count": 3, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Downloading (…)e9125/.gitattributes: 0%| | 0.00/1.18k [00:00\",\n", + " basic_auth=(\"elastic\", \"\")\n", ")" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "fcd165fa", "metadata": { @@ -541,6 +337,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1462ebd8", "metadata": { @@ -552,29 +349,22 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "25c618eb", "metadata": { - "id": "25c618eb", - "outputId": "f3ea04dc-a30a-4ea1-a393-addb35d29fac", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "25c618eb", + "outputId": "f3ea04dc-a30a-4ea1-a393-addb35d29fac" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'name': 'instance-0000000000', 'cluster_name': '1a56ad21587c44d3930932eb9fa1d8e8', 'cluster_uuid': 'gX4zlwtlR4qhZpp1SPm4Yg', 'version': {'number': '8.8.1', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'f8edfccba429b6477927a7c1ce1bc6729521305e', 'build_date': '2023-06-05T21:32:25.188464208Z', 'build_snapshot': False, 'lucene_version': '9.6.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n" - ] - } - ], + "outputs": [], "source": [ "print(client.info())" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "61e1e6d8", "metadata": { @@ -600,35 +390,16 @@ }, { "cell_type": "code", - "execution_count": 7, + 
"execution_count": null, "id": "6bc95238", "metadata": { - "id": "6bc95238", - "outputId": "2b6d0360-43e8-4cbe-fada-80bf475510ed", "colab": { "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - ":17: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", - " client.indices.create(index='book_index', body=mapping)\n" - ] }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'book_index'})" - ] - }, - "metadata": {}, - "execution_count": 7 - } - ], + "id": "6bc95238", + "outputId": "2b6d0360-43e8-4cbe-fada-80bf475510ed" + }, + "outputs": [], "source": [ "# Define the mapping\n", "mapping = {\n", @@ -650,6 +421,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "075f5eb6", "metadata": { @@ -665,27 +437,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "008d723e", "metadata": { - "id": "008d723e", - "outputId": "430bbad4-404d-4f5d-dc23-d4e462b931fa", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "008d723e", + "outputId": "430bbad4-404d-4f5d-dc23-d4e462b931fa" }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "ObjectApiResponse({'took': 47, 'errors': False, 'items': [{'index': {'_index': 'book_index', '_id': 'eWsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'emsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'e2sDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 
'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'fGsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'fWsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 4, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'fmsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 5, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'f2sDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'gGsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'gWsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 8, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'gmsDIIkB6SgI-NN4Uquf', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 9, '_primary_term': 1, 'status': 201}}]})" - ] - }, - "metadata": {}, - "execution_count": 8 - } - ], + "outputs": [], "source": [ "books = [\n", " {\n", @@ -781,6 +542,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "cd8b03e0", "metadata": { @@ -814,6 +576,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "39bdefe0", "metadata": { @@ -827,51 +590,27 @@ }, { "cell_type": "code", - "source": [ - "def pretty_response(response):\n", - " for hit in 
response['hits']['hits']:\n", - " id = hit['_id']\n", - " publication_date = hit['_source']['publish_date']\n", - " score = hit['_score']\n", - " title = hit['_source']['title']\n", - " summary = hit['_source']['summary']\n", - " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nScore: {score}\")\n", - " print(pretty_output)\n", - "\n", - "response = client.search(index=\"book_index\", body={\n", - " \"knn\": {\n", - " \"field\": \"title_vector\",\n", - " \"query_vector\": model.encode(\"Best Project management books?\"),\n", - " \"k\": 10,\n", - " \"num_candidates\": 100\n", - " }\n", - "})\n", - "\n", - "pretty_response(response)\n", - "\n", - "\n" - ], + "execution_count": 11, + "id": "Df7hwcIjYwMT", "metadata": { - "id": "Df7hwcIjYwMT", - "outputId": "a5569fa1-163a-45a0-d9d6-bc779feb59db", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "Df7hwcIjYwMT", + "outputId": "a5569fa1-163a-45a0-d9d6-bc779feb59db" }, - "id": "Df7hwcIjYwMT", - "execution_count": 11, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ ":11: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", " response = client.search(index=\"book_index\", body={\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\n", "ID: fGsDIIkB6SgI-NN4Uquf\n", @@ -935,33 +674,46 @@ "Score: 0.52609706\n" ] } + ], + "source": [ + "def pretty_response(response):\n", + " for hit in response['hits']['hits']:\n", + " id = hit['_id']\n", + " publication_date = hit['_source']['publish_date']\n", + " score = hit['_score']\n", + " title = hit['_source']['title']\n", + " summary = hit['_source']['summary']\n", + " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nScore: {score}\")\n", + " print(pretty_output)\n", + "\n", + "response = client.search(index=\"book_index\", body={\n", + " \"knn\": {\n", + " \"field\": \"title_vector\",\n", + " \"query_vector\": model.encode(\"Best Project management books?\"),\n", + " \"k\": 10,\n", + " \"num_candidates\": 100\n", + " }\n", + "})\n", + "\n", + "pretty_response(response)\n", + "\n", + "\n" ] }, { "cell_type": "code", - "source": [ - "client.indices.delete(index=\"book_index\")" - ], + "execution_count": null, + "id": "pWYkmofeaSk8", "metadata": { - "id": "pWYkmofeaSk8", - "outputId": "c8aa61e0-c33f-4a9b-e05d-09d2d5e75ef3", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "pWYkmofeaSk8", + "outputId": "c8aa61e0-c33f-4a9b-e05d-09d2d5e75ef3" }, - "id": "pWYkmofeaSk8", - "execution_count": 13, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "ObjectApiResponse({'acknowledged': True})" - ] - }, - "metadata": {}, - "execution_count": 13 - } + "outputs": [], + "source": [ + "client.indices.delete(index=\"book_index\")" ] } ], @@ -988,102 +740,14 @@ }, "widgets": { "application/vnd.jupyter.widget-state+json": { - "8a47d05dd32e452b89c1c62f15cb57a3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - 
"state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_34d9035bbca449fba7a64a7353ce9200", - "IPY_MODEL_230c71ac97db42aeb048c392154c7ad8", - "IPY_MODEL_327fdab26f534c918c41f9b6eb8da61e" - ], - "layout": "IPY_MODEL_6a7bbaad9509474b8b77bfcd216c2e4a" - } - }, - "34d9035bbca449fba7a64a7353ce9200": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", + "00f331e549504656ac4d8f1f07e95768": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cca1ec7ef9174e37a5e17b2a77b39aad", - "placeholder": "​", - "style": "IPY_MODEL_e5616536b1824a388fa9e1ed826307f9", - "value": "Downloading (…)e9125/.gitattributes: 100%" - } - }, - "230c71ac97db42aeb048c392154c7ad8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_254086255be24f2b8e0ccfcfb072f379", - "max": 1175, - "min": 0, - "orientation": 
"horizontal", - "style": "IPY_MODEL_d7b8af67a3a8454c97d9a68caff040c7", - "value": 1175 - } - }, - "327fdab26f534c918c41f9b6eb8da61e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_22d2c55179744b4f8d57c15aa60bd0ef", - "placeholder": "​", - "style": "IPY_MODEL_face41511e7c4c748e123d176b219f13", - "value": " 1.18k/1.18k [00:00<00:00, 54.2kB/s]" - } - }, - "6a7bbaad9509474b8b77bfcd216c2e4a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", @@ -1128,10 +792,10 @@ "width": null } }, - "cca1ec7ef9174e37a5e17b2a77b39aad": { + "0229a8c31ca04b5aa8f36f049b6333c0": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1180,25 +844,10 @@ "width": null } }, - "e5616536b1824a388fa9e1ed826307f9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "254086255be24f2b8e0ccfcfb072f379": { + "04e89773ebeb42db87c64d7b9ec7cb26": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1247,26 +896,32 @@ "width": null } }, - "d7b8af67a3a8454c97d9a68caff040c7": { + "05c5fdaa4d624b44857f6b7d08426e6c": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HBoxModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_71e2f0ea4906451f9555a638f7e713b6", + "IPY_MODEL_fdb9c374bc1e48688e439dc82103c545", + "IPY_MODEL_f14f96faaf3444cda3da447b10d57cee" + ], + "layout": "IPY_MODEL_95007200cc10458882a9e81d055978b9" } }, - "22d2c55179744b4f8d57c15aa60bd0ef": { + "08a1df059a4b4c26a87ccea8c5d45bbd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1315,25 +970,32 @@ "width": null } }, - "face41511e7c4c748e123d176b219f13": { + "09af9df668ad43b4a8bd2c10d36a2aab": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + 
"_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HBoxModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_eb1d39f0a2254be7857baa26432a5e56", + "IPY_MODEL_73cb794c915e493e9623d5f598a71029", + "IPY_MODEL_4d21a6cce39141f594d5b5d71caa5a61" + ], + "layout": "IPY_MODEL_753703fac8c54e268e0a4eb373580a0b" } }, - "b27dfc2df0f94537b977baa0211165f7": { + "0a566ba9f07d4a56a69dcfc467c8cfa8": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1345,17 +1007,17 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_27a5814b873b463d8ec884fce5b3002b", - "IPY_MODEL_d7e199cec93c42308436a13f0101798d", - "IPY_MODEL_51023e9b26e348278fb5788c4b1d3cf1" + "IPY_MODEL_c825476eb2a24cc9b8707d23bd0589ed", + "IPY_MODEL_d290678ad04e431ba4923c0dba777399", + "IPY_MODEL_b105661ae91240a9ac645a531f87db35" ], - "layout": "IPY_MODEL_e37dae5f929042eea3b021d81c7e89f9" + "layout": "IPY_MODEL_44cd00539c8c487ba2b58819b96d90b8" } }, - "27a5814b873b463d8ec884fce5b3002b": { + "0c0e6363c3d44ad992727ad3031b685a": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1367,16 +1029,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_f41fc8e604524a118ea2b863a01d5201", + "layout": "IPY_MODEL_57f9e08a916f4c7584d1e5c625adf509", "placeholder": "​", - 
"style": "IPY_MODEL_a407443370464f68b4f503a5df4b2fd4", - "value": "Downloading (…)_Pooling/config.json: 100%" + "style": "IPY_MODEL_98f9d91b220d4c86aaa4a20994e858a8", + "value": "Downloading (…)125/data_config.json: 100%" } }, - "d7e199cec93c42308436a13f0101798d": { + "0e0a2c012cdf402a8c7275d42fb47d16": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1389,39 +1051,33 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_a09c50472f244d15875780d53e4cb680", - "max": 190, + "layout": "IPY_MODEL_fd06e080368345679c280ae286dcf118", + "max": 10610, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_e1623d2abb5244708dc2d5ae9bce9e6f", - "value": 190 + "style": "IPY_MODEL_c385ab3c1ffb4a9391feaf8017ac2124", + "value": 10610 } }, - "51023e9b26e348278fb5788c4b1d3cf1": { + "1181941459e04ce29dfaf48ae1e6dff7": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9bac318fe82342c4a9a489034d54b85c", - "placeholder": "​", - "style": "IPY_MODEL_6aa05f91d1a24d8fbe6bd0083649c9d9", - "value": " 190/190 [00:00<00:00, 10.5kB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "e37dae5f929042eea3b021d81c7e89f9": { + "143c8f9e754240e6a5a8386da714cb1b": { "model_module": 
"@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1470,10 +1126,48 @@ "width": null } }, - "f41fc8e604524a118ea2b863a01d5201": { + "1ad860cd423c4fd893066e6925579eb7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "1fe019c07e0c4f62adfc826f89eb2dbd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3b2a66e66c5c4e8a8bafd6d340a779b9", + "IPY_MODEL_6c2891e1aae043afbba8430ae097b882", + "IPY_MODEL_7743a69d92f94e8fa358b458fe8aa4a5" + ], + "layout": "IPY_MODEL_a76ea0a19ea74594a843c62aa6fb03e1" + } + }, + "2083d409adf345e3b6209bf9b4e8d5df": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1522,10 +1216,32 @@ "width": null } }, - "a407443370464f68b4f503a5df4b2fd4": { + "2097ee5afa034d5fa208191458925956": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0c0e6363c3d44ad992727ad3031b685a", + "IPY_MODEL_85667ca054aa431eb50fa26233244e65", + "IPY_MODEL_dceed128f53f4726afe578574c19815d" + ], + "layout": "IPY_MODEL_862c408691204fb2902f588ea8a2f614" + } + }, + "22a5d61c70c545d095c54b4b066d4b87": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -1537,10 +1253,10 @@ "description_width": "" } }, - "a09c50472f244d15875780d53e4cb680": { + "22d2c55179744b4f8d57c15aa60bd0ef": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1589,26 +1305,34 @@ "width": null } }, - "e1623d2abb5244708dc2d5ae9bce9e6f": { + "230c71ac97db42aeb048c392154c7ad8": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_254086255be24f2b8e0ccfcfb072f379", + "max": 1175, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d7b8af67a3a8454c97d9a68caff040c7", + "value": 1175 } }, - "9bac318fe82342c4a9a489034d54b85c": { + "2450d6be578e4eb6960111c26c55fa43": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1657,47 +1381,10 @@ "width": null } }, - "6aa05f91d1a24d8fbe6bd0083649c9d9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "70e02ff9e19b4a9f991e0777b42b22c4": { + "24f54307ee3b499ab7519f713ce994e1": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_84727c7020574c809ca155efa0996217", - "IPY_MODEL_0e0a2c012cdf402a8c7275d42fb47d16", - "IPY_MODEL_fe235613377d4443815ad548d7b19a2c" - ], - "layout": "IPY_MODEL_30ad2a5963c04d188b64448513352576" - } - }, - "84727c7020574c809ca155efa0996217": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1709,61 +1396,68 @@ "_view_name": "HTMLView", 
"description": "", "description_tooltip": null, - "layout": "IPY_MODEL_0229a8c31ca04b5aa8f36f049b6333c0", + "layout": "IPY_MODEL_f22a4dfe0f6e44c2a0bc37b3fe488439", "placeholder": "​", - "style": "IPY_MODEL_775697558ad74bd0b01b8728617395df", - "value": "Downloading (…)7e55de9125/README.md: 100%" - } - }, - "0e0a2c012cdf402a8c7275d42fb47d16": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fd06e080368345679c280ae286dcf118", - "max": 10610, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c385ab3c1ffb4a9391feaf8017ac2124", - "value": 10610 + "style": "IPY_MODEL_5c26c4be0a0240499d427e67d89cd869", + "value": " 112/112 [00:00<00:00, 5.97kB/s]" } }, - "fe235613377d4443815ad548d7b19a2c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", + "25081b6df89c4f6890d7e359905b3b8a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2a76d9c1c37a45ab8f4d7da6541c11c3", - "placeholder": "​", - "style": 
"IPY_MODEL_3897a135ce4d413f95305560e9d5f51d", - "value": " 10.6k/10.6k [00:00<00:00, 446kB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "30ad2a5963c04d188b64448513352576": { + "254086255be24f2b8e0ccfcfb072f379": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1812,10 +1506,10 @@ "width": null } }, - "0229a8c31ca04b5aa8f36f049b6333c0": { + "2787caea3cbc4f029e766b35ccaa6013": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1864,25 +1558,55 @@ "width": null } }, - "775697558ad74bd0b01b8728617395df": { + "27a5814b873b463d8ec884fce5b3002b": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f41fc8e604524a118ea2b863a01d5201", + "placeholder": "​", + "style": "IPY_MODEL_a407443370464f68b4f503a5df4b2fd4", + "value": "Downloading (…)_Pooling/config.json: 100%" } }, - "fd06e080368345679c280ae286dcf118": { + "28451c97f4fa4b8991186907a50738e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c3b2c605e7e94299a5dadf771baf6f0a", + "max": 350, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_69fa56f6dfde42f883b24069f65ed745", + "value": 350 + } + }, + "2a76d9c1c37a45ab8f4d7da6541c11c3": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1931,26 +1655,10 @@ "width": null } }, - "c385ab3c1ffb4a9391feaf8017ac2124": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - 
"_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "2a76d9c1c37a45ab8f4d7da6541c11c3": { + "30ad2a5963c04d188b64448513352576": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1999,47 +1707,55 @@ "width": null } }, - "3897a135ce4d413f95305560e9d5f51d": { + "327fdab26f534c918c41f9b6eb8da61e": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_22d2c55179744b4f8d57c15aa60bd0ef", + "placeholder": "​", + "style": "IPY_MODEL_face41511e7c4c748e123d176b219f13", + "value": " 1.18k/1.18k [00:00<00:00, 54.2kB/s]" } }, - "e41fb5a2fbcf485899df455800c29f66": { + "3418257fe9114669b00bc242c620bfb3": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - 
"_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_637b076d35c247348d61e924f86c7509", - "IPY_MODEL_fb5eba47d398448a991aaca68e4c33f0", - "IPY_MODEL_5a549312926048079d979df5f31f6668" - ], - "layout": "IPY_MODEL_c7f8a26c31154c8f9bf65dddb4095c04" + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_640450c5f17646a5b42a1ab13218db23", + "max": 112, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8a280b8dae964aaea3be0a299e354b17", + "value": 112 } }, - "637b076d35c247348d61e924f86c7509": { + "34d9035bbca449fba7a64a7353ce9200": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2051,40 +1767,31 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_b999f7e8580d47bf956607b2dbf59604", + "layout": "IPY_MODEL_cca1ec7ef9174e37a5e17b2a77b39aad", "placeholder": "​", - "style": "IPY_MODEL_835c285bb5bf41999c6e845d485ab845", - "value": "Downloading (…)55de9125/config.json: 100%" + "style": "IPY_MODEL_e5616536b1824a388fa9e1ed826307f9", + "value": "Downloading (…)e9125/.gitattributes: 100%" } }, - "fb5eba47d398448a991aaca68e4c33f0": { + "3897a135ce4d413f95305560e9d5f51d": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_2083d409adf345e3b6209bf9b4e8d5df", - "max": 612, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4d1b0ec8de0f466eba688a33a95b6dc8", - "value": 612 + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "5a549312926048079d979df5f31f6668": { + "3b2a66e66c5c4e8a8bafd6d340a779b9": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2096,16 +1803,53 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_8ae23bf62ca44f3e9953ec0b89705a42", + "layout": "IPY_MODEL_8a909360e5704220987b703ba4cb3dff", "placeholder": "​", - "style": "IPY_MODEL_6c57b2b268a44ad09c188941cc1fa47a", - "value": " 612/612 [00:00<00:00, 34.3kB/s]" + "style": "IPY_MODEL_cc4c4534d0b54d0187d5a7ed82a0376e", + "value": "Downloading (…)e9125/tokenizer.json: 100%" } }, - "c7f8a26c31154c8f9bf65dddb4095c04": { + "3d3d1beb5ed346209eac8f4b38325ae1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3d4d613296054330bedd8eac39978a18": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", 
+ "box_style": "", + "children": [ + "IPY_MODEL_b11f3b77d8604aed8051678c6f12b172", + "IPY_MODEL_f7f3cae2779d41b9b08d299596c6026c", + "IPY_MODEL_9db4b498a78e435db8afd06c6632a522" + ], + "layout": "IPY_MODEL_9513eacff50642e7b0579c7de57bdacf" + } + }, + "3e4fbf2956b343218d2aec4d867e1c94": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2154,10 +1898,10 @@ "width": null } }, - "b999f7e8580d47bf956607b2dbf59604": { + "3f50cc3742724ab2bf8434c341eee0bd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2206,25 +1950,10 @@ "width": null } }, - "835c285bb5bf41999c6e845d485ab845": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "2083d409adf345e3b6209bf9b4e8d5df": { + "440fcc9ad14241dba8e724aac69803e9": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2273,26 +2002,25 @@ "width": null } }, - "4d1b0ec8de0f466eba688a33a95b6dc8": { + "449d71ebd4df4ddbb755d30fdff25358": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "8ae23bf62ca44f3e9953ec0b89705a42": { + "44cd00539c8c487ba2b58819b96d90b8": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2341,10 +2069,46 @@ "width": null } }, - "6c57b2b268a44ad09c188941cc1fa47a": { + "483b1513ebcd4e718e69202b30163973": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7c1fc9a54be14954acd59abde2fca97f", + "placeholder": "​", + "style": "IPY_MODEL_92790cf373b14a058b31fc0d1fcc5c94", + "value": " 13.2k/13.2k [00:00<00:00, 402kB/s]" + } + }, + "4bde08a13c41477ba6c7a4697ab599a0": { "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4cbf54c836d74602b94388254b5bf8eb": { + "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2356,32 +2120,26 @@ "description_width": "" } }, - "0a566ba9f07d4a56a69dcfc467c8cfa8": { + "4d1b0ec8de0f466eba688a33a95b6dc8": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c825476eb2a24cc9b8707d23bd0589ed", - "IPY_MODEL_d290678ad04e431ba4923c0dba777399", - "IPY_MODEL_b105661ae91240a9ac645a531f87db35" - ], - "layout": "IPY_MODEL_44cd00539c8c487ba2b58819b96d90b8" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "c825476eb2a24cc9b8707d23bd0589ed": { + "4d21a6cce39141f594d5b5d71caa5a61": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2393,40 +2151,47 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_00f331e549504656ac4d8f1f07e95768", + "layout": "IPY_MODEL_8c4e637876534783959a3daa9254a65a", "placeholder": "​", - "style": "IPY_MODEL_e6864110fe5b4caf8e1b869162a6c45a", - "value": "Downloading (…)ce_transformers.json: 100%" + "style": "IPY_MODEL_1181941459e04ce29dfaf48ae1e6dff7", + "value": " 53.0/53.0 [00:00<00:00, 2.38kB/s]" } }, - "d290678ad04e431ba4923c0dba777399": { + "4e25e350551e49a2bf50e081fcb0184a": { "model_module": "@jupyter-widgets/controls", - "model_name": 
"FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cafef97c29564483b15c21b647e2e0d1", - "max": 116, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_6a664e104c2f462f838738ea966a0115", - "value": 116 + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "b105661ae91240a9ac645a531f87db35": { + "4fd8f1d3cb944c2e8a0a806e8c89a26f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "50d576d87fae494182632cbed50372ac": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2438,16 +2203,37 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_c987b5a057dc475bb7cb702afe198f74", + "layout": "IPY_MODEL_bff335481e214de5b02478938672df38", "placeholder": "​", - "style": "IPY_MODEL_b571138b82b146f6add4079c7c4a3fea", - "value": " 116/116 [00:00<00:00, 4.55kB/s]" + "style": "IPY_MODEL_4e25e350551e49a2bf50e081fcb0184a", + 
"value": " 350/350 [00:00<00:00, 20.4kB/s]" } }, - "44cd00539c8c487ba2b58819b96d90b8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", + "51023e9b26e348278fb5788c4b1d3cf1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9bac318fe82342c4a9a489034d54b85c", + "placeholder": "​", + "style": "IPY_MODEL_6aa05f91d1a24d8fbe6bd0083649c9d9", + "value": " 190/190 [00:00<00:00, 10.5kB/s]" + } + }, + "530177663d7f47d39c43beb3ce7fab78": { + "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2496,10 +2282,10 @@ "width": null } }, - "00f331e549504656ac4d8f1f07e95768": { + "530c9177c7024e98a24d8db80651ab01": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2548,25 +2334,32 @@ "width": null } }, - "e6864110fe5b4caf8e1b869162a6c45a": { + "54062dca2e8743a68da565014c44304c": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HBoxModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_79efd9353e4f4ee690de5d38bd98c7aa", + "IPY_MODEL_28451c97f4fa4b8991186907a50738e8", + "IPY_MODEL_50d576d87fae494182632cbed50372ac" + ], + "layout": "IPY_MODEL_a57b74c1c4b6497ba8e0dafad0af4ddf" } }, - "cafef97c29564483b15c21b647e2e0d1": { + "57f9e08a916f4c7584d1e5c625adf509": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2615,26 +2408,67 @@ "width": null } }, - "6a664e104c2f462f838738ea966a0115": { + "5a549312926048079d979df5f31f6668": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8ae23bf62ca44f3e9953ec0b89705a42", + "placeholder": "​", + "style": "IPY_MODEL_6c57b2b268a44ad09c188941cc1fa47a", + "value": " 612/612 [00:00<00:00, 34.3kB/s]" + } + }, + "5c26c4be0a0240499d427e67d89cd869": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - 
"c987b5a057dc475bb7cb702afe198f74": { + "637b076d35c247348d61e924f86c7509": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b999f7e8580d47bf956607b2dbf59604", + "placeholder": "​", + "style": "IPY_MODEL_835c285bb5bf41999c6e845d485ab845", + "value": "Downloading (…)55de9125/config.json: 100%" + } + }, + "640450c5f17646a5b42a1ab13218db23": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2683,68 +2517,109 @@ "width": null } }, - "b571138b82b146f6add4079c7c4a3fea": { + "69fa56f6dfde42f883b24069f65ed745": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "2097ee5afa034d5fa208191458925956": { + "6a664e104c2f462f838738ea966a0115": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": 
"ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_0c0e6363c3d44ad992727ad3031b685a", - "IPY_MODEL_85667ca054aa431eb50fa26233244e65", - "IPY_MODEL_dceed128f53f4726afe578574c19815d" - ], - "layout": "IPY_MODEL_862c408691204fb2902f588ea8a2f614" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "0c0e6363c3d44ad992727ad3031b685a": { + "6a7bbaad9509474b8b77bfcd216c2e4a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6aa05f91d1a24d8fbe6bd0083649c9d9": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_57f9e08a916f4c7584d1e5c625adf509", - "placeholder": "​", - "style": "IPY_MODEL_98f9d91b220d4c86aaa4a20994e858a8", - "value": "Downloading (…)125/data_config.json: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "85667ca054aa431eb50fa26233244e65": { + "6c2891e1aae043afbba8430ae097b882": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2757,39 +2632,33 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_25081b6df89c4f6890d7e359905b3b8a", - "max": 39265, + "layout": "IPY_MODEL_8ff8b86348424eda9678a932fbbad132", + "max": 466247, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_6fad6161615f45688f3c44d6179ba204", - "value": 39265 + "style": "IPY_MODEL_d43e683a46ef4d8892635fc1da1bd7c0", + "value": 466247 } }, - "dceed128f53f4726afe578574c19815d": { + "6c57b2b268a44ad09c188941cc1fa47a": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - 
"_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7eed45dcf5004dccb734eb315415a535", - "placeholder": "​", - "style": "IPY_MODEL_a5cd7c8a692f43f7859b4df8334c3f0f", - "value": " 39.3k/39.3k [00:00<00:00, 228kB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "862c408691204fb2902f588ea8a2f614": { + "6edf7e9504fe48be802d443b3a0dd3a5": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2838,10 +2707,10 @@ "width": null } }, - "57f9e08a916f4c7584d1e5c625adf509": { + "6f25e1c5d19e474aaa3f4d5806a53445": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2890,25 +2759,93 @@ "width": null } }, - "98f9d91b220d4c86aaa4a20994e858a8": { + "6fad6161615f45688f3c44d6179ba204": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "25081b6df89c4f6890d7e359905b3b8a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", + "70e02ff9e19b4a9f991e0777b42b22c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_84727c7020574c809ca155efa0996217", + "IPY_MODEL_0e0a2c012cdf402a8c7275d42fb47d16", + "IPY_MODEL_fe235613377d4443815ad548d7b19a2c" + ], + "layout": "IPY_MODEL_30ad2a5963c04d188b64448513352576" + } + }, + "71e2f0ea4906451f9555a638f7e713b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_530c9177c7024e98a24d8db80651ab01", + "placeholder": "​", + "style": "IPY_MODEL_a5abdec4f10f4b5385f01722480139e3", + "value": "Downloading (…)5de9125/modules.json: 100%" + } + }, + "73cb794c915e493e9623d5f598a71029": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_440fcc9ad14241dba8e724aac69803e9", + "max": 53, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b5608a8229a34e8e8405bf0420104ae4", + "value": 53 + } + }, + "753703fac8c54e268e0a4eb373580a0b": { + "model_module": "@jupyter-widgets/base", "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2957,26 +2894,67 @@ "width": null } }, - "6fad6161615f45688f3c44d6179ba204": { + "7743a69d92f94e8fa358b458fe8aa4a5": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3e4fbf2956b343218d2aec4d867e1c94", + "placeholder": "​", + "style": "IPY_MODEL_4bde08a13c41477ba6c7a4697ab599a0", + "value": " 466k/466k [00:00<00:00, 905kB/s]" + } + }, + "775697558ad74bd0b01b8728617395df": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "7eed45dcf5004dccb734eb315415a535": { + "79efd9353e4f4ee690de5d38bd98c7aa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_08a1df059a4b4c26a87ccea8c5d45bbd", + "placeholder": "​", + "style": "IPY_MODEL_8a37a86d8ed44c1cb70eca68215c913c", + "value": "Downloading (…)okenizer_config.json: 100%" + } + }, + "7c1fc9a54be14954acd59abde2fca97f": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3025,47 +3003,10 @@ "width": null } }, - "a5cd7c8a692f43f7859b4df8334c3f0f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3d4d613296054330bedd8eac39978a18": { + "7c252184b6c449d1bbba4807cde4672a": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b11f3b77d8604aed8051678c6f12b172", - "IPY_MODEL_f7f3cae2779d41b9b08d299596c6026c", - "IPY_MODEL_9db4b498a78e435db8afd06c6632a522" - ], - "layout": "IPY_MODEL_9513eacff50642e7b0579c7de57bdacf" - } - }, - "b11f3b77d8604aed8051678c6f12b172": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3077,40 +3018,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": 
null, - "layout": "IPY_MODEL_6edf7e9504fe48be802d443b3a0dd3a5", + "layout": "IPY_MODEL_2450d6be578e4eb6960111c26c55fa43", "placeholder": "​", - "style": "IPY_MODEL_22a5d61c70c545d095c54b4b066d4b87", - "value": "Downloading pytorch_model.bin: 100%" + "style": "IPY_MODEL_8d97796bc9964114853ce5803584d159", + "value": " 232k/232k [00:00<00:00, 1.37MB/s]" } }, - "f7f3cae2779d41b9b08d299596c6026c": { + "7e85d153ec15430689bc77ef6bf2b3c8": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b8769ba773c144428df49ed117a4ba2e", - "max": 90888945, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_914892930f74452d96cd787c128c955b", - "value": 90888945 - } - }, - "9db4b498a78e435db8afd06c6632a522": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3122,16 +3039,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_d10b85816ca540c0a5456e4126dc99c6", + "layout": "IPY_MODEL_eb16cb52b8cb438984c4c75c3074de84", "placeholder": "​", - "style": "IPY_MODEL_c686e95be6db4030b77cfcc170aaf722", - "value": " 90.9M/90.9M [00:01<00:00, 85.9MB/s]" + "style": "IPY_MODEL_9ea920d0083f46e2add318c845b3fddf", + "value": "Downloading (…)7e55de9125/vocab.txt: 100%" } }, - "9513eacff50642e7b0579c7de57bdacf": { + "7eed45dcf5004dccb734eb315415a535": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3180,10 +3097,70 @@ "width": null } }, - "6edf7e9504fe48be802d443b3a0dd3a5": { + "835c285bb5bf41999c6e845d485ab845": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "84727c7020574c809ca155efa0996217": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0229a8c31ca04b5aa8f36f049b6333c0", + "placeholder": "​", + "style": "IPY_MODEL_775697558ad74bd0b01b8728617395df", + "value": "Downloading (…)7e55de9125/README.md: 100%" + } + }, + "85667ca054aa431eb50fa26233244e65": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_25081b6df89c4f6890d7e359905b3b8a", + "max": 39265, + "min": 0, + "orientation": 
"horizontal", + "style": "IPY_MODEL_6fad6161615f45688f3c44d6179ba204", + "value": 39265 + } + }, + "862c408691204fb2902f588ea8a2f614": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3232,10 +3209,26 @@ "width": null } }, - "22a5d61c70c545d095c54b4b066d4b87": { + "8a280b8dae964aaea3be0a299e354b17": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8a37a86d8ed44c1cb70eca68215c913c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3247,10 +3240,32 @@ "description_width": "" } }, - "b8769ba773c144428df49ed117a4ba2e": { + "8a47d05dd32e452b89c1c62f15cb57a3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_34d9035bbca449fba7a64a7353ce9200", + "IPY_MODEL_230c71ac97db42aeb048c392154c7ad8", + "IPY_MODEL_327fdab26f534c918c41f9b6eb8da61e" + ], + "layout": "IPY_MODEL_6a7bbaad9509474b8b77bfcd216c2e4a" + } + }, + 
"8a909360e5704220987b703ba4cb3dff": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3299,26 +3314,10 @@ "width": null } }, - "914892930f74452d96cd787c128c955b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d10b85816ca540c0a5456e4126dc99c6": { + "8ae23bf62ca44f3e9953ec0b89705a42": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3367,113 +3366,10 @@ "width": null } }, - "c686e95be6db4030b77cfcc170aaf722": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "09af9df668ad43b4a8bd2c10d36a2aab": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": 
"1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_eb1d39f0a2254be7857baa26432a5e56", - "IPY_MODEL_73cb794c915e493e9623d5f598a71029", - "IPY_MODEL_4d21a6cce39141f594d5b5d71caa5a61" - ], - "layout": "IPY_MODEL_753703fac8c54e268e0a4eb373580a0b" - } - }, - "eb1d39f0a2254be7857baa26432a5e56": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2787caea3cbc4f029e766b35ccaa6013", - "placeholder": "​", - "style": "IPY_MODEL_ae251276d3c8482ab768130bde5b9de3", - "value": "Downloading (…)nce_bert_config.json: 100%" - } - }, - "73cb794c915e493e9623d5f598a71029": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_440fcc9ad14241dba8e724aac69803e9", - "max": 53, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b5608a8229a34e8e8405bf0420104ae4", - "value": 53 - } - }, - "4d21a6cce39141f594d5b5d71caa5a61": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", 
- "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8c4e637876534783959a3daa9254a65a", - "placeholder": "​", - "style": "IPY_MODEL_1181941459e04ce29dfaf48ae1e6dff7", - "value": " 53.0/53.0 [00:00<00:00, 2.38kB/s]" - } - }, - "753703fac8c54e268e0a4eb373580a0b": { + "8c4e637876534783959a3daa9254a65a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3522,10 +3418,25 @@ "width": null } }, - "2787caea3cbc4f029e766b35ccaa6013": { + "8d97796bc9964114853ce5803584d159": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8ff8b86348424eda9678a932fbbad132": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3574,10 +3485,26 @@ "width": null } }, - "ae251276d3c8482ab768130bde5b9de3": { + "914892930f74452d96cd787c128c955b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "bar_color": null, + "description_width": "" + } + }, + "92790cf373b14a058b31fc0d1fcc5c94": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3589,10 +3516,10 @@ "description_width": "" } }, - "440fcc9ad14241dba8e724aac69803e9": { + "95007200cc10458882a9e81d055978b9": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3641,26 +3568,10 @@ "width": null } }, - "b5608a8229a34e8e8405bf0420104ae4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "8c4e637876534783959a3daa9254a65a": { + "9513eacff50642e7b0579c7de57bdacf": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3709,10 +3620,10 @@ "width": null } }, - "1181941459e04ce29dfaf48ae1e6dff7": { + "98f9d91b220d4c86aaa4a20994e858a8": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3720,57 +3631,14 @@ "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ee53579d441b4b2a97176d161b479cd1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ae834f736104436593dd069cf3bdd5a2", - "IPY_MODEL_3418257fe9114669b00bc242c620bfb3", - "IPY_MODEL_24f54307ee3b499ab7519f713ce994e1" - ], - "layout": "IPY_MODEL_e0d94012952e43549dd4d158c30c91d6" - } - }, - "ae834f736104436593dd069cf3bdd5a2": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fa8d7e78724341ca84f0f829e33d756d", - "placeholder": "​", - "style": "IPY_MODEL_4cbf54c836d74602b94388254b5bf8eb", - "value": "Downloading (…)cial_tokens_map.json: 100%" + "_view_name": "StyleView", + "description_width": "" } }, - "3418257fe9114669b00bc242c620bfb3": { + "9adc61965b944edc9abff9d0de7e745a": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3783,39 +3651,18 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": 
"IPY_MODEL_640450c5f17646a5b42a1ab13218db23", - "max": 112, + "layout": "IPY_MODEL_530177663d7f47d39c43beb3ce7fab78", + "max": 13156, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_8a280b8dae964aaea3be0a299e354b17", - "value": 112 - } - }, - "24f54307ee3b499ab7519f713ce994e1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f22a4dfe0f6e44c2a0bc37b3fe488439", - "placeholder": "​", - "style": "IPY_MODEL_5c26c4be0a0240499d427e67d89cd869", - "value": " 112/112 [00:00<00:00, 5.97kB/s]" + "style": "IPY_MODEL_1ad860cd423c4fd893066e6925579eb7", + "value": 13156 } }, - "e0d94012952e43549dd4d158c30c91d6": { + "9bac318fe82342c4a9a489034d54b85c": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3864,10 +3711,46 @@ "width": null } }, - "fa8d7e78724341ca84f0f829e33d756d": { + "9db4b498a78e435db8afd06c6632a522": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d10b85816ca540c0a5456e4126dc99c6", + "placeholder": "​", + "style": 
"IPY_MODEL_c686e95be6db4030b77cfcc170aaf722", + "value": " 90.9M/90.9M [00:01<00:00, 85.9MB/s]" + } + }, + "9ea920d0083f46e2add318c845b3fddf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a09c50472f244d15875780d53e4cb680": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3916,10 +3799,10 @@ "width": null } }, - "4cbf54c836d74602b94388254b5bf8eb": { + "a407443370464f68b4f503a5df4b2fd4": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3931,10 +3814,10 @@ "description_width": "" } }, - "640450c5f17646a5b42a1ab13218db23": { + "a57b74c1c4b6497ba8e0dafad0af4ddf": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3983,26 +3866,40 @@ "width": null } }, - "8a280b8dae964aaea3be0a299e354b17": { + "a5abdec4f10f4b5385f01722480139e3": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": 
"DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "f22a4dfe0f6e44c2a0bc37b3fe488439": { + "a5cd7c8a692f43f7859b4df8334c3f0f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a76ea0a19ea74594a843c62aa6fb03e1": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4051,25 +3948,26 @@ "width": null } }, - "5c26c4be0a0240499d427e67d89cd869": { + "ac2387223ca04438a3e4bb59cd2f7019": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "1fe019c07e0c4f62adfc826f89eb2dbd": { + "ac4fdd9c828d473384662742dca85aa7": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4081,17 +3979,32 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - 
"IPY_MODEL_3b2a66e66c5c4e8a8bafd6d340a779b9", - "IPY_MODEL_6c2891e1aae043afbba8430ae097b882", - "IPY_MODEL_7743a69d92f94e8fa358b458fe8aa4a5" + "IPY_MODEL_c4f9e573288a46d19efae3d101dbbdef", + "IPY_MODEL_9adc61965b944edc9abff9d0de7e745a", + "IPY_MODEL_483b1513ebcd4e718e69202b30163973" ], - "layout": "IPY_MODEL_a76ea0a19ea74594a843c62aa6fb03e1" + "layout": "IPY_MODEL_04e89773ebeb42db87c64d7b9ec7cb26" } }, - "3b2a66e66c5c4e8a8bafd6d340a779b9": { + "ae251276d3c8482ab768130bde5b9de3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ae834f736104436593dd069cf3bdd5a2": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4103,40 +4016,37 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_8a909360e5704220987b703ba4cb3dff", + "layout": "IPY_MODEL_fa8d7e78724341ca84f0f829e33d756d", "placeholder": "​", - "style": "IPY_MODEL_cc4c4534d0b54d0187d5a7ed82a0376e", - "value": "Downloading (…)e9125/tokenizer.json: 100%" + "style": "IPY_MODEL_4cbf54c836d74602b94388254b5bf8eb", + "value": "Downloading (…)cial_tokens_map.json: 100%" } }, - "6c2891e1aae043afbba8430ae097b882": { + "b105661ae91240a9ac645a531f87db35": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": 
"FloatProgressModel", + "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", + "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_8ff8b86348424eda9678a932fbbad132", - "max": 466247, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_d43e683a46ef4d8892635fc1da1bd7c0", - "value": 466247 + "layout": "IPY_MODEL_c987b5a057dc475bb7cb702afe198f74", + "placeholder": "​", + "style": "IPY_MODEL_b571138b82b146f6add4079c7c4a3fea", + "value": " 116/116 [00:00<00:00, 4.55kB/s]" } }, - "7743a69d92f94e8fa358b458fe8aa4a5": { + "b11f3b77d8604aed8051678c6f12b172": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4148,120 +4058,54 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_3e4fbf2956b343218d2aec4d867e1c94", + "layout": "IPY_MODEL_6edf7e9504fe48be802d443b3a0dd3a5", "placeholder": "​", - "style": "IPY_MODEL_4bde08a13c41477ba6c7a4697ab599a0", - "value": " 466k/466k [00:00<00:00, 905kB/s]" + "style": "IPY_MODEL_22a5d61c70c545d095c54b4b066d4b87", + "value": "Downloading pytorch_model.bin: 100%" } }, - "a76ea0a19ea74594a843c62aa6fb03e1": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "b27dfc2df0f94537b977baa0211165f7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_27a5814b873b463d8ec884fce5b3002b", + "IPY_MODEL_d7e199cec93c42308436a13f0101798d", + "IPY_MODEL_51023e9b26e348278fb5788c4b1d3cf1" + ], + "layout": "IPY_MODEL_e37dae5f929042eea3b021d81c7e89f9" } }, - "8a909360e5704220987b703ba4cb3dff": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "b5608a8229a34e8e8405bf0420104ae4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - 
"align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "cc4c4534d0b54d0187d5a7ed82a0376e": { + "b571138b82b146f6add4079c7c4a3fea": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -4273,10 +4117,10 @@ "description_width": "" } }, - "8ff8b86348424eda9678a932fbbad132": { + "b8769ba773c144428df49ed117a4ba2e": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4325,26 +4169,32 @@ "width": null } }, - "d43e683a46ef4d8892635fc1da1bd7c0": { + "b889cd1323134a9cb0b2eb81ceb3cde1": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + 
"_model_name": "HBoxModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7e85d153ec15430689bc77ef6bf2b3c8", + "IPY_MODEL_e706e32cd2a642eb9618dd0dd75d7664", + "IPY_MODEL_7c252184b6c449d1bbba4807cde4672a" + ], + "layout": "IPY_MODEL_df86bc204d014d13990917c348b09769" } }, - "3e4fbf2956b343218d2aec4d867e1c94": { + "b999f7e8580d47bf956607b2dbf59604": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4393,113 +4243,10 @@ "width": null } }, - "4bde08a13c41477ba6c7a4697ab599a0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "54062dca2e8743a68da565014c44304c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_79efd9353e4f4ee690de5d38bd98c7aa", - "IPY_MODEL_28451c97f4fa4b8991186907a50738e8", - "IPY_MODEL_50d576d87fae494182632cbed50372ac" - ], - "layout": 
"IPY_MODEL_a57b74c1c4b6497ba8e0dafad0af4ddf" - } - }, - "79efd9353e4f4ee690de5d38bd98c7aa": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_08a1df059a4b4c26a87ccea8c5d45bbd", - "placeholder": "​", - "style": "IPY_MODEL_8a37a86d8ed44c1cb70eca68215c913c", - "value": "Downloading (…)okenizer_config.json: 100%" - } - }, - "28451c97f4fa4b8991186907a50738e8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c3b2c605e7e94299a5dadf771baf6f0a", - "max": 350, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_69fa56f6dfde42f883b24069f65ed745", - "value": 350 - } - }, - "50d576d87fae494182632cbed50372ac": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_bff335481e214de5b02478938672df38", - "placeholder": "​", - "style": "IPY_MODEL_4e25e350551e49a2bf50e081fcb0184a", - "value": " 350/350 [00:00<00:00, 20.4kB/s]" - } - }, - "a57b74c1c4b6497ba8e0dafad0af4ddf": { + "bff335481e214de5b02478938672df38": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4548,10 +4295,26 @@ "width": null } }, - "08a1df059a4b4c26a87ccea8c5d45bbd": { + "c385ab3c1ffb4a9391feaf8017ac2124": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c3b2c605e7e94299a5dadf771baf6f0a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4600,10 +4363,31 @@ "width": null } }, - "8a37a86d8ed44c1cb70eca68215c913c": { + "c4f9e573288a46d19efae3d101dbbdef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3f50cc3742724ab2bf8434c341eee0bd", + "placeholder": "​", + "style": 
"IPY_MODEL_449d71ebd4df4ddbb755d30fdff25358", + "value": "Downloading (…)9125/train_script.py: 100%" + } + }, + "c686e95be6db4030b77cfcc170aaf722": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -4615,10 +4399,10 @@ "description_width": "" } }, - "c3b2c605e7e94299a5dadf771baf6f0a": { + "c7f8a26c31154c8f9bf65dddb4095c04": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4667,26 +4451,31 @@ "width": null } }, - "69fa56f6dfde42f883b24069f65ed745": { + "c825476eb2a24cc9b8707d23bd0589ed": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_00f331e549504656ac4d8f1f07e95768", + "placeholder": "​", + "style": "IPY_MODEL_e6864110fe5b4caf8e1b869162a6c45a", + "value": "Downloading (…)ce_transformers.json: 100%" } }, - "bff335481e214de5b02478938672df38": { + "c987b5a057dc475bb7cb702afe198f74": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": 
"@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4735,113 +4524,10 @@ "width": null } }, - "4e25e350551e49a2bf50e081fcb0184a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ac4fdd9c828d473384662742dca85aa7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c4f9e573288a46d19efae3d101dbbdef", - "IPY_MODEL_9adc61965b944edc9abff9d0de7e745a", - "IPY_MODEL_483b1513ebcd4e718e69202b30163973" - ], - "layout": "IPY_MODEL_04e89773ebeb42db87c64d7b9ec7cb26" - } - }, - "c4f9e573288a46d19efae3d101dbbdef": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3f50cc3742724ab2bf8434c341eee0bd", - "placeholder": "​", - "style": "IPY_MODEL_449d71ebd4df4ddbb755d30fdff25358", - "value": "Downloading (…)9125/train_script.py: 100%" - } - }, - "9adc61965b944edc9abff9d0de7e745a": { - "model_module": 
"@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_530177663d7f47d39c43beb3ce7fab78", - "max": 13156, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_1ad860cd423c4fd893066e6925579eb7", - "value": 13156 - } - }, - "483b1513ebcd4e718e69202b30163973": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7c1fc9a54be14954acd59abde2fca97f", - "placeholder": "​", - "style": "IPY_MODEL_92790cf373b14a058b31fc0d1fcc5c94", - "value": " 13.2k/13.2k [00:00<00:00, 402kB/s]" - } - }, - "04e89773ebeb42db87c64d7b9ec7cb26": { + "cafef97c29564483b15c21b647e2e0d1": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4890,10 +4576,10 @@ "width": null } }, - "3f50cc3742724ab2bf8434c341eee0bd": { + "cb4add21dd514bf7a3d2e51726603e36": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": 
"1.2.0", @@ -4942,10 +4628,10 @@ "width": null } }, - "449d71ebd4df4ddbb755d30fdff25358": { + "cc4c4534d0b54d0187d5a7ed82a0376e": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -4957,10 +4643,10 @@ "description_width": "" } }, - "530177663d7f47d39c43beb3ce7fab78": { + "cca1ec7ef9174e37a5e17b2a77b39aad": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5009,26 +4695,10 @@ "width": null } }, - "1ad860cd423c4fd893066e6925579eb7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "7c1fc9a54be14954acd59abde2fca97f": { + "d10b85816ca540c0a5456e4126dc99c6": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5077,68 +4747,66 @@ "width": null } }, - "92790cf373b14a058b31fc0d1fcc5c94": { + "d290678ad04e431ba4923c0dba777399": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": 
"DescriptionStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cafef97c29564483b15c21b647e2e0d1", + "max": 116, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6a664e104c2f462f838738ea966a0115", + "value": 116 } }, - "b889cd1323134a9cb0b2eb81ceb3cde1": { + "d43e683a46ef4d8892635fc1da1bd7c0": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_7e85d153ec15430689bc77ef6bf2b3c8", - "IPY_MODEL_e706e32cd2a642eb9618dd0dd75d7664", - "IPY_MODEL_7c252184b6c449d1bbba4807cde4672a" - ], - "layout": "IPY_MODEL_df86bc204d014d13990917c348b09769" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "7e85d153ec15430689bc77ef6bf2b3c8": { + "d7b8af67a3a8454c97d9a68caff040c7": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_eb16cb52b8cb438984c4c75c3074de84", - "placeholder": "​", - "style": "IPY_MODEL_9ea920d0083f46e2add318c845b3fddf", - "value": "Downloading (…)7e55de9125/vocab.txt: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "e706e32cd2a642eb9618dd0dd75d7664": { + "d7e199cec93c42308436a13f0101798d": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -5151,18 +4819,18 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_cb4add21dd514bf7a3d2e51726603e36", - "max": 231508, + "layout": "IPY_MODEL_a09c50472f244d15875780d53e4cb680", + "max": 190, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_ac2387223ca04438a3e4bb59cd2f7019", - "value": 231508 + "style": "IPY_MODEL_e1623d2abb5244708dc2d5ae9bce9e6f", + "value": 190 } }, - "7c252184b6c449d1bbba4807cde4672a": { + "dceed128f53f4726afe578574c19815d": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -5174,16 +4842,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_2450d6be578e4eb6960111c26c55fa43", + "layout": "IPY_MODEL_7eed45dcf5004dccb734eb315415a535", "placeholder": "​", - "style": "IPY_MODEL_8d97796bc9964114853ce5803584d159", - "value": " 232k/232k [00:00<00:00, 1.37MB/s]" + "style": "IPY_MODEL_a5cd7c8a692f43f7859b4df8334c3f0f", + "value": " 39.3k/39.3k [00:00<00:00, 228kB/s]" } }, 
"df86bc204d014d13990917c348b09769": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5232,10 +4900,10 @@ "width": null } }, - "eb16cb52b8cb438984c4c75c3074de84": { + "e0d94012952e43549dd4d158c30c91d6": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5284,25 +4952,26 @@ "width": null } }, - "9ea920d0083f46e2add318c845b3fddf": { + "e1623d2abb5244708dc2d5ae9bce9e6f": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "cb4add21dd514bf7a3d2e51726603e36": { + "e37dae5f929042eea3b021d81c7e89f9": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5351,26 +5020,86 @@ "width": null } }, - "ac2387223ca04438a3e4bb59cd2f7019": { + "e41fb5a2fbcf485899df455800c29f66": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_637b076d35c247348d61e924f86c7509", + "IPY_MODEL_fb5eba47d398448a991aaca68e4c33f0", + "IPY_MODEL_5a549312926048079d979df5f31f6668" + ], + "layout": "IPY_MODEL_c7f8a26c31154c8f9bf65dddb4095c04" + } + }, + "e5616536b1824a388fa9e1ed826307f9": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "2450d6be578e4eb6960111c26c55fa43": { + "e6864110fe5b4caf8e1b869162a6c45a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e706e32cd2a642eb9618dd0dd75d7664": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cb4add21dd514bf7a3d2e51726603e36", + "max": 231508, + "min": 0, + "orientation": 
"horizontal", + "style": "IPY_MODEL_ac2387223ca04438a3e4bb59cd2f7019", + "value": 231508 + } + }, + "eb16cb52b8cb438984c4c75c3074de84": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5419,47 +5148,10 @@ "width": null } }, - "8d97796bc9964114853ce5803584d159": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "05c5fdaa4d624b44857f6b7d08426e6c": { + "eb1d39f0a2254be7857baa26432a5e56": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_71e2f0ea4906451f9555a638f7e713b6", - "IPY_MODEL_fdb9c374bc1e48688e439dc82103c545", - "IPY_MODEL_f14f96faaf3444cda3da447b10d57cee" - ], - "layout": "IPY_MODEL_95007200cc10458882a9e81d055978b9" - } - }, - "71e2f0ea4906451f9555a638f7e713b6": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -5471,40 +5163,38 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_530c9177c7024e98a24d8db80651ab01", + "layout": 
"IPY_MODEL_2787caea3cbc4f029e766b35ccaa6013", "placeholder": "​", - "style": "IPY_MODEL_a5abdec4f10f4b5385f01722480139e3", - "value": "Downloading (…)5de9125/modules.json: 100%" + "style": "IPY_MODEL_ae251276d3c8482ab768130bde5b9de3", + "value": "Downloading (…)nce_bert_config.json: 100%" } }, - "fdb9c374bc1e48688e439dc82103c545": { + "ee53579d441b4b2a97176d161b479cd1": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6f25e1c5d19e474aaa3f4d5806a53445", - "max": 349, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4fd8f1d3cb944c2e8a0a806e8c89a26f", - "value": 349 + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ae834f736104436593dd069cf3bdd5a2", + "IPY_MODEL_3418257fe9114669b00bc242c620bfb3", + "IPY_MODEL_24f54307ee3b499ab7519f713ce994e1" + ], + "layout": "IPY_MODEL_e0d94012952e43549dd4d158c30c91d6" } }, "f14f96faaf3444cda3da447b10d57cee": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -5522,10 +5212,10 @@ "value": " 349/349 [00:00<00:00, 14.8kB/s]" } }, - "95007200cc10458882a9e81d055978b9": { + "f22a4dfe0f6e44c2a0bc37b3fe488439": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ 
-5574,10 +5264,10 @@ "width": null } }, - "530c9177c7024e98a24d8db80651ab01": { + "f41fc8e604524a118ea2b863a01d5201": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5626,25 +5316,34 @@ "width": null } }, - "a5abdec4f10f4b5385f01722480139e3": { + "f7f3cae2779d41b9b08d299596c6026c": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b8769ba773c144428df49ed117a4ba2e", + "max": 90888945, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_914892930f74452d96cd787c128c955b", + "value": 90888945 } }, - "6f25e1c5d19e474aaa3f4d5806a53445": { + "fa8d7e78724341ca84f0f829e33d756d": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5693,26 +5392,49 @@ "width": null } }, - "4fd8f1d3cb944c2e8a0a806e8c89a26f": { + "face41511e7c4c748e123d176b219f13": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "143c8f9e754240e6a5a8386da714cb1b": { + "fb5eba47d398448a991aaca68e4c33f0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2083d409adf345e3b6209bf9b4e8d5df", + "max": 612, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4d1b0ec8de0f466eba688a33a95b6dc8", + "value": 612 + } + }, + "fd06e080368345679c280ae286dcf118": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5761,19 +5483,49 @@ "width": null } }, - "3d3d1beb5ed346209eac8f4b38325ae1": { + "fdb9c374bc1e48688e439dc82103c545": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6f25e1c5d19e474aaa3f4d5806a53445", + "max": 349, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4fd8f1d3cb944c2e8a0a806e8c89a26f", + "value": 349 + } + }, + "fe235613377d4443815ad548d7b19a2c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2a76d9c1c37a45ab8f4d7da6541c11c3", + "placeholder": "​", + "style": "IPY_MODEL_3897a135ce4d413f95305560e9d5f51d", + "value": " 10.6k/10.6k [00:00<00:00, 446kB/s]" } } } From 061a8cd543c55077732e63850c9dfdcac2ed1872 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Tue, 4 Jul 2023 12:30:41 +0100 Subject: [PATCH 08/54] clear output --- .../search/00-quick-start.ipynb | 21 ++----------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index 03b14278..9b870ec4 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "ffc5fa6f", "metadata": { "colab": { @@ -60,24 +60,7 @@ "outputId": "d9693f20-1482-4a4d-b74d-a876885f92fa", "scrolled": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m393.8/393.8 
kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m68.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m46.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m89.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m63.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Building wheel for sentence-transformers (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" - ] - } - ], + "outputs": [], "source": [ "!pip install -qU elasticsearch sentence-transformers==2.2.2" ] From 8c3d834eb5deda7481512a40e95b8cc981295bf9 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Tue, 4 Jul 2023 12:31:40 +0100 Subject: [PATCH 09/54] clean up indenting --- colab-notebooks-examples/search/00-quick-start.ipynb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index 9b870ec4..f3094668 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -394,7 +394,12 @@ " \"publish_date\": {\"type\": \"date\"},\n", " \"num_reviews\": {\"type\": \"integer\"},\n", " \"publisher\": {\"type\": \"keyword\"},\n", - " \"title_vector\": { \"type\": \"dense_vector\", \"dims\": 384, \"index\": \"true\", \"similarity\": \"dot_product\" }\n", + " \"title_vector\": { \n", + " \"type\": \"dense_vector\", \n", + " \"dims\": 384, \n", + " \"index\": \"true\", \n", + " \"similarity\": \"dot_product\" \n", + " }\n", " }\n", " }\n", "}\n", From 22e33b71ec70d9f7c2bf43e6a7dc39d8a223f89e Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Tue, 4 Jul 2023 17:17:32 +0100 Subject: [PATCH 10/54] Question ansswering using langchain --- .../generative-ai/question-answering.ipynb | 253 ++++++++++++++++-- 1 file changed, 231 insertions(+), 22 deletions(-) diff --git a/colab-notebooks-examples/generative-ai/question-answering.ipynb b/colab-notebooks-examples/generative-ai/question-answering.ipynb index 5750b9d7..ac2ac880 100644 --- a/colab-notebooks-examples/generative-ai/question-answering.ipynb +++ b/colab-notebooks-examples/generative-ai/question-answering.ipynb @@ -1,24 +1,233 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "question answering example doing the following:\n", - "\n", - "1. 
load the deepset model stored on huggingface via eland into elasticsearch\n", - "2. setup a pipeline + ingest data (wikipedia) to enrich\n", - "3. ask questions on dataset\n" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "cells": [ + { + "cell_type": "code", + "source": [ + "import json\n", + "from urllib.request import urlopen\n", + "\n", + "url = \"https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/esre-examples/python-flask-example/example-data/data.json\"\n", + "\n", + "response = urlopen(url)\n", + "\n", + "workplace_docs = json.loads(response.read())\n" + ], + "metadata": { + "id": "J8-93TiJsNyK" + }, + "execution_count": 29, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!pip install -qU langchain jq openai elasticsearch tiktoken" + ], + "metadata": { + "id": "33A-cP-XvFCr" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.vectorstores import ElasticVectorSearch\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "from elasticsearch import Elasticsearch\n", + "\n", + "metadata = []\n", + "content = []\n", + "\n", + "for doc in workplace_docs:\n", + " content.append(doc[\"content\"])\n", + " metadata.append({\n", + " \"name\": doc[\"name\"],\n", + " \"summary\": doc[\"summary\"]\n", + " })\n", + "\n", + "text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", + "docs = text_splitter.create_documents(content, metadatas=metadata)\n", + "\n", + "embeddings = OpenAIEmbeddings(openai_api_key=\"\")\n", + "\n", + "client = Elasticsearch(\n", + " cloud_id=\"My_deployment:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvOjQ0MyQxYTU2YWQyMTU4N2M0NGQzOTMwOTMyZWI5ZmExZDhlOCRiNGZkMDBhYTNlZjI0ODdiYmU5OGQ5N2YyNTBlYWUyYw==\",\n", + " basic_auth=(\"elastic\", \"\")\n", + ")\n" + ], + "metadata": { + "id": 
"dbHEoTF6vBXE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Define the mapping\n", + "mapping = {\n", + " \"mappings\": {\n", + " \"properties\": {\n", + " \"text\": { \"type\": \"keyword\" },\n", + " \"vector\": {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 1536,\n", + " \"index\": \"true\",\n", + " \"similarity\": \"dot_product\"\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "# Create the index\n", + "client.indices.create(index='workplace_index', body=mapping)" + ], + "metadata": { + "id": "BfhBEjLsOr0l" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# get the embeddings from openAI\n", + "\n", + "texts = []\n", + "for passage in docs:\n", + " texts.append(passage.page_content)\n", + "\n", + "textEmbeddings = embeddings.embed_documents(texts)\n" + ], + "metadata": { + "id": "Qc1LXk-rOzNR" + }, + "execution_count": 132, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# persist the passage documents into elasticsearch\n", + "\n", + "actions = []\n", + "for i, passage in enumerate(docs):\n", + " actions.append({\"index\": {\"_index\": \"workplace_index\"}})\n", + " passageEmbedding = textEmbeddings[i]\n", + " actions.append({\n", + " \"text\": passage.page_content,\n", + " \"vector\":passageEmbedding,\n", + " \"metadata\": passage.metadata\n", + " })\n", + "\n", + "client.bulk(operations=actions)\n", + "\n" + ], + "metadata": { + "id": "4hkOZAIqQ5t5" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Querying" + ], + "metadata": { + "id": "rXJH_MiWejv7" + } + }, + { + "cell_type": "code", + "source": [ + "from langchain.vectorstores.elastic_vector_search import ElasticKnnSearch\n", + "from langchain.llms import OpenAI\n", + "from langchain.chains import ConversationalRetrievalChain, RetrievalQA\n", + "\n", + "db = ElasticKnnSearch(\n", + " es_connection=client, 
index_name=\"workplace_index\", embedding=embeddings\n", + ")\n", + "\n", + "retriever = db.as_retriever()\n", + "\n", + "llm = OpenAI(openai_api_key=\"\")\n", + "\n", + "qa = RetrievalQA.from_chain_type(\n", + " llm=llm, chain_type=\"stuff\", retriever=retriever, return_source_documents=True\n", + ")\n", + "\n", + "ans = qa({\"query\": \"what is the nasa sales team?\"})\n", + "\n", + "print(\"answer\")\n", + "print(ans[\"result\"])\n", + "\n", + "print(\"sources\")\n", + "for doc in ans[\"source_documents\"]:\n", + " print(doc.metadata[\"name\"])\n", + " print(doc.page_content)" + ], + "metadata": { + "id": "OobeBT6rek7Q", + "outputId": "532e02ba-afb3-46fe-ca2a-35a2ad4a525a", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 134, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "answer\n", + " The NASA Sales team is the North America South America regional sales team. It is composed of dedicated account managers, sales representatives, and support staff, and is led by Laura Martinez (Area Vice-President of North America) and Gary Johnson (Area Vice-President of South America).\n", + "sources\n", + "Sales Organization Overview\n", + "Our sales organization is structured to effectively serve our customers and achieve our business objectives across multiple regions. The organization is divided into the following main regions:\n", + "\n", + "The Americas: This region includes the United States, Canada, Mexico, as well as Central and South America. The North America South America region (NASA) has two Area Vice-Presidents: Laura Martinez is the Area Vice-President of North America, and Gary Johnson is the Area Vice-President of South America.\n", + "Sales Organization Overview\n", + "Each regional sales team consists of dedicated account managers, sales representatives, and support staff, led by their respective Area Vice-Presidents. 
They are responsible for identifying and pursuing new business opportunities, nurturing existing client relationships, and ensuring customer satisfaction. The teams collaborate closely with other departments, such as marketing, product development, and customer support, to ensure we consistently deliver high-quality products and services to our clients.\n", + "Sales Engineering Collaboration\n", + "Title: Working with the Sales Team as an Engineer in a Tech Company\n", + "Sales Engineering Collaboration\n", + "Introduction:\n", + "As an engineer in a tech company, collaboration with the sales team is essential to ensure the success of the company's products and services. This guidance document aims to provide an overview of how engineers can effectively work with the sales team, fostering a positive and productive working environment.\n", + "Understanding the Sales Team's Role:\n", + "The sales team is responsible for promoting and selling the company's products and services to potential clients. 
Their role involves establishing relationships with customers, understanding their needs, and ensuring that the offered solutions align with their requirements.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "client.indices.delete(index=\"workplace_index\")" + ], + "metadata": { + "id": "SOeP4-DLnwDB" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4, + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From ae88333ae9b0a6c3c7ca1f3eed568948986bbd10 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Tue, 4 Jul 2023 20:35:49 +0100 Subject: [PATCH 11/54] adding text --- .../generative-ai/question-answering.ipynb | 122 +++++++++++++----- 1 file changed, 91 insertions(+), 31 deletions(-) diff --git a/colab-notebooks-examples/generative-ai/question-answering.ipynb b/colab-notebooks-examples/generative-ai/question-answering.ipynb index ac2ac880..4b53f7f7 100644 --- a/colab-notebooks-examples/generative-ai/question-answering.ipynb +++ b/colab-notebooks-examples/generative-ai/question-answering.ipynb @@ -1,5 +1,48 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Question Answering with Langchain and OpenAI\n", + "\n", + "\"Open\n", + "\n", + "This interactive notebook uses Langchain to split fictional workplace documents into passages and uses OpenAI to transform these passages into embeddings and store them into Elasticsearch.\n", + "\n", + "\n", + 
"![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAf4AAACiCAIAAAAvNjhKAAAMQWlDQ1BJQ0MgUHJvZmlsZQAASImVVwdYU8kWnluSkEBCCV1K6E0QqQGkhNACSC+CqIQkQCgxBoKKHV1UcO1iARu6KqLYAbGgiGJhUex9saCirIsFu/ImBXTdV7433zd3/vvPmf+cOXfm3jsAqJ/gisV5qAYA+aJCSVxoIGNMSiqD9BSQAApoQBvYcXkFYlZMTCSAZbD9e3l3HSCy9oqjTOuf/f+1aPIFBTwAkBiIM/gFvHyIDwKAV/HEkkIAiDLeYnKhWIZhBdoSGCDEC2Q4S4GrZDhDgffKbRLi2BC3AqCixuVKsgCgXYI8o4iXBTVofRA7i/hCEQDqDIj98vMn8iFOh9gW2oghlukzM37QyfqbZsaQJpebNYQVc5EXlSBhgTiPO/X/TMf/Lvl50kEf1rCqZUvC4mRzhnm7mTsxQobVIO4VZURFQ6wF8QchX24PMUrJloYlKuxRI14BG+YM6ELszOcGRUBsBHGIKC8qUslnZApDOBDDFYJOERZyEiDWh3iBoCA4XmmzSTIxTukLrc+UsFlK/ixXIvcr83VfmpvIUuq/zhZwlPoYrTg7IRliCsSWRcKkKIhpEDsV5MZHKG1GFWezowZtJNI4WfyWEMcJRKGBCn2sKFMSEqe0L8svGJwvtilbyIlS4v2F2QlhivxgrTyuPH44F+ySQMRKHNQRFIyJHJwLXxAUrJg79kwgSoxX6nwQFwbGKcbiFHFejNIeNxfkhcp4c4jdCorilWPxpEK4IBX6eKa4MCZBESdenMMNj1HEgy8FkYANggADSGHNABNBDhB29Db0wjtFTwjgAgnIAgLgqGQGRyTLe0TwGg+KwZ8QCUDB0LhAea8AFEH+6xCruDqCTHlvkXxELngCcT6IAHnwXiofJRrylgQeQ0b4D+9cWHkw3jxYZf3/nh9kvzMsyEQqGemgR4b6oCUxmBhEDCOGEO1wQ9wP98Ej4TUAVheciXsNzuO7PeEJoZPwkHCN0EW4NUFYIvkpytGgC+qHKHOR8WMucGuo6Y4H4r5QHSrjurghcMTdoB8W7g89u0OWrYxblhXGT9p/m8EPT0NpR3Ymo2Q9cgDZ9ueRNHua+5CKLNc/5kcRa8ZQvtlDPT/7Z/+QfT5sI362xBZgB7A27CR2DjuKNQAG1ow1Yu3YMRkeWl2P5atr0FucPJ5cqCP8h7/BJyvLZIFzrXOP8xdFX6FgiuwdDdgTxVMlwqzsQgYLfhEEDI6I5zSc4eLs4gqA7PuieH29iZV/NxDd9u/c3D8A8G0eGBg48p0LbwZgnyfc/oe/c7ZM+OlQBeDsYZ5UUqTgcNmFAN8S6nCnGQATYAFs4XxcgAfwAQEgGISDaJAAUsB4GH02XOcSMBlMB3NAKSgHS8EqsA5sBFvADrAb7AcN4Cg4Cc6AC+ASuAbuwNXTDV6APvAOfEYQhIRQETpigJgiVogD4oIwET8kGIlE4pAUJB3JQkSIFJmOzEXKkeXIOmQzUoPsQw4jJ5FzSCdyC3mA9CCvkU8ohqqh2qgxao2OQJkoC41AE9BxaBY6CS1G56GL0TVoNboLrUdPohfQa2gX+gLtxwCmiuliZpgjxsTYWDSWimViEmwmVoZVYNVYHdYEn/MVrAvrxT7iRJyOM3BHuILD8ESch0/CZ+KL8HX4Drweb8Wv4A/wPvwbgUowIjgQvAkcwhhCFmEyoZRQQdhGOEQ4DfdSN+EdkUjUJdoQPeFeTCHmEKcRFxHXE/cQTxA7iY+I/SQSyYDkQPIlRZO4pEJSKWktaRepmXSZ1E36oKKqYqriohKikqoiUilRqVDZqXJc5bLKU5XPZA2yFdmbHE3mk6eSl5C3kpvIF8nd5M8UTYoNxZeSQMmhzKGsodRRTlPuUt6oqqqaq3qpxqoKVWerrlHdq3pW9YHqRzUtNXs1tlqa
mlRtsdp2tRNqt9TeUKlUa2oANZVaSF1MraGeot6nfqDRaU40Do1Pm0WrpNXTLtNeqpPVrdRZ6uPVi9Ur1A+oX1Tv1SBrWGuwNbgaMzUqNQ5r3NDo16RrjtSM1szXXKS5U/Oc5jMtkpa1VrAWX2ue1hatU1qP6Bjdgs6m8+hz6Vvpp+nd2kRtG22Odo52ufZu7Q7tPh0tHTedJJ0pOpU6x3S6dDFda12Obp7uEt39utd1P+kZ67H0BHoL9er0Luu91x+mH6Av0C/T36N/Tf+TAcMg2CDXYJlBg8E9Q9zQ3jDWcLLhBsPThr3DtIf5DOMNKxu2f9htI9TI3ijOaJrRFqN2o35jE+NQY7HxWuNTxr0muiYBJjkmK02Om/SY0k39TIWmK02bTZ8zdBgsRh5jDaOV0WdmZBZmJjXbbNZh9tncxjzRvMR8j/k9C4oF0yLTYqVFi0WfpanlaMvplrWWt63IVkyrbKvVVm1W761trJOt51s3WD+z0bfh2BTb1NrctaXa+ttOsq22vWpHtGPa5dqtt7tkj9q722fbV9pfdEAdPByEDusdOocThnsNFw2vHn7DUc2R5VjkWOv4wEnXKdKpxKnB6eUIyxGpI5aNaBvxzdndOc95q/OdkVojw0eWjGwa+drF3oXnUuly1ZXqGuI6y7XR9ZWbg5vAbYPbTXe6+2j3+e4t7l89PD0kHnUePZ6WnumeVZ43mNrMGOYi5lkvgleg1yyvo14fvT28C733e//l4+iT67PT59kom1GCUVtHPfI19+X6bvbt8mP4pftt8uvyN/Pn+lf7PwywCOAHbAt4yrJj5bB2sV4GOgdKAg8Fvmd7s2ewTwRhQaFBZUEdwVrBicHrgu+HmIdkhdSG9IW6h04LPRFGCIsIWxZ2g2PM4XFqOH3hnuEzwlsj1CLiI9ZFPIy0j5RENo1GR4ePXjH6bpRVlCiqIRpEc6JXRN+LsYmZFHMklhgbE1sZ+yRuZNz0uLZ4evyE+J3x7xICE5Yk3Em0TZQmtiSpJ6Ul1SS9Tw5KXp7cNWbEmBljLqQYpghTGlNJqUmp21L7xwaPXTW2O809rTTt+jibcVPGnRtvOD5v/LEJ6hO4Ew6kE9KT03emf+FGc6u5/RmcjKqMPh6bt5r3gh/AX8nvEfgKlgueZvpmLs98luWbtSKrJ9s/uyK7V8gWrhO+ygnL2ZjzPjc6d3vuQF5y3p58lfz0/MMiLVGuqHWiycQpEzvFDuJScdck70mrJvVJIiTbCpCCcQWNhdrwR75daiv9RfqgyK+osujD5KTJB6ZoThFNaZ9qP3Xh1KfFIcW/TcOn8aa1TDebPmf6gxmsGZtnIjMzZrbMspg1b1b37NDZO+ZQ5uTO+b3EuWR5ydu5yXOb5hnPmz3v0S+hv9SW0kolpTfm+8zfuABfIFzQsdB14dqF38r4ZefLncsryr8s4i06/+vIX9f8OrA4c3HHEo8lG5YSl4qWXl/mv2zHcs3lxcsfrRi9on4lY2XZyrerJqw6V+FWsXE1ZbV0ddeayDWNay3XLl37ZV32umuVgZV7qoyqFla9X89ff3lDwIa6jcYbyzd+2iTcdHNz6Ob6auvqii3ELUVbnmxN2tr2G/O3mm2G28q3fd0u2t61I25Ha41nTc1Oo51LatFaaW3PrrRdl3YH7W6sc6zbvEd3T/lesFe69/m+9H3X90fsbznAPFB30Opg1SH6obJ6pH5qfV9DdkNXY0pj5+Hwwy1NPk2Hjjgd2X7U7GjlMZ1jS45Tjs87PtBc3Nx/Qnyi92TWyUctE1runBpz6mprbGvH6YjTZ8+EnDnVxmprPut79ug573OHzzPPN1zwuFDf7t5+6Hf33w91eHTUX/S82HjJ61JT56jO45f9L5+8EnTlzFXO1QvXoq51Xk+8fvNG2o2um/ybz27l3Xp1u+j25zuz7xLult3TuFdx3+h+9R92f+zp8ug69iDoQfvD+Id3HvEevXhc8PhL97wn1CcVT02f1jxz
eXa0J6Tn0vOxz7tfiF987i39U/PPqpe2Lw/+FfBXe9+Yvu5XklcDrxe9MXiz/a3b25b+mP777/LffX5f9sHgw46PzI9tn5I/Pf08+Qvpy5qvdl+bvkV8uzuQPzAg5kq48l8BDFY0MxOA19sBoKYAQIfnM8pYxflPXhDFmVWOwH/CijOivHgAUAf/32N74d/NDQD2boXHL6ivngZADBWABC+AuroO1cGzmvxcKStEeA7YFP01Iz8D/JuiOHP+EPfPLZCpuoGf238Bk9B8XQo0or4AAAA4ZVhJZk1NACoAAAAIAAGHaQAEAAAAAQAAABoAAAAAAAKgAgAEAAAAAQAAAf6gAwAEAAAAAQAAAKIAAAAAGoXaLQAAJsNJREFUeAHtnXfYFcX1x1FBKbHgg6KigMZEsKMoMSoqalBUxBIhWGOMsfcSEwvGhr0QBPNYiCZKRIEQwPAEFRGNogRRihgLigJBjRpRVCy/T5gn6/5umbt3Z+fu7rvf/eO+80495zuzZ2fOnDmzyjfffNNMjxAQAkJACBQJgVWLxKx4FQJCQAgIgf8iINGvcSAEhIAQKBwCEv2F63IxLASEgBCQ6NcYEAJCQAgUDgGJ/sJ1uRgWAkJACEj0awwIASEgBAqHgER/4bpcDAsBISAEJPo1BoSAEBAChUNAor9wXS6GhYAQEAIS/RoDQkAICIHCISDRX7guF8NCQAgIAYl+jQEhIASEQOEQkOgvXJeLYSEgBISARL/GgBAQAkKgcAhI9Beuy8WwEBACQkCiX2NACAgBIVA4BCT6C9flYlgICAEhINGvMSAEhIAQKBwCEv2F63IxLASEgBCQ6NcYEAJCQAgUDgGJ/sJ1uRgWAkJACEj0awwIASEgBAqHgER/4bpcDAsBISAEJPo1BoSAEBAChUNAor9wXS6GhYAQEAIS/RoDQkAICIHCISDRX7guF8NCQAgIAYl+jQEhIASEQOEQkOgvXJeLYSEgBISARL/GgBAQAkKgcAhI9Beuy8WwEBACQqB5PAhWWWWVeAXzW+qbb77JL/HZpDzFUaTezOaQEFUNQyCm6Ie+Qr08KQqphg2FVBpKZRSpN1PpazWaKQSk8MlUd9RHzPvvv//BBx/UV0a5hUCzZp9++um8efPee+89gVFYBCT6c9n1kydP3nfffdu1a7fuuuvutttu48ePr8bG1VdfffTRR/OqM9V99dVXP//887vvvpt/w/kHDRp03HHHhWOCcMX8QaoCuUOAuUL//v3btGmz5ZZbrrfeevvtt9+iRYvicaGxEQ+3jJSS6M9IR9RBBoIbuf+jH/1o2bJlb731FqL/oIMOqjb9R6Py9ddft2zZ8oknnthoo40++eSTn/3sZx9++GG4vWOPPfaXv/xlOCYIV8wfpCqQLwQYDH379n3nnXdee+01wi+//DJD6LDDDovHhcZGPNwyUkqiPyMdUQcZZprWvXt35m6bbLLJZZdddumlly5fvvzRRx894YQTzjzzzPbt2++1117z588PKl2xYsWvf/3rf//73+Y933///d99990g9bHHHnv44Yc/++yzHj163HXXXVtttdV3v/vdO++8kwzh/KgIqHbNNdck21NPPRUUVyAvCEydOnXatGl08WabbQbNW2yxxR133LHTTjshxO+55x5GEdOCX/ziFySNGDFiu+22Yxgwur788kti/vGPfxx++OEMrQMPPPDJJ58kJjw2lixZwmKCVCYlM2fOJFVP1hHg4x/jgasYpfJbJFP8Isd5db/zne+cccYZY8eORaAbYB944AHo/PnPf86byTqgX79+xF955ZUDBw7k3SbplVdemTRpEoHRo0ezWg+6g3cepZDJ8/3vf588iACy8TkJ8v/nP//ZeOON+WZQ+cUXX0zr//rXv4Ia4gVoIl5Bx1JptetItnvxYcOGrb/++hXrYZwAS+/evZkHTJw4kfBtt932t7/9jU6//PLLKcLAOPHEE5977rlzzz0XZRFLyWBsMGnYeeed
99lnH/Kb5SNr0IqtKDI7CMR894r28mSNXyZZ11133bbbbgthPGefffZXX31lRD/vIcMLRRDxSPMS0c/OMPEs+cNDMCz6efNJQtCTjWl+kP+RRx4hhnhTENF///33hyuJEabCGKXci6TVrjvljjWw8YMEr1gJ44SvAqOIVOYNfPtNNpYIpgij66OPPmIA3HfffQDItCAYG88//zwxb7zxBkX4JFDPQw89ZIrrN7MIxDfupLP1pIIAwh1Vz/krH74BaGYuueSSPffcE2L4GKyxxhoEOnTowO+LL77Ib10PGiTyo9Xhl5VBUPbNN99krmfiiUQbwKw/SFUgFwhsuummrPxYJmIdYAhGlP/ud787/vjj+Zdp+6qrrkoAVeFf/vIXhL7Jw2eeAKW+973vLV261CiLTJL5ff311wlQeRAZVicGkQpkCoH/9rSefCHwxz/+ESWsoXmDDTZA/cKq3Lx+vJ8mnjecAPOvellbbbXVKhbp2rUrKwlmhaQykZk1axZr/Io5FZlZBOhEaGPDP6AQhSFaoHXWWSeIIcCH4cILL2TJyPP2229PmTKFbeFTTz31+uuvZ7I/ZsyYcGbCa6+9Nr/MQkwRlEKxt45Lata//hDwMutvkkdm7EwhDf11UknNu+yyC5OvW2+99eSTT27evDkqV95P5vtEEkAzs8cee9x+++3I/Y4dO5aUNdM6VLFY+5QkVfw3yE+jZGCFgSoAlS5h36LfDnhFaqNH2itvZG9Gp9k9J2u1H//4x+eddx6LQgYMMhqBjqrH9HJQP9P/Bx98kE0jFnmnnXYa5mFoFEklHtx++9vfEkaxE4yNHXbYgRgUQWw+MTbYB549ezaWo0GFuQssXLhwwYIFrHRRjfJJ483iDAR2cai8zOcNK7svvvgiGCdAsfrqq7M8atWq1VprrcW3kM8nCLDvzYvGzKxTp06slko+senC4kX0w1IASrrsNaZ1uxxJnAYUL4hgjHnOOussKmdgocPt1avXyJEjGXxY6DNSiUQ5y4cB2oIXmzCDb9ddd916660Z02HpH+QJU1uSf/DgwSeddBKCA4tAtAFUHs7sI5zKKGpwb/rAzVLnkCFDkOnYaJk8CGs2bwmHxwkbuU8//fTmm29OPB/4UaNGoQY84IADjBaRUcesgkpYfQZjibE3YMAAdJAUYTRiJGbqz8UvAp3tCgyTUJDOnTsXm1fEN5IaeQ3L/GIExRmatm3bItN5xVq3bs3nsEWLFmaJzChlNcyXgCURnwR0aHwkWH+j9UIpyvqb/TMW5TyUxaqK93ebbbbZfvvtd9xxRz4YaUG0Sry3i4FiKWhPTYtVf+2mwi+DjMHEvIyhaVjj9bv55pt5aZmwoHitKM1NTmb9jOPogAT5GdMsLHgrGP3Ri1fLacfNnlqtTvf4tNp1pzx6DezQMpllkFTrR95u5rxs9rDHCyCmZubCTGbZZ2K3icfMYYOxwXSYnV5kZV1DKzrNyeZE3GMMjS4La9c5c+bwhUMQsyrio4VaLNjTSrZRzLLZR2FJxDeGL82MGTNYTP/whz/s2bMnUzc+DMk2Z6/NJsEtJe2vhz3VUm1OkzLCrxH9zz77bF5gtONmT/XHY1rt+uNINQcI8D1je4PT78zEsWTlnApiN1gDBdkaE8BK++9//zvW0nyBOJ/PIU2WVpy569y5cwMIkOhPAOSMCAvWqtOnT6/mkiEBPpOuwo6bPTVpWr6tL612v6VAoaQRQBWDLTJzIybahxxyCOK1T58+aGySbid+fSyw2KXDsIpddNYfP/nJT4466iiv6iCJ/vi9FZSUsAigqCtgx82eWldDdWVOq926iFTmiAjgtwrrVfbGsH3gbCO73BELppiNUxHsmbMgYBuG3TX27XwQI+NOH6iqTiEgBFJGAFvk008/nW1V9i3wQsFsOhdyH9RwmPHnP/8ZXylsSGCkxwFpNlQSR1OiP3FIU66QgW6crqRMh5rPMAJYpPDUS2D5uMLEszyy3mp95L/h
hhvYoMbUEqHJcYTGaM+TZYQDdJhjYVXBBwCjoOCEXVKtSPQnhWRW6sFuIXyusi6y0DZi5FBXEWVOFwG6Gw1VyVPTDzPW+sZpT3TisUspV46jSzniiCMwNoMAVCvRa/OXk41cNkvZxX3hhRdwdoKZpr+2GlAzFticucPpFlAfc8wxQJ1UoxL9SSGpeoRACggYG+uVVuPf/nCSqJGkBC7BG9loxbbYxcVK8gc/+AEe6Lp06VIxTx4jsQHFcg9rWk7VYWKbCAspiP6SGUoT+JeesHORSFdVq4RNIRSavO0cz8HammycLvnVr37FMRxMBYxrraFDhwazPE5jXnvtteQsd9EcNIH9PlYQnOEihszUT23UYARNkC3FgB1wl1SYshdPketqTbPOCz8cNcIbMyezmCfScQTMIKEfJ0yYYCrBmH333XfHgP3II480qmQs/csdL1OQUcRQ+cMf/mAKouTBlydV4Z/ZWBIHLsExMKOG3/zmN2bsPfPMMxRBs3TFFVeQH9Oaq666ygwqNO/GATgUcgaqGl91xfPpO/TQQy+66CJcWtVVMC+Z8aWKYRI7ARyhSIBmXuYYDw1bSrmkWqrNaZIdDUemXnrpJepnTKCrQbmJYYDxssBrhqEYMyDcutEELlkCX4yETznlFDN6KBJ20cxcifP9HNbllCb+mfk88PZydBNjA84Gc+SEV92R4HBxOzIuqeFWkg3bqUq2rSi1me7GcjF4Hn/8cQrijRVS+UVRQIBOpAdx08ZxPFKxGyESGc2AIYl/Eegca2JSSYWB42XOBpKNKcXvf/97shGmrBkJN910kxHijDQzlji2Gow9AgwhLhEi/7hx4xg5eIagOWrAzMY4dyOGbIy3G2+8kWzuDzeOXXPNNe71ZLwGLuTgbiV3Ir2fxaez9fhDgMP0vGBYMtAEU3vjq5kwrzSnLpn9oYq1tD58+HAmXywn2UQy7zlyH0f/zOOY7uEElApxC4GGkbkGe025dsxiwSHvSexqBiwwHowbV4Qv03PicfDHxx5HC+wccj0nnUskWhEzO2bSzWwAvyDM2TmOy47o3nvvTTZ2EUhCNOMtivyEzTBjfoDRofHqU/H84L333ousZ/vXWNSQHzJM2OQ3CmucHDC9YIKC0X1AfOzAn/70J4ZutcvmYlebwYK33HILSyh6h25yIS8FhY8LuSpbggCLXJyBmEimbMb7LhM05D6RLOd5H0qKoA4KYliYm2z8GhfNVMjBQj4DxnKDFTRTDAYZp/P5PBgfjUFxBTKCADru4DGSGsKMEx4C6IiR4ATMESFj28NoMcSzsc8gMZu0aI1QduEChO89c3M2SznsarLhx8YE0NUYX378ixMCExn8MvaMQwL2V83YY4sSUWUymIIYLPKt4kPC+OSbZPE4ElRbM8AcyCxlauZsAhmAjnWYIyMS/Y4AplwclyOYfxkicMTIa0m44iHAwJiPmV1AdLmLZl7dxYsXU4NZnlM563p8UTEN5DAkL1hQVoGMIxD2r4dAL6EWV5Qm5p///CcLBeN4p8TxMh+PYHQFBjx8KvD9Z8oa3+DhmsvHHl8CrG5MHuongAehgw8+GHUinxYsFwcNGhSuIV4YnWTwMYtXQ45KofFHredIsES/I4ApF2cQMEnn9AevE7MeXqSKBDHP4l5WhD5nW/BhUjGPieQCAB62gtHA4mSKzMYzBB7Y2Rgw28iW4kpKBQF6KvxEUaGwbYs0Ryhz0hVryMDxMh8MxApzfCb+xDO6+JfvBLf1GtYYCah0KMtGMSePavKL9okpKmOPdYmphEa7devG9wNlI9tR1QZtzZqDDCi+obDBdk1B640PsGGDgs6xXYl+RwBTLs4bixaeWRtOZXGaiD63IkG4UEfPw4hhtkXminnCE0OyMYcyCmLecPw1ourhBcMapGJZRaaFgOk1ZuLhh4l8CT3hzjVhdm5Q/aPZ//jjj9HpMX5Y1eF1mQ0eRotxvMzowscZ0pk9HqPGoVrGGDMAyuL6
my9EoK4JNxFuHcMw6mHsMU2BSJYF/IuWn+uGENY0ijVaOH+MsGm6ONfGwan7UikFHz70E0IkRgfntEgD+GXKj+rGONGthhImHGhyOCFS7RWtVpA9ADYAUAiwGqiWJ168HRmX1Hj0RCllpypKDdnJgwRHA4PT72BIVHS8zMYPlvvh3mcfiHu7GEso62uyw5KUkYnuiI8KK0g+GFz5QCnm/kxH+ITUrCFKBlhgHVOQqQl6V5y7OUrRmFLY/gK4pEbp5nzlsaORL16SpdaOjEtqsnSGa7NTFc6psEHALCa44IXDIlinYC7s4yIX+oXlLA6QiwA7BzK4ZsBR9EvhU4ShIh6FQGoIYF6MKwJWnCwduEfIh9w3vKGSMkcNUmO1IQ3DY6B8c2lQs34X9CKV1TyxGkx2ZBxTqzXqO95xLuabvKZaP6MFHRT7xhxWcN8CzSxK7MlzNJqNd3ZKHEda88wyKcKEgAsCji9GvKYRQPEKqpQ7Auwkc4aARQYrjCbpywGhj4kUPMKpO1xS+LhjqBqEgBDIBAK4G+KsANsJWBNxu3omaEqCCHiBI/iCO3hMospmEv2JwFjoSi677LJC8y/m60HA92jBYAkXdRikcsr9ggsuSMo3XD0sJpkX+uECXuAIvuAuqdol+pNCsqD1cPSfc7/48Coo/2K7HgQaNlqwH+WkMUfbMErmsILxT1UPpennhWYoh364gBdjEZsgWRL9CYJZxKo4E4R/GM58Gi8xRYRAPEdGoJGjhRPs3HI1f/58NmA4RMbsZNSoUZEpTTMjdEItNEM59MMFvCROkER/4pAWrkLO+uLkh0OhnPk0TqQLB4EYjoxAg0cLR8m4qwunERxRxn81reO9HP8Txn1pZKq9Z4QeqII2KIROqIVmKA988CVOgUR/4pAWsUKu7sNJCybbKCVvv/32IkIgniMj0PjRwiW9SFWuB5g5cybLU1xUcciAjVOu7a3odzoyK64ZaR0aoAR6oAraoBA6oRaaXWu3lpddvxWeJBJZtaViaJgE7bXrwJKaTIEfQdz+cH6HgYsBcs3CdmT8pdYkLHYGO82xq20yBV1Gix2EepHHZxwu76dMmcKxWLxUcU0NljO4GGL60rVr1ygOKuz0VExlZTxv3jyamzVrFs7suCCB5jiEjIc7/KLX5RG9Xn7L6YkplewNu6SWk5j3GDsaeeeu5GWGHdw7c5UHN4HwUtm5syPjL9VOlUuqnWaXmptGWZfRYkfABXk+A9xjynQb76dz587FkpLLBrCdx5wGl4i4KsKTOe7t2HFFOnOStnXr1kzSWbsEPs/xiI7GBp9I3ELDLQVUyJ2XOBPF++miRYtQ3eCwCEdYmOtwYzCO85jd47u0e/fudYn7MAIu/Jp6JPrDeHoJu3eSF7ISqrT8ZaZizlVyiQfuuniXLE7lQMZOhWW1ZEe1Zqq9XX+pFo78NZqdml1Gi50Le4/by5ancvU5BjbIa6Q2Hu6Q4MhxfBAh0/Fwx4OIx/dc0Ju0jkdSPglckcSDQGfY87Xgm4HbO74ffEXY4jY3I5U3FyPGnV+d5o0Bu4rUQIBT5viVxURhww035M5YzBUqFgjenIqpDO6K8YlE2ptOpInySrxyVN5cXmIijpZGsoOM5sFLWs1GGUg57VZt89bsXGWIiQDXwvTt25crHotwY2pMjFTsfwjkdLTkVO6Dumb9/xt6+usBAS7LxuaHuzhmz549fvx4Dy2oyqaDgEZLQ/uSBUuMBxItpVxSLdXmNMmORpgpl5t3MBKgoVQemg5zUR7GjoJ9M3a3ypMsMfCSSqqlUcckO0f1Vq7REkYsWWzDNWcz7M6vtnnB0O/DkpDRE6WNyy+/nGy+nZxEocRHHq72Za/sr3/9K4Z0Ueq34+YvNQpt8fLYaa63To2WMGLJYhuuOZthd36l689mzzZBqvBDguk0Rp8333xzE2RPLCWKgEZLonBWqEyivwIoivKEAMcUzzzzzHPOOeeYY47x1ISqbTIIaLR4
7UqJfq/wqvJSBJjyc6k0G3o9evQoTdP/QuD/I6DR8v/xSPI/if4k0VRdURAYOHAgl04899xz99xzT5T8ylNkBDRaPPW+RL8nYFVtVQQ45IXGf6eddvrpT39aNdPKBPayqj32gi6p1Vp0j4cqeyUuZDfVstFHS1NFwBNfEv2egFW1lRE4++yzjzzySC5Qrekx0W5UV7n2hGLtTXtKTYj2JlVN9NHSpNhuCDM60tUQmNXISgT23Xdf3CXedNNNvNKCRAjYEdBosePjmCrR7wigikdFwNj1o+WPaNcftV7la4oIaLT47lUpfHwjXF/9HNG0q4OrpeITsVqS73jjjtHC5xNPPIEvQzzccpmX5L4FqHqTNFrqRUz5v0UgnuKS8paCLqmjR48OiMM1du/evfHrZGmrYUk4477rrrvw11pvi3Y06q0ta/nx4mB35HD11VeDwAEHHJAs5XZU/aUmy0W4NjvN4Zz5DfsbLUVAL9zv7vxmbtYPe3CFy18eDMDxgo3vX+5PIDLdB6HP5VP47E6XjHy13r9/f3y3XXjhhfLdlq+OS4VajZZGwp450W+Y54oDHlz+jhs3DkNANgaJ524zdAvcncZpoKeeesrkfOihh7bYYov27dufccYZTMyffvppSpkk8hx++OGE77jjjnPPPZcTpPjgJmCKUGrChAkm54gRI7ibDb/h+M/58ssvqYcmmONzfRqR5q7Bww47jMz777//u+++O2bMGEMJdXLzjqlEvyUI8M3mpmmWcYMHDy5J0r9CoAQBjZYSQLz/G15ERA9DliWzS+rDDz9cUhyFJgpixPHGG2+M5H3yyScvvvhidEEsC1566SUy33bbbY888gj7Qvfdd9/EiRPJZmhjpskta4QvvfRSsvHLVfcE+Kg88MADxx9/vEmliKmEg+OUxSsWE3xiqHDSpEnM9AkvX76cMAEEGRf3EHjwwQfJz0VrN954owUKkshsz5Dr1IpL+FdffZXLifjQck2dJ+7sqPpL9cQO1dpp9tduI2v2N1qKgF64p9z5jSmV7A27pJaL/iFDhuDvF+FOtUyxDf+Ifs56cAcIlxqbGAQx8/Rqon/XXXc12ZD7fEsIcxsnFXLL2kEHHYR8N6nUgMQ3ov+xxx4j0kzqWXC8//775DfXbBKAKi5Z5go3bvI0Zav9krlaUhOIL3+ZwZAdXS5o9MqdHVV/qf6YstPsr91G1uxvtBQBvXBPufObUYUPjAUP1xnj8REhywcAbY+JRz/DrJ8kbgIxMfvssw8T+aAUAVQ3wb+bb765CXNnJlN1wuwi8Mt9yvPnz0daGTMYvgFco2xyMmklYFr8/PPPTSS/HTt2vOGGG04//XSSTjnllFVXzQGGAfG+A6eeeioYHnfccYFGzl+LFsulVBq10BMlCZrt2fwxlVbNjRwtafGY2XazLrbQtLDZi+jv2rXrW2+9haQGSr5+s2bNQguELv7tt9824OITBhU8YSbyJob8JsBv8+bfnmDgBQviCay77rrsQzLT56E27hUxqUxdw9mCMNP/gw8+GAUUywKuaWYNESQVPMD+x/Dhw4cOHWp2R7yiEZ4BlYdTbLqcmERivHKUSuWNHC2pMJjxRjMq+pmJ8zBz5EQfup1jjz12l112AUpkCnN5rvsgjOjv06fP5MmTyYY4PumkkxDEHTp0WLp06dSpU7kVBDEUBX2WC3wzFi9evGzZstNOO40ZfcVSZnaP8pr1R7du3dD8MHZ79epFoxXzFypyxYoVm266KbcwvvDCC6yECsW7mK0XAY2WehHzkT9zot9MybusfPbbbz92YlHfo6Vp0aIFhiLI97Zt2yLxb731VibyWAVgw4O6v127dgj9AQMGoBRClO+xxx5IIrT2FSELz/oJY/PD7i4aIcyE0PZce+215aXIBg1sGGy99dZsYLLbjOUP+UeOHInxYnn+QsVwUIuVE+ovbJ+22WabQvEuZutFIJXRwtvKyj4gFcUAw/W6664LYiwBpnqjRo2y
ZLAkuZS1VJtMUrzVKG1bCrqkWqoliUk3Vj1oZsLZmOCXWJIwhUc7H85jD3/99ddvvPEGG7YE7DkZuCbDggULuEjIntmk2tGIUkOW83Tu3BkG+/Xrlyki7Zj7S/UHgp1mf+0mW7O/0WLHB7M9pnfB241JCPmxQ4vCHfblxhQwSuaSPC5lS6oq+dfOb0nmiv/aJHjFAibS3rBLqqXRnCbZ0cgpU2GysZoN/5uFsB1zf6n+eLfT7K/dxGv2NFrs+JgzodOnTzfsnHzyyZwWIoz7WHQGWP0dffTRzCCJQRnF4R5i0B9gxYchCXKfyjnWQyrnVIhHBc1cZ+HChcTcfffdl1xyCQYmJ554IvNCTheZsgj9uspSFddXYIFCc0AEGcRYHju/loJBkkR/AIWvgHsn+aKs6dZrx9xfqj9E7TT7azcvNdfEB+UwtuCwg1RFdg8bNmzJkiWUwlQPH1M4GjFm4mwQksqRIA4JkTpnzhw8kSDNZ8yYQZiYs846CxeE5Ef9yzLiyiuvJBJ/Mxh9XH/99QSwGjeRfAmily0/XWRHnkbtGWqmxixvb9gltSbFuctgRyN37OSCYDvm/lL9gWOn2V+7eam5Jj633HKL0fkg6MmMaTi3PwZaIJS9RKIoxngE7ZDhms8DmQOlzUUXXWTWCqRy0If8qPKR8nwYsDwkEgMHPjBopPkkcPIUvXT0suWni+zI1+TXXpzUzG3zwpIeISAEhECyCGAPgun2zJkzx44dy5wdef3aa68Rg+UeRhyYldAcOh9UQ9iRm6YxKunZs2dABvmNnSExnTp14he7Bn6xKzHmf3wbWrVqhaUDNiOYurVu3Tp6WQwaK54uCmpIPCDRnzikqlAICIHMIYAFIKbY+O/inNBRRx0FfdjsMcf/73GeTz7BLeO0adOwCeSIaHCoE68t5sy/YQaZHhwVYouYSOz9wnxSCe5eWAqgFEKPZFYGEctWO10Urj/ZsER/sniqtqwgwFSu2pMVEkVHYxFgL/eaa67h3A+zflrefffd2fhFuDNOMBY/9NBDmbzze++992K/h2afrV3umSCSEz8cJ8KmHMnOF4KyeJFBRbPGGmuEOWC/F9PwDTfckCtIiefUZ/SyEU8XhZtzDddUCVXMQKsV402kS6ql2pwm2dHIKVO5JtveIy6p/mCxU+Wv3bzUHAUf9l3JxpQ/YOqcc84hhgf9D1eHEs8eAPp6YtjsxU0kMRzeJJUFARp8Tg6ZJGKwDiL1qquuCipkp9ekUta4BYteFvN0FiUU52EtwtIhILJigGwV46NHrkLWlc3V98N30lLQJbU+OvKQ245GHjhoajTae8Ql1R9Sdqr8tZuXmmPjg74ecY92vmXLlgGz7AEg3I2bLyKZ8uNRhlNghFEH4beRM2Jh3zBBQbw9smIgtU2bNiYyelkkKhKfA0l8ZmAnqLNiIDa/QW02CR5kKg/YG3ZJLW8r7zF2NPLOXR7pt/eIS6o/NOxU+Ws3LzUXDR93fqXrz8vYFp1CQAgIgcQQkOhPDEpVJASEgBDICwIS/XnpKdEpBISAEEgMAYn+xKBURUJACAiBvCAg0Z+XnhKdQkAICIHEEJDoTwxKVSQEhIAQyAsC315bWC/FWBfVW0T5hUBGENDozUhHiIy0EIgp+i3nueBE71Va3al2oyCg0RsFJeVp2ghI4dO0+1fcCQEhIAQqICDRXwEURQkBISAEmjYCEv1Nu3/FnRAQAkKgAgIS/RVAUZQQEAL5QmDQoEH5ItiRWnd+Y7pvs9Ntdy1kT7XXnMfUovGbxz4K02zvL3tquJ5kw2m1mywX/mpbb731Zs+e3b59e39NZKdmXI1yS4y5Iyw2VZr1x4ZOBYWAEMgKAlydOHny5KxQ45kOOA2uiozdlER/bOhUUAgIgawgwMVYw4cPzwo1numAU3MRmEs7Ev0u6KmsEBACmUCgf//+3I01ePDgTFDjkwh4hFP4dWwk5pGumq2imqyZRxmEQDYR0OjNZr/YqRo6
dOhee+3Vrl27E044wZ4zv6l33nnnsGHDHn/8cXcWvIh+nZZ07xjVkBYCGr1pIe/Y7mabbTZ69Ogjjjhi8eLF3JDuWFsGi19xxRUjRoyARzh1J08KH3cMVYMQEAKZQGDHHXfkbvRnnnmmT58+L7/8ciZoSoIIeIEj+II7eEyiymYS/YnAqEqEgBDIBAKdOnWaMGFCr169tt9++wsuuIB70jNBVlwioB8u4AWO4Avu4tZUWk6ivxQR/S8EhEDeETjvvPNeeeWV5cuXt23b9vzzz1+wYEHuOIJmKId+uIAXOEqWBYn+ZPFUbUJACGQCgY4dOw4ZMmT+/Pls2u+www6HHHLIqFGjMkFZLSKgE2qhGcqhHy7gpVahutO9nOa1UwE/9p00e/HcpRaN39x1UF0Ep9WbabVbFziZzczE+f777x85cuSMGTOQqn379kV13qJFi+wQvGLFiokTJ44bN27MmDFo8wcMGDBw4MBWrVr5ozAFKVy0QVw0fv0N1izUnFZvptVuFjBPkIY333xz7Nix48ePZ7+0d+/eGIP27NmzR48eCTZRV1XPPvvs1KlTMdacNGkS2vwDDzywX79+CSr0LcRI9FvASSZJL20yOGajlrR6M612s4F68lR89NFHjz766JQpU6ZNmzZnzpydd96ZufZ222231VZbde3adc0110y+yWbNPv7443nz5tHcrFmzWH9Mnz6d5nbbbbc999xz7733XnvttX00Wq1Oif5qyCQWr5c2MSgzUFFavZlWuxmA3DsJfAaef/75mTNnvvjii3PnzsWScq211sJ2ntl3hw4dNtpoo/XXX5+TYuy4Ip05Sdu6deuWLVuiL1pttdUMcV999RUam88+++zTTz9dtmwZFX7wwQfvvffe0qVLFy1a9M4777DaeP311zHX6dKly5Zbbrntttt269ate/fuDRb3YSjTEf1hCooQLtTeRtPuUERwWgxqFDUM+YULF2Jgg7xGai9ZsgQJjhz/8MMPkemfrHwQ8V988UXQI4yK1VdfnU9Cm5UPAn2dddbha8E3Y4MNNuD7wVekc+fOm2yyScNYqNlQCqK/Jk3KIASEgBDIBQJI/xRnAy4QSfS7oKeyQkAICIFcIiC7/lx2m4gWAkJACLggINHvgp7KCgEhIARyiYBEfy67TUQLASEgBFwQkOh3QU9lhYAQEAK5RECiP5fdJqKFgBAQAi4ISPS7oKeyQkAICIFcIiDRn8tuE9FCQAgIARcEJPpd0FNZISAEhEAuEZDoz2W3iWghIASEgAsCEv0u6KmsEBACQiCXCEj057LbRLQQEAJCwAUBiX4X9FRWCAgBIZBLBCT6c9ltIloICAEh4IKARL8LeiorBISAEMglAhL9uew2ES0EhIAQcEFAot8FPZUVAkJACOQSAYn+XHabiBYCQkAIuCAg0e+CnsoKASEgBHKJgER/LrtNRAsBISAEXBCQ6HdBT2WFgBAQArlEQKI/l90mooWAEBACLghI9Lugp7JCQAgIgVwiINGfy24T0UJACAgBFwQk+l3QU1khIASEQC4RkOjPZbeJaCEgBISACwIS/S7oqawQEAJCIJcISPTnsttEtBAQAkLABYH/AzFaA2FwdiyIAAAAAElFTkSuQmCC)\n", + "\n", + "Then when we ask a question, we retrieve the relevant passages from the vector store and use langchain and OpenAI to provide a summary for the question." 
+ ], + "metadata": { + "id": "tZnIXBfrRpex" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Install packages and import modules\n", + "\n", + "To get started, we'll need to connect to our Elastic deployment using the Python client.\n", + "Because we're using an Elastic Cloud deployment, we'll use the **Cloud ID** to identify our deployment.\n", + "\n", + "First we need to install the `elasticsearch` Python client." + ], + "metadata": { + "id": "GyAst2W-VpHb" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install -qU langchain jq openai elasticsearch tiktoken" + ], + "metadata": { + "id": "33A-cP-XvFCr" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "source": [ @@ -19,15 +62,20 @@ "outputs": [] }, { - "cell_type": "code", + "cell_type": "markdown", "source": [ - "!pip install -qU langchain jq openai elasticsearch tiktoken" + "## Create Elastic Cloud deployment\n", + "\n", + "If you don't have an Elastic Cloud deployment, sign up [here](https://cloud.elastic.co/registration?fromURI=%2Fhome) for a free trial.\n", + "\n", + "- Go to the [Create deployment](https://cloud.elastic.co/deployments/create) page\n", + "- Select **Create deployment**\n", + "\n", + "Now we can instantiate the [Elasticsearch python client](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/index.html), providing the cloud id and password in your deployment." 
], "metadata": { - "id": "33A-cP-XvFCr" - }, - "execution_count": null, - "outputs": [] + "id": "qtEOCsCLWCZp" + } }, { "cell_type": "code", @@ -54,7 +102,7 @@ "embeddings = OpenAIEmbeddings(openai_api_key=\"\")\n", "\n", "client = Elasticsearch(\n", - " cloud_id=\"My_deployment:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvOjQ0MyQxYTU2YWQyMTU4N2M0NGQzOTMwOTMyZWI5ZmExZDhlOCRiNGZkMDBhYTNlZjI0ODdiYmU5OGQ5N2YyNTBlYWUyYw==\",\n", + " cloud_id=\"\",\n", " basic_auth=(\"elastic\", \"\")\n", ")\n" ], @@ -64,6 +112,16 @@ "execution_count": null, "outputs": [] }, + { + "cell_type": "markdown", + "source": [ + "### Setup the Index\n", + "Next define the mapping for the passages. Langchain relies on two fields: text and vector." + ], + "metadata": { + "id": "kRPxrJnXWfMD" + } + }, { "cell_type": "code", "source": [ @@ -92,25 +150,27 @@ "outputs": [] }, { - "cell_type": "code", + "cell_type": "markdown", "source": [ - "# get the embeddings from openAI\n", - "\n", - "texts = []\n", - "for passage in docs:\n", - " texts.append(passage.page_content)\n", + "### Enrich Passages with OpenAI Model\n", "\n", - "textEmbeddings = embeddings.embed_documents(texts)\n" + "Next we are going to enrich each passage with an embedding from OpenAI." 
], "metadata": { - "id": "Qc1LXk-rOzNR" - }, - "execution_count": 132, - "outputs": [] + "id": "RmCUl0hxW4lG" + } }, { "cell_type": "code", "source": [ + "# get the embeddings from openAI\n", + "\n", + "texts = []\n", + "for passage in docs:\n", + " texts.append(passage.page_content)\n", + "\n", + "textEmbeddings = embeddings.embed_documents(texts)\n", + "\n", "# persist the passage documents into elasticsearch\n", "\n", "actions = []\n", @@ -123,19 +183,19 @@ " \"metadata\": passage.metadata\n", " })\n", "\n", - "client.bulk(operations=actions)\n", - "\n" + "client.bulk(operations=actions)\n" ], "metadata": { - "id": "4hkOZAIqQ5t5" + "id": "Qc1LXk-rOzNR" }, - "execution_count": null, + "execution_count": 132, "outputs": [] }, { "cell_type": "markdown", "source": [ - "Querying" + "## Asking a question\n", + "Now that we have the passages stored in Elasticsearch, we can now ask a question to get the relevant passages." ], "metadata": { "id": "rXJH_MiWejv7" @@ -157,25 +217,25 @@ "llm = OpenAI(openai_api_key=\"\")\n", "\n", "qa = RetrievalQA.from_chain_type(\n", - " llm=llm, chain_type=\"stuff\", retriever=retriever, return_source_documents=True\n", + " llm=llm,\n", + " chain_type=\"stuff\",\n", + " retriever=retriever,\n", + " return_source_documents=True\n", ")\n", "\n", "ans = qa({\"query\": \"what is the nasa sales team?\"})\n", "\n", - "print(\"answer\")\n", - "print(ans[\"result\"])\n", - "\n", - "print(\"sources\")\n", + "print(ans['answer'])\n", "for doc in ans[\"source_documents\"]:\n", " print(doc.metadata[\"name\"])\n", " print(doc.page_content)" ], "metadata": { - "id": "OobeBT6rek7Q", - "outputId": "532e02ba-afb3-46fe-ca2a-35a2ad4a525a", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "OobeBT6rek7Q", + "outputId": "532e02ba-afb3-46fe-ca2a-35a2ad4a525a" }, "execution_count": 134, "outputs": [ From 46833f1783a0606b2cb73b9d5ef3a471d44d705d Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Tue, 4 Jul 2023 20:40:41 +0100 Subject: [PATCH 
12/54] updates to cosine --- .../generative-ai/question-answering.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/colab-notebooks-examples/generative-ai/question-answering.ipynb b/colab-notebooks-examples/generative-ai/question-answering.ipynb index 4b53f7f7..2155eb81 100644 --- a/colab-notebooks-examples/generative-ai/question-answering.ipynb +++ b/colab-notebooks-examples/generative-ai/question-answering.ipynb @@ -99,11 +99,11 @@ "text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", "docs = text_splitter.create_documents(content, metadatas=metadata)\n", "\n", - "embeddings = OpenAIEmbeddings(openai_api_key=\"\")\n", + "embeddings = OpenAIEmbeddings(openai_api_key=\"OPENAI_KEY\")\n", "\n", "client = Elasticsearch(\n", - " cloud_id=\"\",\n", - " basic_auth=(\"elastic\", \"\")\n", + " cloud_id=\"CLOUD_ID\",\n", + " basic_auth=(\"elastic\", \"PASSWORD\")\n", ")\n" ], "metadata": { @@ -133,8 +133,8 @@ " \"vector\": {\n", " \"type\": \"dense_vector\",\n", " \"dims\": 1536,\n", - " \"index\": \"true\",\n", - " \"similarity\": \"dot_product\"\n", + " \"index\": True,\n", + " \"similarity\": \"cosine\"\n", " }\n", " }\n", " }\n", @@ -214,7 +214,7 @@ "\n", "retriever = db.as_retriever()\n", "\n", - "llm = OpenAI(openai_api_key=\"\")\n", + "llm = OpenAI(openai_api_key=\"OPENAI_KEY\")\n", "\n", "qa = RetrievalQA.from_chain_type(\n", " llm=llm,\n", From 4d1ff9e2da7a07f17c0eb62b90d0af96eb4fb9ff Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Tue, 4 Jul 2023 20:43:39 +0100 Subject: [PATCH 13/54] updates to q/a --- .../generative-ai/question-answering.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/colab-notebooks-examples/generative-ai/question-answering.ipynb b/colab-notebooks-examples/generative-ai/question-answering.ipynb index 2155eb81..0e0fd81e 100644 --- a/colab-notebooks-examples/generative-ai/question-answering.ipynb +++ 
b/colab-notebooks-examples/generative-ai/question-answering.ipynb @@ -176,10 +176,9 @@ "actions = []\n", "for i, passage in enumerate(docs):\n", " actions.append({\"index\": {\"_index\": \"workplace_index\"}})\n", - " passageEmbedding = textEmbeddings[i]\n", " actions.append({\n", " \"text\": passage.page_content,\n", - " \"vector\":passageEmbedding,\n", + " \"vector\": textEmbeddings[i],\n", " \"metadata\": passage.metadata\n", " })\n", "\n", From a40a54638c0aebd4a456ce5e20bcbe97273024f7 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Tue, 4 Jul 2023 21:00:09 +0100 Subject: [PATCH 14/54] updates to q/a --- .../generative-ai/question-answering.ipynb | 80 ++++++++++++++----- 1 file changed, 62 insertions(+), 18 deletions(-) diff --git a/colab-notebooks-examples/generative-ai/question-answering.ipynb b/colab-notebooks-examples/generative-ai/question-answering.ipynb index 0e0fd81e..b98a4e11 100644 --- a/colab-notebooks-examples/generative-ai/question-answering.ipynb +++ b/colab-notebooks-examples/generative-ai/question-answering.ipynb @@ -58,7 +58,7 @@ "metadata": { "id": "J8-93TiJsNyK" }, - "execution_count": 29, + "execution_count": 2, "outputs": [] }, { @@ -77,6 +77,39 @@ "id": "qtEOCsCLWCZp" } }, + { + "cell_type": "code", + "source": [ + "from elasticsearch import Elasticsearch\n", + "\n", + "client = Elasticsearch(\n", + " cloud_id=\"CLOUD_ID\",\n", + " basic_auth=(\"elastic\", \"PASSWORD\")\n", + ")\n" + ], + "metadata": { + "id": "a-t1mglib54F" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Split Documents into Passages\n", + "\n", + "With the workplace dataset, we need to split the content of each document into smaller passages. Models have a limit number of tokens length that they can handle. 
By splitting up long documents into smaller chunks, we can get round that limitation.\n", + "\n", + "Also if the whole document was represented by a single vector, it may lead to the inability to surface relevant content.\n", + "\n", + "Here we are chunking documents into 500 token passages.\n", + "\n", + "Here we are using a simple splitter but Langchain offers more advanced splitters to reduce the chace of context being lost." + ], + "metadata": { + "id": "p0cQFDl1b9v4" + } + }, { "cell_type": "code", "source": [ @@ -84,7 +117,6 @@ "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.vectorstores import ElasticVectorSearch\n", "from langchain.embeddings import OpenAIEmbeddings\n", - "from elasticsearch import Elasticsearch\n", "\n", "metadata = []\n", "content = []\n", @@ -99,12 +131,7 @@ "text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", "docs = text_splitter.create_documents(content, metadatas=metadata)\n", "\n", - "embeddings = OpenAIEmbeddings(openai_api_key=\"OPENAI_KEY\")\n", - "\n", - "client = Elasticsearch(\n", - " cloud_id=\"CLOUD_ID\",\n", - " basic_auth=(\"elastic\", \"PASSWORD\")\n", - ")\n" + "embeddings = OpenAIEmbeddings(openai_api_key=\"OPENAI_KEY\")" ], "metadata": { "id": "dbHEoTF6vBXE" @@ -187,7 +214,7 @@ "metadata": { "id": "Qc1LXk-rOzNR" }, - "execution_count": 132, + "execution_count": null, "outputs": [] }, { @@ -224,7 +251,9 @@ "\n", "ans = qa({\"query\": \"what is the nasa sales team?\"})\n", "\n", - "print(ans['answer'])\n", + "print(\"---- answer ----\")\n", + "print(ans[\"result\"])\n", + "print(\"---- sources ----\")\n", "for doc in ans[\"source_documents\"]:\n", " print(doc.metadata[\"name\"])\n", " print(doc.page_content)" @@ -234,17 +263,17 @@ "base_uri": "https://localhost:8080/" }, "id": "OobeBT6rek7Q", - "outputId": "532e02ba-afb3-46fe-ca2a-35a2ad4a525a" + "outputId": "ba7b3a7a-253e-4e7f-83b9-cec07ebdac09" }, - "execution_count": 134, + "execution_count": 12, "outputs": [ { 
"output_type": "stream", "name": "stdout", "text": [ - "answer\n", - " The NASA Sales team is the North America South America regional sales team. It is composed of dedicated account managers, sales representatives, and support staff, and is led by Laura Martinez (Area Vice-President of North America) and Gary Johnson (Area Vice-President of South America).\n", - "sources\n", + "---- answer ----\n", + " The NASA Sales Team is a regional sales team consisting of dedicated account managers, sales representatives, and support staff, led by their respective Area Vice-Presidents (Laura Martinez and Gary Johnson). They are responsible for identifying and pursuing new business opportunities, nurturing existing client relationships, and ensuring customer satisfaction.\n", + "---- sources ----\n", "Sales Organization Overview\n", "Our sales organization is structured to effectively serve our customers and achieve our business objectives across multiple regions. The organization is divided into the following main regions:\n", "\n", @@ -268,10 +297,25 @@ "client.indices.delete(index=\"workplace_index\")" ], "metadata": { - "id": "SOeP4-DLnwDB" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SOeP4-DLnwDB", + "outputId": "acff2056-eec6-46e5-cddc-9af9050c046e" }, - "execution_count": null, - "outputs": [] + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True})" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] } ], "metadata": { From 0a4df8846316e9db5b8704e081c116851743c316 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Wed, 5 Jul 2023 09:56:48 +0100 Subject: [PATCH 15/54] remove notebook --- .../generative-ai/summarise-search-results-with-openai.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 colab-notebooks-examples/generative-ai/summarise-search-results-with-openai.ipynb diff --git 
a/colab-notebooks-examples/generative-ai/summarise-search-results-with-openai.ipynb b/colab-notebooks-examples/generative-ai/summarise-search-results-with-openai.ipynb deleted file mode 100644 index e69de29b..00000000 From 8105bd8235542afd45130d6fad725791c05e3ec4 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Wed, 5 Jul 2023 15:03:55 +0200 Subject: [PATCH 16/54] Quering Filtering --- .../01-keyword-querying-filtering.ipynb | 191 ++++++++++++++++++ 1 file changed, 191 insertions(+) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index e69de29b..5df6b1da 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -0,0 +1,191 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyNcb0Lf3LidUnBrYcY/mSC9", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cSj1acY8uPR2", + "outputId": "2c4f9385-17a8-4de0-a843-da7d0d3babb9" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/393.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m393.8/393.8 kB\u001b[0m \u001b[31m22.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 
kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m71.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.5/268.5 kB\u001b[0m \u001b[31m23.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m82.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m69.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for sentence-transformers (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "!pip install -qU elasticsearch sentence-transformers==2.2.2\n", + "from elasticsearch import Elasticsearch\n", + "# Create the client instance\n", + "client = Elasticsearch(\n", + " cloud_id=\"\",\n", + " basic_auth=(\"elastic\", \"\")\n", + ")\n", + "print(client.info())" + ] + }, + { + "cell_type": "markdown", + "source": [ + "[Quick Start](https://github.com/yansavitski/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb) || **Keyword Quering Filtering** || [Hubrid search with RRF](https://github.com/yansavitski/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb) || [ELSER](https://github.com/yansavitski/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/03-ELSER.ipynb)" + ], + "metadata": { + "id": "0wgbLWl2udLQ" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Keyword Quering Filtering\n", + "\n", + "This interactive notebook will introduce you to the Elasticsearch queries, using the official Elasticsearch Python client. Before getting start this section we highly recomented firstly finish [quick start](https://github.com/yansavitski/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb)" + ], + "metadata": { + "id": "83LdOUCwwHzs" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Querying\n", + "In the query context, a query clause answers the question “How well does this document match this query clause?” Besides deciding whether or not the document matches, the query clause also calculates a relevance score in the _score metadata field.\n", + "\n", + "### Full text queries\n", + "\n", + "The full text queries enable you to search analyzed text fields such as the body of an email. 
The query string is processed using the same analyzer that was applied to the field during indexing.\n", + "\n", + "* **match**.\n", + " The standard query for performing full text queries, including fuzzy matching and phrase or proximity queries.\n", + "* **multi-match**.\n", + " The multi-field version of the match query.\n", + "\n", + "### Compound query\n", + "* **boosting query**.\n", + " Return documents which match a positive query, but reduce the score of documents which also match a negative query." + ], + "metadata": { + "id": "22onltbgxxGm" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Match query\n", + "Returns documents that match a provided text, number, date or boolean value. The provided text is analyzed before matching.\n", + "\n", + "The match query is the standard query for performing a full-text search, including options for fuzzy matching.\n", + "\n", + "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html#match-query-ex-request)\n", + "\n" + ], + "metadata": { + "id": "clXQwoFQ6x61" + } + }, + { + "cell_type": "code", + "source": [ + "def pretty_response(response):\n", + " for hit in response['hits']['hits']:\n", + " id = hit['_id']\n", + " publication_date = hit['_source']['publish_date']\n", + " score = hit['_score']\n", + " title = hit['_source']['title']\n", + " summary = hit['_source']['summary']\n", + " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nScore: {score}\")\n", + " print(pretty_output)\n", + "\n", + "response = client.search(index=\"book_index\", query={\"match\": {\"summary\": \"guide\"}})\n", + "\n", + "pretty_response(response)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "q_OE0XVx6_qX", + "outputId": "ce30d7f3-e683-4712-879f-00c68bfa15a0" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + 
"ID: 3cXgIYkBfxlbyhU5Krfc\n", + "Publication date: 2019-10-29\n", + "Title: The Pragmatic Programmer: Your Journey to Mastery\n", + "Summary: A guide to pragmatic programming for software engineers and developers\n", + "Score: 0.7042277\n", + "\n", + "ID: 3sXgIYkBfxlbyhU5Krfc\n", + "Publication date: 2019-05-03\n", + "Title: Python Crash Course\n", + "Summary: A fast-paced, no-nonsense guide to programming in Python\n", + "Score: 0.7042277\n", + "\n", + "ID: 5MXgIYkBfxlbyhU5Krfd\n", + "Publication date: 2011-05-13\n", + "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", + "Summary: A guide to professional conduct in the field of software engineering\n", + "Score: 0.6771651\n", + "\n", + "ID: 4MXgIYkBfxlbyhU5Krfc\n", + "Publication date: 2008-08-11\n", + "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", + "Summary: A guide to writing code that is easy to read, understand and maintain\n", + "Score: 0.62883455\n", + "\n", + "ID: 48XgIYkBfxlbyhU5Krfd\n", + "Publication date: 1994-10-31\n", + "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", + "Summary: Guide to design patterns that can be used in any object-oriented language\n", + "Score: 0.62883455\n" + ] + } + ] + } + ] +} \ No newline at end of file From 58d41dd57e86650ad1e8e6673e2dd82020b7ea4e Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Wed, 5 Jul 2023 15:08:08 +0200 Subject: [PATCH 17/54] Update quering filtering --- .../01-keyword-querying-filtering.ipynb | 67 ++++++------------- 1 file changed, 22 insertions(+), 45 deletions(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 5df6b1da..09428148 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -4,7 +4,7 @@ "metadata": { "colab": { "provenance": [], - "authorship_tag": 
"ABX9TyNcb0Lf3LidUnBrYcY/mSC9", + "authorship_tag": "ABX9TyNrwNQDSZ82rnrfpRNNISWT", "include_colab_link": true }, "kernelspec": { @@ -85,6 +85,24 @@ "id": "83LdOUCwwHzs" } }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cSj1acY8uPR2" + }, + "outputs": [], + "source": [ + "!pip install -qU elasticsearch sentence-transformers==2.2.2\n", + "from elasticsearch import Elasticsearch\n", + "# Create the client instance\n", + "client = Elasticsearch(\n", + " cloud_id=\"\",\n", + " basic_auth=(\"elastic\", \"\")\n", + ")\n", + "print(client.info())" + ] + }, { "cell_type": "markdown", "source": [ @@ -141,51 +159,10 @@ "pretty_response(response)" ], "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "q_OE0XVx6_qX", - "outputId": "ce30d7f3-e683-4712-879f-00c68bfa15a0" + "id": "q_OE0XVx6_qX" }, - "execution_count": 12, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "ID: 3cXgIYkBfxlbyhU5Krfc\n", - "Publication date: 2019-10-29\n", - "Title: The Pragmatic Programmer: Your Journey to Mastery\n", - "Summary: A guide to pragmatic programming for software engineers and developers\n", - "Score: 0.7042277\n", - "\n", - "ID: 3sXgIYkBfxlbyhU5Krfc\n", - "Publication date: 2019-05-03\n", - "Title: Python Crash Course\n", - "Summary: A fast-paced, no-nonsense guide to programming in Python\n", - "Score: 0.7042277\n", - "\n", - "ID: 5MXgIYkBfxlbyhU5Krfd\n", - "Publication date: 2011-05-13\n", - "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", - "Summary: A guide to professional conduct in the field of software engineering\n", - "Score: 0.6771651\n", - "\n", - "ID: 4MXgIYkBfxlbyhU5Krfc\n", - "Publication date: 2008-08-11\n", - "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", - "Summary: A guide to writing code that is easy to read, understand and maintain\n", - "Score: 0.62883455\n", - "\n", - "ID: 48XgIYkBfxlbyhU5Krfd\n", - "Publication date: 
1994-10-31\n", - "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", - "Summary: Guide to design patterns that can be used in any object-oriented language\n", - "Score: 0.62883455\n" - ] - } - ] + "execution_count": null, + "outputs": [] } ] } \ No newline at end of file From 2b178fe9c421f063403bc335ed5a01b1c323051d Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Wed, 5 Jul 2023 15:11:52 +0200 Subject: [PATCH 18/54] Update filtering --- .../search/01-keyword-querying-filtering.ipynb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 09428148..d8146180 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -4,8 +4,7 @@ "metadata": { "colab": { "provenance": [], - "authorship_tag": "ABX9TyNrwNQDSZ82rnrfpRNNISWT", - "include_colab_link": true + "authorship_tag": "ABX9TyN01omVTQq7mKw4jc4XyLDm" }, "kernelspec": { "name": "python3", @@ -79,6 +78,8 @@ "source": [ "# Keyword Quering Filtering\n", "\n", + "\"Open\n", + "\n", "This interactive notebook will introduce you to the Elasticsearch queries, using the official Elasticsearch Python client. 
Before getting start this section we highly recomented firstly finish [quick start](https://github.com/yansavitski/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb)" ], "metadata": { @@ -97,8 +98,8 @@ "from elasticsearch import Elasticsearch\n", "# Create the client instance\n", "client = Elasticsearch(\n", - " cloud_id=\"\",\n", - " basic_auth=(\"elastic\", \"\")\n", + " cloud_id=\"\",\n", + " basic_auth=(\"elastic\", \"\")\n", ")\n", "print(client.info())" ] From 598ae4e240333a25203360cab87fe5464479a4a7 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Wed, 5 Jul 2023 15:26:17 +0200 Subject: [PATCH 19/54] Update file --- .../01-keyword-querying-filtering.ipynb | 40 ++----------------- 1 file changed, 4 insertions(+), 36 deletions(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index d8146180..1d245ece 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -3,8 +3,7 @@ "nbformat_minor": 0, "metadata": { "colab": { - "provenance": [], - "authorship_tag": "ABX9TyN01omVTQq7mKw4jc4XyLDm" + "provenance": [] }, "kernelspec": { "name": "python3", @@ -15,44 +14,13 @@ } }, "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "cSj1acY8uPR2", - "outputId": "2c4f9385-17a8-4de0-a843-da7d0d3babb9" + "id": "cSj1acY8uPR2" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/393.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m393.8/393.8 kB\u001b[0m \u001b[31m22.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m71.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.5/268.5 kB\u001b[0m \u001b[31m23.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m82.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m69.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Building wheel for sentence-transformers (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" - ] - } - ], + "outputs": [], "source": [ "!pip install -qU elasticsearch sentence-transformers==2.2.2\n", "from elasticsearch import Elasticsearch\n", @@ -90,7 +58,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "cSj1acY8uPR2" + "id": "f7RLweh9OhNd" }, "outputs": [], "source": [ From ca628619f8f4f42265c6c67482912f7d4031f950 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Wed, 5 Jul 2023 15:32:54 +0200 Subject: [PATCH 20/54] Use interactive fields for cloud connection --- .../01-keyword-querying-filtering.ipynb | 30 +++++-------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 1d245ece..01d1e78c 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -3,7 +3,8 @@ "nbformat_minor": 0, "metadata": { "colab": { - "provenance": [] + "provenance": [], + "authorship_tag": "ABX9TyPOqgOPOA1DSGVkeag6R90n" }, "kernelspec": { "name": "python3", @@ -14,24 +15,6 @@ } }, "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cSj1acY8uPR2" - }, - "outputs": [], - "source": [ - "!pip install -qU elasticsearch sentence-transformers==2.2.2\n", - "from elasticsearch import Elasticsearch\n", - "# Create the client instance\n", - "client = Elasticsearch(\n", - " cloud_id=\"\",\n", - " basic_auth=(\"elastic\", \"\")\n", - ")\n", - "print(client.info())" - ] - }, { "cell_type": "markdown", "source": [ @@ -58,16 +41,19 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "f7RLweh9OhNd" + "id": "cSj1acY8uPR2" }, "outputs": [], "source": [ "!pip install -qU elasticsearch sentence-transformers==2.2.2\n", "from elasticsearch import Elasticsearch\n", "# Create the client instance\n", + "cloud_id = '\\u003Ccloud_id>' #@param 
{type:\"string\"}\n", + "elastic_username = 'elastic' #@param {type:\"string\"}\n", + "elastic_password = '\\u003Cpassword>' #@param {type: \"string\"}\n", "client = Elasticsearch(\n", - " cloud_id=\"\",\n", - " basic_auth=(\"elastic\", \"\")\n", + " cloud_id=cloud_id,\n", + " basic_auth=(elastic_username, elastic_password)\n", ")\n", "print(client.info())" ] From a112c0cb895075a2404bec75c0e9bece26a87b41 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Wed, 5 Jul 2023 16:35:55 +0200 Subject: [PATCH 21/54] Improve request output --- .../01-keyword-querying-filtering.ipynb | 69 ++++++++++--------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 01d1e78c..a6b62f8e 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -4,7 +4,7 @@ "metadata": { "colab": { "provenance": [], - "authorship_tag": "ABX9TyPOqgOPOA1DSGVkeag6R90n" + "authorship_tag": "ABX9TyPCLSxlCFfxOgEWVF7dVflx" }, "kernelspec": { "name": "python3", @@ -15,6 +15,40 @@ } }, "cells": [ + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "id": "cSj1acY8uPR2" + }, + "outputs": [], + "source": [ + "#@title Prepare elasticsearch client { display-mode: \"form\" }\n", + "!pip install -qU elasticsearch sentence-transformers==2.2.2\n", + "from elasticsearch import Elasticsearch\n", + "import pandas as pd\n", + "# Create the client instance\n", + "cloud_id = '\\u003Ccloud_id>' #@param {type:\"string\"}\n", + "elastic_username = 'elastic' #@param {type:\"string\"}\n", + "elastic_password = '\\u003Cpassword>' #@param {type: \"string\"}\n", + "client = Elasticsearch(\n", + " cloud_id=cloud_id,\n", + " basic_auth=(elastic_username, elastic_password)\n", + ")\n", + "\n", + "def pretty_response_transform(response):\n", + " result = []\n", + " for 
hit in response['hits']['hits']:\n", + " result.append({\n", + " 'id' : hit['_id'],\n", + " 'publication_date' : hit['_source']['publish_date'],\n", + " 'score' : hit['_score'],\n", + " 'title' : hit['_source']['title'],\n", + " 'summary' : hit['_source']['summary']\n", + " })\n", + " return result" + ] + }, { "cell_type": "markdown", "source": [ @@ -37,27 +71,6 @@ "id": "83LdOUCwwHzs" } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cSj1acY8uPR2" - }, - "outputs": [], - "source": [ - "!pip install -qU elasticsearch sentence-transformers==2.2.2\n", - "from elasticsearch import Elasticsearch\n", - "# Create the client instance\n", - "cloud_id = '\\u003Ccloud_id>' #@param {type:\"string\"}\n", - "elastic_username = 'elastic' #@param {type:\"string\"}\n", - "elastic_password = '\\u003Cpassword>' #@param {type: \"string\"}\n", - "client = Elasticsearch(\n", - " cloud_id=cloud_id,\n", - " basic_auth=(elastic_username, elastic_password)\n", - ")\n", - "print(client.info())" - ] - }, { "cell_type": "markdown", "source": [ @@ -99,19 +112,9 @@ { "cell_type": "code", "source": [ - "def pretty_response(response):\n", - " for hit in response['hits']['hits']:\n", - " id = hit['_id']\n", - " publication_date = hit['_source']['publish_date']\n", - " score = hit['_score']\n", - " title = hit['_source']['title']\n", - " summary = hit['_source']['summary']\n", - " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nScore: {score}\")\n", - " print(pretty_output)\n", - "\n", "response = client.search(index=\"book_index\", query={\"match\": {\"summary\": \"guide\"}})\n", "\n", - "pretty_response(response)" + "pd.DataFrame.from_records(pretty_response_transform(response))" ], "metadata": { "id": "q_OE0XVx6_qX" From db63d490eb017f0145596309e542158e71a88785 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Wed, 5 Jul 2023 17:11:54 +0200 Subject: [PATCH 22/54] Add different types of queries --- 
.../01-keyword-querying-filtering.ipynb | 940 +++++++++++++++++- 1 file changed, 933 insertions(+), 7 deletions(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index a6b62f8e..13adeed5 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -4,7 +4,7 @@ "metadata": { "colab": { "provenance": [], - "authorship_tag": "ABX9TyPCLSxlCFfxOgEWVF7dVflx" + "authorship_tag": "ABX9TyP9TEKGqhDbAR44WkL6Ryvw" }, "kernelspec": { "name": "python3", @@ -87,6 +87,9 @@ " The multi-field version of the match query.\n", "\n", "### Compound query\n", + "\n", + "Compound queries wrap other compound or leaf queries, either to combine their results and scores, to change their behaviour, or to switch from query to filter context.\n", + "\n", "* **boosting query**.\n", " Return documents which match a positive query, but reduce the score of documents which also match a negative query." ], @@ -98,9 +101,9 @@ "cell_type": "markdown", "source": [ "### Match query\n", - "Returns documents that match a provided text, number, date or boolean value. The provided text is analyzed before matching.\n", + "Returns documents that `match` a provided text, number, date or boolean value. 
The provided text is analyzed before matching.\n", "\n", - "The match query is the standard query for performing a full-text search, including options for fuzzy matching.\n", + "The `match` query is the standard query for performing a full-text search, including options for fuzzy matching.\n", "\n", "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html#match-query-ex-request)\n", "\n" @@ -112,15 +115,938 @@ { "cell_type": "code", "source": [ - "response = client.search(index=\"book_index\", query={\"match\": {\"summary\": \"guide\"}})\n", + "response = client.search(index=\"book_index\", query={\n", + " \"match\": {\n", + " \"summary\": {\n", + " \"query\": \"guide\"\n", + " }\n", + " }\n", + " })\n", + "\n", + "pd.DataFrame.from_records(pretty_response_transform(response))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 280 + }, + "id": "q_OE0XVx6_qX", + "outputId": "771bf9d6-a770-43e7-9f19-5ab1b40f6960" + }, + "execution_count": 52, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id publication_date score \\\n", + "0 3cXgIYkBfxlbyhU5Krfc 2019-10-29 0.704228 \n", + "1 3sXgIYkBfxlbyhU5Krfc 2019-05-03 0.704228 \n", + "2 5MXgIYkBfxlbyhU5Krfd 2011-05-13 0.677165 \n", + "3 4MXgIYkBfxlbyhU5Krfc 2008-08-11 0.628835 \n", + "4 48XgIYkBfxlbyhU5Krfd 1994-10-31 0.628835 \n", + "\n", + " title \\\n", + "0 The Pragmatic Programmer: Your Journey to Mastery \n", + "1 Python Crash Course \n", + "2 The Clean Coder: A Code of Conduct for Profess... \n", + "3 Clean Code: A Handbook of Agile Software Craft... \n", + "4 Design Patterns: Elements of Reusable Object-O... \n", + "\n", + " summary \n", + "0 A guide to pragmatic programming for software ... \n", + "1 A fast-paced, no-nonsense guide to programming... \n", + "2 A guide to professional conduct in the field o... \n", + "3 A guide to writing code that is easy to read, ... 
\n", + "4 Guide to design patterns that can be used in a... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idpublication_datescoretitlesummary
03cXgIYkBfxlbyhU5Krfc2019-10-290.704228The Pragmatic Programmer: Your Journey to MasteryA guide to pragmatic programming for software ...
13sXgIYkBfxlbyhU5Krfc2019-05-030.704228Python Crash CourseA fast-paced, no-nonsense guide to programming...
25MXgIYkBfxlbyhU5Krfd2011-05-130.677165The Clean Coder: A Code of Conduct for Profess...A guide to professional conduct in the field o...
34MXgIYkBfxlbyhU5Krfc2008-08-110.628835Clean Code: A Handbook of Agile Software Craft...A guide to writing code that is easy to read, ...
448XgIYkBfxlbyhU5Krfd1994-10-310.628835Design Patterns: Elements of Reusable Object-O...Guide to design patterns that can be used in a...
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"3cXgIYkBfxlbyhU5Krfc\",\n\"2019-10-29\",\n{\n 'v': 0.7042277,\n 'f': \"0.7042277\",\n },\n\"The Pragmatic Programmer: Your Journey to Mastery\",\n\"A guide to pragmatic programming for software engineers and developers\"],\n [{\n 'v': 1,\n 'f': \"1\",\n },\n\"3sXgIYkBfxlbyhU5Krfc\",\n\"2019-05-03\",\n{\n 'v': 0.7042277,\n 'f': \"0.7042277\",\n },\n\"Python Crash Course\",\n\"A fast-paced, no-nonsense guide to programming in Python\"],\n [{\n 'v': 2,\n 'f': \"2\",\n },\n\"5MXgIYkBfxlbyhU5Krfd\",\n\"2011-05-13\",\n{\n 'v': 0.6771651,\n 'f': \"0.6771651\",\n },\n\"The Clean Coder: A Code of Conduct for Professional Programmers\",\n\"A guide to professional conduct in the field of software engineering\"],\n [{\n 'v': 3,\n 'f': \"3\",\n },\n\"4MXgIYkBfxlbyhU5Krfc\",\n\"2008-08-11\",\n{\n 'v': 0.62883455,\n 'f': \"0.62883455\",\n },\n\"Clean Code: A Handbook of Agile Software Craftsmanship\",\n\"A guide to writing code that is easy to read, understand and maintain\"],\n [{\n 'v': 4,\n 'f': \"4\",\n },\n\"48XgIYkBfxlbyhU5Krfd\",\n\"1994-10-31\",\n{\n 'v': 0.62883455,\n 'f': \"0.62883455\",\n },\n\"Design Patterns: Elements of Reusable Object-Oriented Software\",\n\"Guide to design patterns that can be used in any object-oriented language\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let 
quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-cf91b4fa-a3ee-4684-852b-c66ab245eefb button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Multi-match query\n", + "\n", + "The `multi_match` query builds on the match query to allow multi-field queries:" + ], + "metadata": { + "id": "H-n6hoVsfAqc" + } + }, + { + "cell_type": "code", + "source": [ + "response = client.search(index=\"book_index\", query={\n", + " \"multi_match\": {\n", + " \"query\": \"javascript\",\n", + " \"fields\": [\"summary\", \"title\"]\n", + " }\n", + " })\n", "\n", "pd.DataFrame.from_records(pretty_response_transform(response))" ], "metadata": { - "id": "q_OE0XVx6_qX" + "id": "TRmGYM94gCtb", + "outputId": "bc6f9c42-9bc4-48a8-ddf8-be7be7e37609", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 111 + } }, - "execution_count": null, - "outputs": [] + "execution_count": 69, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id publication_date score title \\\n", + "0 3sXgIYkBfxlbyhU5Krfc 2019-05-03 2.4939 Python Crash Course \n", + "\n", + " summary \n", + "0 A fast-paced, no-nonsense guide to programming... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idpublication_datescoretitlesummary
03sXgIYkBfxlbyhU5Krfc2019-05-032.4939Python Crash CourseA fast-paced, no-nonsense guide to programming...
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"3sXgIYkBfxlbyhU5Krfc\",\n\"2019-05-03\",\n{\n 'v': 2.4939003,\n 'f': \"2.4939003\",\n },\n\"Python Crash Course\",\n\"A fast-paced, no-nonsense guide to programming in Python\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-72b0a3de-0bf8-4ffd-8ee4-8487e8903803 button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + }, + "metadata": {}, + "execution_count": 69 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Boosting query\n", + "\n", + "Returns documents matching a `positive` query while reducing the `relevance score` of documents that also match a `negative` query.\n", + "\n", + "You can use the `boosting` query to demote certain documents without excluding them from the search results." + ], + "metadata": { + "id": "FnBeBIVKiPnS" + } + }, + { + "cell_type": "code", + "source": [ + "response = client.search(index=\"book_index\", query={\n", + " \"boosting\": {\n", + " \"positive\": {\n", + " \"term\": {\n", + " \"summary\": \"javascript\"\n", + " }\n", + " },\n", + " \"negative\": {\n", + " \"term\": {\n", + " \"summary\": \"introduction\"\n", + " }\n", + " },\n", + " \"negative_boost\": 0.5\n", + " }\n", + " })\n", + "\n", + "pd.DataFrame.from_records(pretty_response_transform(response))" + ], + "metadata": { + "id": "_aI7hnH0ixkG", + "outputId": "f1bb946a-953c-4d63-afa1-42d1d9778b63", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 166 + } + }, + "execution_count": 63, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id publication_date score \\\n", + "0 5cXgIYkBfxlbyhU5Krfd 2008-05-15 1.254593 \n", + "1 4cXgIYkBfxlbyhU5Krfc 2015-03-27 0.818029 \n", + "\n", + " title \\\n", + "0 JavaScript: The Good Parts \n", + "1 You Don't Know JS: Up & Going \n", + "\n", + " summary \n", + "0 A deep dive into the parts of JavaScript that ... \n", + "1 Introduction to JavaScript and programming as ... 
" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idpublication_datescoretitlesummary
05cXgIYkBfxlbyhU5Krfd2008-05-151.254593JavaScript: The Good PartsA deep dive into the parts of JavaScript that ...
14cXgIYkBfxlbyhU5Krfc2015-03-270.818029You Don't Know JS: Up & GoingIntroduction to JavaScript and programming as ...
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"5cXgIYkBfxlbyhU5Krfd\",\n\"2008-05-15\",\n{\n 'v': 1.2545931,\n 'f': \"1.2545931\",\n },\n\"JavaScript: The Good Parts\",\n\"A deep dive into the parts of JavaScript that are essential to writing maintainable code\"],\n [{\n 'v': 1,\n 'f': \"1\",\n },\n\"4cXgIYkBfxlbyhU5Krfc\",\n\"2015-03-27\",\n{\n 'v': 0.8180288,\n 'f': \"0.8180288\",\n },\n\"You Don't Know JS: Up & Going\",\n\"Introduction to JavaScript and programming as a whole\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-7325ffa0-ded2-46c6-9d3f-b04c2f070c04 button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + }, + "metadata": {}, + "execution_count": 63 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Prefix search\n", + "\n", + "Returns documents that contain a specific prefix in a provided field" + ], + "metadata": { + "id": "yXipv0xSk-nK" + } + }, + { + "cell_type": "code", + "source": [ + "response = client.search(index=\"book_index\", query={\n", + " \"prefix\": {\n", + " \"title\": {\n", + " \"value\": 'java'\n", + " }\n", + " }\n", + " })\n", + "\n", + "pd.DataFrame.from_records(pretty_response_transform(response))" + ], + "metadata": { + "id": "dCr1pwlqlOE7", + "outputId": "61b379bd-20c2-432e-d8a8-83377b72469a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 149 + } + }, + "execution_count": 71, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id publication_date score title \\\n", + "0 4sXgIYkBfxlbyhU5Krfc 2018-12-04 1.0 Eloquent JavaScript \n", + "1 5cXgIYkBfxlbyhU5Krfd 2008-05-15 1.0 JavaScript: The Good Parts \n", + "\n", + " summary \n", + "0 A modern introduction to programming \n", + "1 A deep dive into the parts of JavaScript that ... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idpublication_datescoretitlesummary
04sXgIYkBfxlbyhU5Krfc2018-12-041.0Eloquent JavaScriptA modern introduction to programming
15cXgIYkBfxlbyhU5Krfd2008-05-151.0JavaScript: The Good PartsA deep dive into the parts of JavaScript that ...
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"4sXgIYkBfxlbyhU5Krfc\",\n\"2018-12-04\",\n{\n 'v': 1.0,\n 'f': \"1.0\",\n },\n\"Eloquent JavaScript\",\n\"A modern introduction to programming\"],\n [{\n 'v': 1,\n 'f': \"1\",\n },\n\"5cXgIYkBfxlbyhU5Krfd\",\n\"2008-05-15\",\n{\n 'v': 1.0,\n 'f': \"1.0\",\n },\n\"JavaScript: The Good Parts\",\n\"A deep dive into the parts of JavaScript that are essential to writing maintainable code\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-9c37ee42-e6ce-4a80-a0fa-95ee3eb8a0bd button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + }, + "metadata": {}, + "execution_count": 71 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Fuzzy search\n", + "\n", + "Returns documents that contain terms similar to the search term, as measured by a Levenshtein edit distance.\n", + "\n", + "An edit distance is the number of one-character changes needed to turn one term into another. These changes can include:\n", + "\n", + "* Changing a character (box → fox)\n", + "* Removing a character (black → lack)\n", + "* Inserting a character (sic → sick)\n", + "* Transposing two adjacent characters (act → cat)\n", + "\n" + ], + "metadata": { + "id": "a606YcCmmLHW" + } + }, + { + "cell_type": "code", + "source": [ + "response = client.search(index=\"book_index\", query={\n", + " \"fuzzy\": {\n", + " \"title\": {\n", + " \"value\": 'pyvascript'\n", + " }\n", + " }\n", + " })\n", + "\n", + "pd.DataFrame.from_records(pretty_response_transform(response))" + ], + "metadata": { + "id": "dTMc-IxPmbtC", + "outputId": "57ce94e6-0335-4fc0-ce37-db14568c34a1", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 149 + } + }, + "execution_count": 78, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id publication_date score \\\n", + "0 4sXgIYkBfxlbyhU5Krfc 2018-12-04 1.624602 \n", + "1 5cXgIYkBfxlbyhU5Krfd 2008-05-15 1.365127 \n", + "\n", + " title \\\n", + "0 Eloquent JavaScript \n", + "1 JavaScript: The Good Parts \n", + "\n", + " summary \n", + "0 A modern introduction to programming \n", + "1 A deep dive into the parts of JavaScript that ... 
" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idpublication_datescoretitlesummary
04sXgIYkBfxlbyhU5Krfc2018-12-041.624602Eloquent JavaScriptA modern introduction to programming
15cXgIYkBfxlbyhU5Krfd2008-05-151.365127JavaScript: The Good PartsA deep dive into the parts of JavaScript that ...
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"4sXgIYkBfxlbyhU5Krfc\",\n\"2018-12-04\",\n{\n 'v': 1.6246022,\n 'f': \"1.6246022\",\n },\n\"Eloquent JavaScript\",\n\"A modern introduction to programming\"],\n [{\n 'v': 1,\n 'f': \"1\",\n },\n\"5cXgIYkBfxlbyhU5Krfd\",\n\"2008-05-15\",\n{\n 'v': 1.3651271,\n 'f': \"1.3651271\",\n },\n\"JavaScript: The Good Parts\",\n\"A deep dive into the parts of JavaScript that are essential to writing maintainable code\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-b0299e91-f365-4d5c-82a7-4944ff0b2d6d button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + }, + "metadata": {}, + "execution_count": 78 + } + ] } ] } \ No newline at end of file From 8fd7495e087d28d565e4e044bb5d1fd91ae6a994 Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Wed, 5 Jul 2023 17:23:02 +0200 Subject: [PATCH 23/54] Add ELSER getting started notebook --- .../search/03-ELSER.ipynb | 798 ++++++++++++++++++ 1 file changed, 798 insertions(+) diff --git a/colab-notebooks-examples/search/03-ELSER.ipynb b/colab-notebooks-examples/search/03-ELSER.ipynb index e69de29b..9ff90a3b 100644 --- a/colab-notebooks-examples/search/03-ELSER.ipynb +++ b/colab-notebooks-examples/search/03-ELSER.ipynb @@ -0,0 +1,798 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "s49gpkvZ7q53" + }, + "source": [ + "# Semantic Search using ELSER text expansion\n", + "\n", + "Learn how to use the [Elastic Learned Sparse Encoder](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html) for text expansion-powered semantic search." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Y01AXpELkygt" + }, + "source": [ + "# 🧰 Requirements\n", + "\n", + "For this example, you will need:\n", + "\n", + "- Python 3.6 or later\n", + "- An Elastic deployment with minimum **4GB machine learning node**\n", + "  - We'll be using [Elastic Cloud](https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html) for this example (available with a [free trial](https://cloud.elastic.co/registration?elektra=en-ess-sign-up-page))\n", + "- The [ELSER](https://www.elastic.co/guide/en/machine-learning/8.8/ml-nlp-elser.html) model installed on your Elastic deployment\n", + "- The [Elastic Python client](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/installation.html)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "N4pI1-eIvWrI" + }, + "source": [ + "# Create Elastic Cloud deployment\n", + "\n", + "If you don't have an Elastic Cloud deployment, sign up [here](https://cloud.elastic.co/registration?fromURI=%2Fhome) for a free trial.\n", + "\n", + "- Go to the [Create deployment](https://cloud.elastic.co/deployments/create) page\n", + "   - Under **Advanced settings**, go to **Machine Learning instances**\n", + "   - You'll need at least **4GB** RAM per zone for this tutorial\n", + "   - Select **Create deployment**" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "nSw1R8e28F_E" + }, + "source": [ + "# Setup ELSER\n", + "To use ELSER, you must have the [appropriate subscription](https://www.elastic.co/subscriptions) level\n", + "for semantic search or the trial period activated.\n", + "\n", + "Follow these [instructions](https://www.elastic.co/guide/en/machine-learning/8.8/ml-nlp-elser.html#trained-model) to download and deploy ELSER in the Kibana UI or using the Dev Tools **Console**.\n", + "\n", + "(Console commands in comments 👇)\n", + "\n", + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", +
"metadata": { + "id": "gaTFHLJC-Mgi" + }, + "source": [ + "# Install packages and initialize the Elasticsearch Python client\n", + "\n", + "To get started, we'll need to connect to our Elastic deployment using the Python client.\n", + "Because we're using an Elastic Cloud deployment, we'll use the **Cloud ID** to identify our deployment.\n", + "\n", + "First we need to `pip` install the following packages:\n", + "\n", + "- `elasticsearch`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K9Q1p2C9-wce", + "outputId": "204d5aee-571e-4363-be6e-f87d058f2d29" + }, + "outputs": [], + "source": [ + "!pip install elasticsearch" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "gEzq2Z1wBs3M" + }, + "source": [ + "[TODO: Update]\n", + "Next we need to import the `elasticsearch` module and the `getpass` module.\n", + "`getpass` is part of the Python standard library and is used to securely prompt for credentials." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uP_GTVRi-d96" + }, + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch, helpers\n", + "from urllib.request import urlopen\n", + "import getpass\n", + "import requests\n", + "import json" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "AMSePFiZCRqX" + }, + "source": [ + "Now we can instantiate the Python Elasticsearch client.\n", + "First we prompt the user for their password and Cloud ID.\n", + "\n", + "🔐 NOTE: `getpass` enables us to securely prompt the user for credentials without echoing them to the terminal, or storing it in memory.\n", + "\n", + "Then we create a `client` object that instantiates an instance of the `Elasticsearch` class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "h0MdAZ53CdKL", + "outputId": "96ea6f81-f935-4d51-c4a7-af5a896180f1" + }, + "outputs": [], + "source": [ + "# Found in the 'Manage Deployment' page\n", + "CLOUD_ID = getpass.getpass('Enter Elastic Cloud ID: ')\n", + "\n", + "# Password for the 'elastic' user generated by Elasticsearch\n", + "ELASTIC_PASSWORD = getpass.getpass('Enter Elastic password: ')\n", + "\n", + "# Create the client instance\n", + "client = Elasticsearch(\n", + " cloud_id=CLOUD_ID,\n", + " basic_auth=(\"elastic\", ELASTIC_PASSWORD)\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "bRHbecNeEDL3" + }, + "source": [ + "Confirm that the client has connected with this test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rdiUKqZbEKfF", + "outputId": "43b6f1cd-a43e-4dbe-caa5-7fd170464881" + }, + "outputs": [], + "source": [ + "print(client.info())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "enHQuT57DhD1" + }, + "source": [ + "Refer to https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html#connect-self-managed-new to learn how to connect to a self-managed deployment.\n", + "\n", + "Read https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html#auth-apikey to learn how to connect using API keys.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "TF_wxIAhD07a" + }, + "source": [ + "# Create Elasticsearch index with required mappings\n", + "\n", + "To use the ELSER model at index time, we'll need to create an index mapping that supports a [`text_expansion`](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-text-expansion-query.html)
query.\n", + "The mapping must include a field of type [`rank_features`](https://www.elastic.co/guide/en/elasticsearch/reference/current/rank-features.html) to work with our feature vectors of interest.\n", + "This field contains the token-weight pairs the ELSER model created based on the input text.\n", + "\n", + "Let's create an index named `elser-movies` with the mappings we need.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cvYECABJJs_2", + "outputId": "18fb51e4-c4f6-4d1b-cb2d-bc6f8ec1aa84" + }, + "outputs": [], + "source": [ + "INDEX = 'elser-movies'\n", + "client.indices.create(\n", + " index=INDEX,\n", + " settings={\n", + " \"index\": {\n", + " \"number_of_shards\": 1,\n", + " \"number_of_replicas\": 1\n", + " }\n", + " },\n", + " mappings={\n", + " \"properties\": {\n", + " \"genre\": {\n", + " \"type\": \"text\",\n", + " \"fields\": {\n", + " \"keyword\": {\n", + " \"type\": \"keyword\",\n", + " \"ignore_above\": 256\n", + " }\n", + " }\n", + " },\n", + " \"keyScene\": {\n", + " \"type\": \"text\",\n", + " \"fields\": {\n", + " \"keyword\": {\n", + " \"type\": \"keyword\",\n", + " \"ignore_above\": 256\n", + " }\n", + " }\n", + " },\n", + " \"plot\": {\n", + " \"type\": \"text\",\n", + " \"fields\": {\n", + " \"keyword\": {\n", + " \"type\": \"keyword\",\n", + " \"ignore_above\": 256\n", + " }\n", + " }\n", + " },\n", + " \"released\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"runtime\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"title\": {\n", + " \"type\": \"text\",\n", + " \"fields\": {\n", + " \"keyword\": {\n", + " \"type\": \"keyword\",\n", + " \"ignore_above\": 256\n", + " }\n", + " }\n", + " },\n", + " \"ml.tokens\": {\n", + " \"type\": \"rank_features\"\n", + " },\n", + " \"keyScene\": {\n", + " \"type\": \"text\"\n", + " }\n", + " }\n", + "}\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + 
"metadata": { + "id": "ohcvdngCGJlo" + }, + "source": [] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "EmELvr_JK_22" + }, + "source": [ + "# Create an ingest pipeline with an inference processor to use ELSER\n", + "\n", + "In order to use ELSER on our Elastic Cloud deployment we'll need to create an ingest pipeline that contains an inference processor that runs the ELSER model.\n", + "Let's add that pipeline using the [`put_pipeline`](https://www.elastic.co/guide/en/elasticsearch/reference/master/put-pipeline-api.html) method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XhRng99KLQsd", + "outputId": "00ea73b5-45a4-472b-f4bc-2c2c790ab94d" + }, + "outputs": [], + "source": [ + "\n", + "client.ingest.put_pipeline(id=\"elser-v1-test\", body={\n", + " \"processors\": [\n", + " {\n", + " \"inference\": {\n", + " \"model_id\": \".elser_model_1\",\n", + " \"target_field\": \"ml\",\n", + " \"field_map\": {\n", + " \"keyScene\": \"text_field\",\n", + " \"plot\": \"text_field\"\n", + " },\n", + " \"inference_config\": {\n", + " \"text_expansion\": {\n", + " \"results_field\": \"tokens\"\n", + " }\n", + " }\n", + " }\n", + " }\n", + " ]\n", + "})" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "0wCH7YHLNW3i" + }, + "source": [ + "Let's note a few important parameters from that API call:\n", + "\n", + "- `inference`: A processor that performs inference using a machine learning model.\n", + "- `model_id`: Specifies the ID of the machine learning model to be used. In this example, the model ID is set to `.elser_model_1`.\n", + "- `target_field`: Defines the field where the inference result will be stored. Here, it is set to `ml`.\n", + "- `text_expansion`: Configures text expansion options for the inference process.\n", + "In this example, the inference results will be stored in a field named \"tokens\"." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "WgWDMgf9NkHL" + }, + "source": [] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "U3vT2g5LVIQF" + }, + "source": [ + "# Create index and mapping for test data\n", + "\n", + "\n", + "We have some test data in a `json` file at this [URL](https://raw.githubusercontent.com/leemthompo/notebook-tests/main/12-movies.json).\n", + "Let's load that into our Elastic deployment.\n", + "First we'll create an index named `search-movies` to store that data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "X3ONJckPnUIT", + "outputId": "07ea0766-c226-4510-c910-893db89757ad" + }, + "outputs": [], + "source": [ + "client.indices.create(\n", + " index=\"search-movies\",\n", + " mappings= {\n", + " \"properties\": {\n", + " \"genre\": {\n", + " \"type\": \"text\",\n", + " \"fields\": {\n", + " \"keyword\": {\n", + " \"type\": \"keyword\",\n", + " \"ignore_above\": 256\n", + " }\n", + " }\n", + " },\n", + " \"keyScene\": {\n", + " \"type\": \"text\",\n", + " \"fields\": {\n", + " \"keyword\": {\n", + " \"type\": \"keyword\",\n", + " \"ignore_above\": 256\n", + " }\n", + " }\n", + " },\n", + " \"plot\": {\n", + " \"type\": \"text\",\n", + " \"fields\": {\n", + " \"keyword\": {\n", + " \"type\": \"keyword\",\n", + " \"ignore_above\": 256\n", + " }\n", + " }\n", + " },\n", + " \"released\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"runtime\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"title\": {\n", + " \"type\": \"text\",\n", + " \"fields\": {\n", + " \"keyword\": {\n", + " \"type\": \"keyword\",\n", + " \"ignore_above\": 256\n", + " }\n", + " }\n", + " }\n", + " }\n", + "})" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "lFHgRUYVpNKP" + }, + "source": [ + "# Upload sample data\n", + "\n", + "> ⚠ To use the UI to 
upload data, follow the approach described [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/semantic-search-elser.html#load-data).\n", + "\n", + "Let's upload the JSON data.\n", + "The dataset provides information on twelve iconic films.\n", + "Each film's entry includes its title, runtime, plot summary, a key scene, genre classification, and release year." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IBfqgdAcuKRG", + "outputId": "3b86daa1-ade1-4ff3-da81-4207fa814d30" + }, + "outputs": [], + "source": [ + "url = \"https://raw.githubusercontent.com/leemthompo/notebook-tests/main/12-movies.json\"\n", + "\n", + "# Send a request to the URL and get the response\n", + "response = urlopen(url)\n", + "\n", + "# Load the response data into a JSON object\n", + "data_json = json.loads(response.read())\n", + "\n", + "def create_index_body(doc):\n", + " \"\"\" Generate the body for an Elasticsearch document. \"\"\"\n", + " return {\n", + " \"_index\": \"search-movies\",\n", + " \"_source\": doc,\n", + " }\n", + "\n", + "# Prepare the documents to be indexed\n", + "documents = [create_index_body(doc) for doc in data_json]\n", + "\n", + "# Use helpers.bulk to index\n", + "helpers.bulk(client, documents)\n", + "\n", + "print(\"Done indexing documents into `search-movies` index!\")\n", + "\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "73d3Td-1ubhv" + }, + "source": [ + "# Ingest the data through the inference ingest pipeline\n", + "\n", + "Create tokens from the text by reindexing the data through the inference pipeline that uses ELSER as the inference model."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ysYobyC9uhn5", + "outputId": "27af8c88-9039-4ff8-a20f-9af9ffcff05c" + }, + "outputs": [], + "source": [ + "client.reindex(wait_for_completion=False,\n", + " source={\n", + " \"index\": \"search-movies\"\n", + " },\n", + " dest= {\n", + " \"index\": \"elser-movies\",\n", + " \"pipeline\": \"elser-v1-test\"\n", + " }\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "tUDGeY7e2-I2" + }, + "source": [ + "# Confirm documents are indexed with additional fields\n", + "\n", + "A successful API call in the previous step returns a task ID to monitor the job's progress.\n", + "Use the [task management API](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html) to check progress.\n", + "You can also monitor this task using the **Trained Models** UI in Kibana, selecting the **Pipelines** tab under **ELSER**.\n", + "\n", + "Call the following, replacing the example `task_id` value with the task ID returned in the previous step."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2KXeXCc63WVw", + "outputId": "e8fee6dd-34a1-401d-c879-71fd54de3c90" + }, + "outputs": [], + "source": [ + "client.tasks.get(task_id='cxy4bU9ASFKpFgZUpa-jnA:19545263')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "oCj3jHHML4Tn" + }, + "source": [ + "Inspect a new document to confirm that it now has an `\"ml\": {\"tokens\":...}` field that contains a list of new, additional terms.\n", + "These terms are the **text expansion** of the field(s) you targeted for ELSER inference.\n", + "ELSER essentially creates a tree of expanded terms to improve the semantic searchability of your documents.\n", + "We'll be able to search these documents using a `text_expansion` query.\n", + "\n", + "But first let's start with a simple keyword search, to see how ELSER delivers semantically relevant results out of the box." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "_KahQAbPPd9l" + }, + "source": [ + "# Keyword match\n", + "\n", + "## Successful match\n", + "\n", + "Let's start by assuming a user queries the data set and hits an exact match.\n", + "BM25 is perfect for exact keyword matches.\n", + "Imagine our user remembers a movie where a child's spinning top was a recurring image.\n", + "They search for `spinning top` and because these exact words are used in the key scene description, we get a perfect hit.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FsZkFhGaYnzD", + "outputId": "843c72f1-6a0c-43ce-c1e4-ad5e763ebc95" + }, + "outputs": [], + "source": [ + "response = client.search(\n", + " index=\"elser-movies\",\n", + " query= {\n", + " \"match\": {\n", + " \"keyScene\": \"spinning top\"\n", + " }\n", + " }\n", + ")\n", + "for hit in 
response['hits']['hits']:\n", + " doc_id = hit['_id']\n", + " score = hit['_score']\n", + " title = hit['_source']['title']\n", + " text = hit['_source']['keyScene']\n", + " print(f\"\\nTitle: {title}\\nKey scene description: {text}\\n\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Y01WHeOtbTZ-" + }, + "source": [ + "## Unsuccessful match\n", + "\n", + "Unfortunately, searches that rely on exact matches are brittle.\n", + "What if you can't remember the exact name of the thing you're searching for?\n", + "Who knows what a spinning top is anyway?\n", + "\n", + "Imagine I can only think of the words `child toy` to describe this apparatus.\n", + "A match query won't find any relevant documents." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "osifkhqidjYw", + "outputId": "6b917df6-b0af-4947-9280-98f7b17f2ff9" + }, + "outputs": [], + "source": [ + "response = client.search(\n", + " index=\"elser-movies\",\n", + " query= {\n", + " \"match\": {\n", + " \"keyScene\": \"child toy\"\n", + " }\n", + " }\n", + ")\n", + "hits = response['hits']['hits']\n", + "\n", + "if not hits:\n", + " print(\"No matches found\")\n", + "else:\n", + " for hit in hits:\n", + " doc_id = hit['_id']\n", + " score = hit['_score']\n", + " title = hit['_source']['title']\n", + " text = hit['_source']['keyScene']\n", + " print(f\"\\nTitle: {title}\\nKey scene description: {text}\\n\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "MPCVztOLeAk_" + }, + "source": [ + "So it turns out classical term matching strategies are very good, if you know precisely what you're looking for.\n", + "But they break down when a user has a hard time articulating what they're trying to find.\n", + "Here's where semantic search shines.\n", + "It helps capture a user's intent or meaning better, without relying on brittle term matches.\n", +
"\n", + "Traditional dense vector based similarity strategies require you to generate embeddings for your data and then map queries into the same mathematical space as the data.\n", + "This works well but is time consuming and requires a lot of legwork.\n", + "The beauty of the Elastic Learned Sparse Encoder model is that it works out-of-the-box, without the need to fine tune on your data.\n", + "\n", + "The Elastic Learned Sparse Encoder creates a tree of expanded terms, adds them to your documents, improving their semantic searchability.\n", + "The fields that you targeted for inference are now enriched with a range of relevant synonyms and related terms, that increase the probability of a successful search." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Zy5GT2xb38oz" + }, + "source": [ + "# Semantic search with the `text_expansion` query\n", + "\n", + "Let's test out semantic search using the Elastic Learned Sparse Encoder, and see if we can improve our earlier unsuccessful search, using the query `child toy`.\n", + "\n", + "To perform semantic search using the Elastic Learned Sparse Encoder, you need the following:\n", + "- A `text_expansion` query\n", + "- Query text\n", + " - In this example we use `child toy`\n", + "- ELSER model ID" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bAZRxja-5Q6X", + "outputId": "37a26a2c-4284-4e51-c34e-9a55edf77cb8" + }, + "outputs": [], + "source": [ + "response = client.search(index='elser-movies', size=3,\n", + " query={\n", + " \"text_expansion\": {\n", + " \"ml.tokens\": {\n", + " \"model_id\":\".elser_model_1\",\n", + " \"model_text\":\"child toy\"\n", + " \n", + " }\n", + " }\n", + "}\n", + ")\n", + "\n", + "for hit in response['hits']['hits']:\n", + " doc_id = hit['_id']\n", + " score = hit['_score']\n", + " title = hit['_source']['title']\n", + " text = 
hit['_source']['keyScene']\n", + " print(f\"Score: {score}\\nTitle: {title}\\nKey scene description: {text}\\n\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "yYSJ7fnv5uWd" + }, + "source": [ + "Success! Out of the box ELSER has taken a fuzzy, but semantically similar query and found the correct match.\n", + "Our user has found the movie they're looking for!" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 9b0037b9f25dfcabb656cf34a5c3e1784b90bc3d Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Wed, 5 Jul 2023 17:28:04 +0200 Subject: [PATCH 24/54] Add open in colab button --- colab-notebooks-examples/search/03-ELSER.ipynb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/colab-notebooks-examples/search/03-ELSER.ipynb b/colab-notebooks-examples/search/03-ELSER.ipynb index 9ff90a3b..8b45887c 100644 --- a/colab-notebooks-examples/search/03-ELSER.ipynb +++ b/colab-notebooks-examples/search/03-ELSER.ipynb @@ -9,6 +9,9 @@ "source": [ "# Semantic Search using ELSER text expansion\n", "\n", + "[![Open In Colab](https://colab.research.google.com/github/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/03-ELSER.ipynb)\n", + "\n", + "\n", "Learn how to use the [Elastic Learned Sparse Encoder](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html) for text expansion-powered semantic search." 
] }, From 24ac34ac6370054ef3525b5ac527a59b1fe38b20 Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Wed, 5 Jul 2023 17:29:35 +0200 Subject: [PATCH 25/54] Fix badge --- colab-notebooks-examples/search/03-ELSER.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/03-ELSER.ipynb b/colab-notebooks-examples/search/03-ELSER.ipynb index 8b45887c..13c11fea 100644 --- a/colab-notebooks-examples/search/03-ELSER.ipynb +++ b/colab-notebooks-examples/search/03-ELSER.ipynb @@ -9,7 +9,7 @@ "source": [ "# Semantic Search using ELSER text expansion\n", "\n", - "[![Open In Colab](https://colab.research.google.com/github/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/03-ELSER.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/03-ELSER.ipynb)\n", "\n", "\n", "Learn how to use the [Elastic Learned Sparse Encoder](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html) for text expansion-powered semantic search." From f3702b749ad6a793e79d1387014580ef581b1780 Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Wed, 5 Jul 2023 17:35:26 +0200 Subject: [PATCH 26/54] Minor cleanup --- colab-notebooks-examples/search/03-ELSER.ipynb | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/colab-notebooks-examples/search/03-ELSER.ipynb b/colab-notebooks-examples/search/03-ELSER.ipynb index 13c11fea..b8c6cc8a 100644 --- a/colab-notebooks-examples/search/03-ELSER.ipynb +++ b/colab-notebooks-examples/search/03-ELSER.ipynb @@ -118,9 +118,8 @@ "id": "gEzq2Z1wBs3M" }, "source": [ - "[TODO: Update]\n", - "Next we need to import the `elasticsearch` module and the `getpass` module.\n", - "`getpass` is part of the Python standard library and is used to securely prompt for credentials." 
+ "Next, we need to import the modules we need.\n", + "🔐 NOTE: `getpass` enables us to securely prompt the user for credentials without echoing them to the terminal, or storing it in memory." ] }, { @@ -133,9 +132,7 @@ "source": [ "from elasticsearch import Elasticsearch, helpers\n", "from urllib.request import urlopen\n", - "import getpass\n", - "import requests\n", - "import json" + "import getpass" ] }, { @@ -146,10 +143,8 @@ }, "source": [ "Now we can instantiate the Python Elasticsearch client.\n", - "First we prompt the user for their password and Cloud ID.\n", - "\n", - "🔐 NOTE: `getpass` enables us to securely prompt the user for credentials without echoing them to the terminal, or storing it in memory.\n", "\n", + "First we prompt the user for their password and Cloud ID.\n", "Then we create a `client` object that instantiates an instance of the `Elasticsearch` class." ] }, From 7ff44f24ee1331f7678571bbd98961b22aa82d30 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Thu, 6 Jul 2023 11:43:49 +0100 Subject: [PATCH 27/54] updates --- .../search/00-quick-start.ipynb | 5035 +---------------- 1 file changed, 18 insertions(+), 5017 deletions(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index f3094668..deabf786 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "87773ce7", "metadata": { @@ -17,7 +16,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "a32202e2", "metadata": { @@ -33,7 +31,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "52a6a607", "metadata": { @@ -53,11 +50,7 @@ "execution_count": null, "id": "ffc5fa6f", "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, "id": "ffc5fa6f", - "outputId": "d9693f20-1482-4a4d-b74d-a876885f92fa", "scrolled": false }, "outputs": [], @@ 
-66,7 +59,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "d9cb4609", "metadata": { @@ -78,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "099415ba", "metadata": { "id": "099415ba" @@ -90,7 +82,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "28AH8LhI-0UD", "metadata": { @@ -105,168 +96,7 @@ "execution_count": null, "id": "WHC3hHGW-wbI", "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 552, - "referenced_widgets": [ - "8a47d05dd32e452b89c1c62f15cb57a3", - "34d9035bbca449fba7a64a7353ce9200", - "230c71ac97db42aeb048c392154c7ad8", - "327fdab26f534c918c41f9b6eb8da61e", - "6a7bbaad9509474b8b77bfcd216c2e4a", - "cca1ec7ef9174e37a5e17b2a77b39aad", - "e5616536b1824a388fa9e1ed826307f9", - "254086255be24f2b8e0ccfcfb072f379", - "d7b8af67a3a8454c97d9a68caff040c7", - "22d2c55179744b4f8d57c15aa60bd0ef", - "face41511e7c4c748e123d176b219f13", - "b27dfc2df0f94537b977baa0211165f7", - "27a5814b873b463d8ec884fce5b3002b", - "d7e199cec93c42308436a13f0101798d", - "51023e9b26e348278fb5788c4b1d3cf1", - "e37dae5f929042eea3b021d81c7e89f9", - "f41fc8e604524a118ea2b863a01d5201", - "a407443370464f68b4f503a5df4b2fd4", - "a09c50472f244d15875780d53e4cb680", - "e1623d2abb5244708dc2d5ae9bce9e6f", - "9bac318fe82342c4a9a489034d54b85c", - "6aa05f91d1a24d8fbe6bd0083649c9d9", - "70e02ff9e19b4a9f991e0777b42b22c4", - "84727c7020574c809ca155efa0996217", - "0e0a2c012cdf402a8c7275d42fb47d16", - "fe235613377d4443815ad548d7b19a2c", - "30ad2a5963c04d188b64448513352576", - "0229a8c31ca04b5aa8f36f049b6333c0", - "775697558ad74bd0b01b8728617395df", - "fd06e080368345679c280ae286dcf118", - "c385ab3c1ffb4a9391feaf8017ac2124", - "2a76d9c1c37a45ab8f4d7da6541c11c3", - "3897a135ce4d413f95305560e9d5f51d", - "e41fb5a2fbcf485899df455800c29f66", - "637b076d35c247348d61e924f86c7509", - "fb5eba47d398448a991aaca68e4c33f0", - "5a549312926048079d979df5f31f6668", - "c7f8a26c31154c8f9bf65dddb4095c04", - 
"b999f7e8580d47bf956607b2dbf59604", - "835c285bb5bf41999c6e845d485ab845", - "2083d409adf345e3b6209bf9b4e8d5df", - "4d1b0ec8de0f466eba688a33a95b6dc8", - "8ae23bf62ca44f3e9953ec0b89705a42", - "6c57b2b268a44ad09c188941cc1fa47a", - "0a566ba9f07d4a56a69dcfc467c8cfa8", - "c825476eb2a24cc9b8707d23bd0589ed", - "d290678ad04e431ba4923c0dba777399", - "b105661ae91240a9ac645a531f87db35", - "44cd00539c8c487ba2b58819b96d90b8", - "00f331e549504656ac4d8f1f07e95768", - "e6864110fe5b4caf8e1b869162a6c45a", - "cafef97c29564483b15c21b647e2e0d1", - "6a664e104c2f462f838738ea966a0115", - "c987b5a057dc475bb7cb702afe198f74", - "b571138b82b146f6add4079c7c4a3fea", - "2097ee5afa034d5fa208191458925956", - "0c0e6363c3d44ad992727ad3031b685a", - "85667ca054aa431eb50fa26233244e65", - "dceed128f53f4726afe578574c19815d", - "862c408691204fb2902f588ea8a2f614", - "57f9e08a916f4c7584d1e5c625adf509", - "98f9d91b220d4c86aaa4a20994e858a8", - "25081b6df89c4f6890d7e359905b3b8a", - "6fad6161615f45688f3c44d6179ba204", - "7eed45dcf5004dccb734eb315415a535", - "a5cd7c8a692f43f7859b4df8334c3f0f", - "3d4d613296054330bedd8eac39978a18", - "b11f3b77d8604aed8051678c6f12b172", - "f7f3cae2779d41b9b08d299596c6026c", - "9db4b498a78e435db8afd06c6632a522", - "9513eacff50642e7b0579c7de57bdacf", - "6edf7e9504fe48be802d443b3a0dd3a5", - "22a5d61c70c545d095c54b4b066d4b87", - "b8769ba773c144428df49ed117a4ba2e", - "914892930f74452d96cd787c128c955b", - "d10b85816ca540c0a5456e4126dc99c6", - "c686e95be6db4030b77cfcc170aaf722", - "09af9df668ad43b4a8bd2c10d36a2aab", - "eb1d39f0a2254be7857baa26432a5e56", - "73cb794c915e493e9623d5f598a71029", - "4d21a6cce39141f594d5b5d71caa5a61", - "753703fac8c54e268e0a4eb373580a0b", - "2787caea3cbc4f029e766b35ccaa6013", - "ae251276d3c8482ab768130bde5b9de3", - "440fcc9ad14241dba8e724aac69803e9", - "b5608a8229a34e8e8405bf0420104ae4", - "8c4e637876534783959a3daa9254a65a", - "1181941459e04ce29dfaf48ae1e6dff7", - "ee53579d441b4b2a97176d161b479cd1", - "ae834f736104436593dd069cf3bdd5a2", - 
"3418257fe9114669b00bc242c620bfb3", - "24f54307ee3b499ab7519f713ce994e1", - "e0d94012952e43549dd4d158c30c91d6", - "fa8d7e78724341ca84f0f829e33d756d", - "4cbf54c836d74602b94388254b5bf8eb", - "640450c5f17646a5b42a1ab13218db23", - "8a280b8dae964aaea3be0a299e354b17", - "f22a4dfe0f6e44c2a0bc37b3fe488439", - "5c26c4be0a0240499d427e67d89cd869", - "1fe019c07e0c4f62adfc826f89eb2dbd", - "3b2a66e66c5c4e8a8bafd6d340a779b9", - "6c2891e1aae043afbba8430ae097b882", - "7743a69d92f94e8fa358b458fe8aa4a5", - "a76ea0a19ea74594a843c62aa6fb03e1", - "8a909360e5704220987b703ba4cb3dff", - "cc4c4534d0b54d0187d5a7ed82a0376e", - "8ff8b86348424eda9678a932fbbad132", - "d43e683a46ef4d8892635fc1da1bd7c0", - "3e4fbf2956b343218d2aec4d867e1c94", - "4bde08a13c41477ba6c7a4697ab599a0", - "54062dca2e8743a68da565014c44304c", - "79efd9353e4f4ee690de5d38bd98c7aa", - "28451c97f4fa4b8991186907a50738e8", - "50d576d87fae494182632cbed50372ac", - "a57b74c1c4b6497ba8e0dafad0af4ddf", - "08a1df059a4b4c26a87ccea8c5d45bbd", - "8a37a86d8ed44c1cb70eca68215c913c", - "c3b2c605e7e94299a5dadf771baf6f0a", - "69fa56f6dfde42f883b24069f65ed745", - "bff335481e214de5b02478938672df38", - "4e25e350551e49a2bf50e081fcb0184a", - "ac4fdd9c828d473384662742dca85aa7", - "c4f9e573288a46d19efae3d101dbbdef", - "9adc61965b944edc9abff9d0de7e745a", - "483b1513ebcd4e718e69202b30163973", - "04e89773ebeb42db87c64d7b9ec7cb26", - "3f50cc3742724ab2bf8434c341eee0bd", - "449d71ebd4df4ddbb755d30fdff25358", - "530177663d7f47d39c43beb3ce7fab78", - "1ad860cd423c4fd893066e6925579eb7", - "7c1fc9a54be14954acd59abde2fca97f", - "92790cf373b14a058b31fc0d1fcc5c94", - "b889cd1323134a9cb0b2eb81ceb3cde1", - "7e85d153ec15430689bc77ef6bf2b3c8", - "e706e32cd2a642eb9618dd0dd75d7664", - "7c252184b6c449d1bbba4807cde4672a", - "df86bc204d014d13990917c348b09769", - "eb16cb52b8cb438984c4c75c3074de84", - "9ea920d0083f46e2add318c845b3fddf", - "cb4add21dd514bf7a3d2e51726603e36", - "ac2387223ca04438a3e4bb59cd2f7019", - "2450d6be578e4eb6960111c26c55fa43", - 
"8d97796bc9964114853ce5803584d159", - "05c5fdaa4d624b44857f6b7d08426e6c", - "71e2f0ea4906451f9555a638f7e713b6", - "fdb9c374bc1e48688e439dc82103c545", - "f14f96faaf3444cda3da447b10d57cee", - "95007200cc10458882a9e81d055978b9", - "530c9177c7024e98a24d8db80651ab01", - "a5abdec4f10f4b5385f01722480139e3", - "6f25e1c5d19e474aaa3f4d5806a53445", - "4fd8f1d3cb944c2e8a0a806e8c89a26f", - "143c8f9e754240e6a5a8386da714cb1b", - "3d3d1beb5ed346209eac8f4b38325ae1" - ] - }, - "id": "WHC3hHGW-wbI", - "outputId": "e1afa0b3-6f39-47cd-da21-c6adadc7b7de" + "id": "WHC3hHGW-wbI" }, "outputs": [], "source": [ @@ -280,7 +110,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "0241694c", "metadata": { @@ -294,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "f38e0397", "metadata": { "id": "f38e0397" @@ -309,7 +138,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "fcd165fa", "metadata": { @@ -320,7 +148,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "1462ebd8", "metadata": { @@ -335,11 +162,7 @@ "execution_count": null, "id": "25c618eb", "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "25c618eb", - "outputId": "f3ea04dc-a30a-4ea1-a393-addb35d29fac" + "id": "25c618eb" }, "outputs": [], "source": [ @@ -347,7 +170,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "61e1e6d8", "metadata": { @@ -376,11 +198,7 @@ "execution_count": null, "id": "6bc95238", "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "6bc95238", - "outputId": "2b6d0360-43e8-4cbe-fada-80bf475510ed" + "id": "6bc95238" }, "outputs": [], "source": [ @@ -394,11 +212,11 @@ " \"publish_date\": {\"type\": \"date\"},\n", " \"num_reviews\": {\"type\": \"integer\"},\n", " \"publisher\": {\"type\": \"keyword\"},\n", - " \"title_vector\": { \n", - " \"type\": \"dense_vector\", \n", - " \"dims\": 384, \n", - " \"index\": \"true\", \n", - " \"similarity\": \"dot_product\" \n", + 
" \"title_vector\": {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 384,\n", + " \"index\": \"true\",\n", + " \"similarity\": \"dot_product\"\n", " }\n", " }\n", " }\n", @@ -409,7 +227,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "075f5eb6", "metadata": { @@ -418,9 +235,7 @@ "source": [ "### Index test data\n", "\n", - "Run the following command to upload some test data, containing information about 10 popular programming books.\n", - "\n", - "ℹ️ If you'd like to upload your own data from a URL, refer to the following [notebook](https://github.com/leemthompo/notebook-tests/blob/main/load-data-from-url.ipynb) for an example." + "Run the following command to upload some test data, containing information about 10 popular programming books." ] }, { @@ -428,11 +243,7 @@ "execution_count": null, "id": "008d723e", "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "008d723e", - "outputId": "430bbad4-404d-4f5d-dc23-d4e462b931fa" + "id": "008d723e" }, "outputs": [], "source": [ @@ -530,7 +341,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "cd8b03e0", "metadata": { @@ -564,7 +374,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "39bdefe0", "metadata": { @@ -578,7 +387,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "Df7hwcIjYwMT", "metadata": { "colab": { @@ -664,28 +473,16 @@ } ], "source": [ - "def pretty_response(response):\n", - " for hit in response['hits']['hits']:\n", - " id = hit['_id']\n", - " publication_date = hit['_source']['publish_date']\n", - " score = hit['_score']\n", - " title = hit['_source']['title']\n", - " summary = hit['_source']['summary']\n", - " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nScore: {score}\")\n", - " print(pretty_output)\n", - "\n", "response = client.search(index=\"book_index\", body={\n", " \"knn\": {\n", " \"field\": \"title_vector\",\n", " 
\"query_vector\": model.encode(\"Best Project management books?\"),\n", - " \"k\": 10,\n", - " \"num_candidates\": 100\n", + " \"k\": 10,\n", + " \"num_candidates\": 100\n", " }\n", "})\n", "\n", - "pretty_response(response)\n", - "\n", - "\n" + "pretty_response(response)" ] }, { @@ -693,11 +490,7 @@ "execution_count": null, "id": "pWYkmofeaSk8", "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "pWYkmofeaSk8", - "outputId": "c8aa61e0-c33f-4a9b-e05d-09d2d5e75ef3" + "id": "pWYkmofeaSk8" }, "outputs": [], "source": [ @@ -725,4800 +518,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "00f331e549504656ac4d8f1f07e95768": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"0229a8c31ca04b5aa8f36f049b6333c0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "04e89773ebeb42db87c64d7b9ec7cb26": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": 
null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "05c5fdaa4d624b44857f6b7d08426e6c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_71e2f0ea4906451f9555a638f7e713b6", - "IPY_MODEL_fdb9c374bc1e48688e439dc82103c545", - "IPY_MODEL_f14f96faaf3444cda3da447b10d57cee" - ], - "layout": "IPY_MODEL_95007200cc10458882a9e81d055978b9" - } - }, - "08a1df059a4b4c26a87ccea8c5d45bbd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - 
"height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "09af9df668ad43b4a8bd2c10d36a2aab": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_eb1d39f0a2254be7857baa26432a5e56", - "IPY_MODEL_73cb794c915e493e9623d5f598a71029", - "IPY_MODEL_4d21a6cce39141f594d5b5d71caa5a61" - ], - "layout": "IPY_MODEL_753703fac8c54e268e0a4eb373580a0b" - } - }, - "0a566ba9f07d4a56a69dcfc467c8cfa8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c825476eb2a24cc9b8707d23bd0589ed", - "IPY_MODEL_d290678ad04e431ba4923c0dba777399", - "IPY_MODEL_b105661ae91240a9ac645a531f87db35" - ], - "layout": "IPY_MODEL_44cd00539c8c487ba2b58819b96d90b8" - } - }, - "0c0e6363c3d44ad992727ad3031b685a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_57f9e08a916f4c7584d1e5c625adf509", - "placeholder": "​", - "style": "IPY_MODEL_98f9d91b220d4c86aaa4a20994e858a8", - "value": "Downloading (…)125/data_config.json: 100%" - } - }, - "0e0a2c012cdf402a8c7275d42fb47d16": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fd06e080368345679c280ae286dcf118", - "max": 10610, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c385ab3c1ffb4a9391feaf8017ac2124", - "value": 10610 - } - }, - "1181941459e04ce29dfaf48ae1e6dff7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "143c8f9e754240e6a5a8386da714cb1b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1ad860cd423c4fd893066e6925579eb7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "1fe019c07e0c4f62adfc826f89eb2dbd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_3b2a66e66c5c4e8a8bafd6d340a779b9", - "IPY_MODEL_6c2891e1aae043afbba8430ae097b882", - 
"IPY_MODEL_7743a69d92f94e8fa358b458fe8aa4a5" - ], - "layout": "IPY_MODEL_a76ea0a19ea74594a843c62aa6fb03e1" - } - }, - "2083d409adf345e3b6209bf9b4e8d5df": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2097ee5afa034d5fa208191458925956": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_0c0e6363c3d44ad992727ad3031b685a", - "IPY_MODEL_85667ca054aa431eb50fa26233244e65", - "IPY_MODEL_dceed128f53f4726afe578574c19815d" - ], - "layout": 
"IPY_MODEL_862c408691204fb2902f588ea8a2f614" - } - }, - "22a5d61c70c545d095c54b4b066d4b87": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "22d2c55179744b4f8d57c15aa60bd0ef": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "230c71ac97db42aeb048c392154c7ad8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_254086255be24f2b8e0ccfcfb072f379", - "max": 1175, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_d7b8af67a3a8454c97d9a68caff040c7", - "value": 1175 - } - }, - "2450d6be578e4eb6960111c26c55fa43": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "24f54307ee3b499ab7519f713ce994e1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f22a4dfe0f6e44c2a0bc37b3fe488439", - "placeholder": "​", - "style": "IPY_MODEL_5c26c4be0a0240499d427e67d89cd869", - "value": " 112/112 [00:00<00:00, 5.97kB/s]" - } - }, - "25081b6df89c4f6890d7e359905b3b8a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "254086255be24f2b8e0ccfcfb072f379": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2787caea3cbc4f029e766b35ccaa6013": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - 
"order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "27a5814b873b463d8ec884fce5b3002b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f41fc8e604524a118ea2b863a01d5201", - "placeholder": "​", - "style": "IPY_MODEL_a407443370464f68b4f503a5df4b2fd4", - "value": "Downloading (…)_Pooling/config.json: 100%" - } - }, - "28451c97f4fa4b8991186907a50738e8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c3b2c605e7e94299a5dadf771baf6f0a", - "max": 350, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_69fa56f6dfde42f883b24069f65ed745", - "value": 350 - } - }, - "2a76d9c1c37a45ab8f4d7da6541c11c3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - 
"align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "30ad2a5963c04d188b64448513352576": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": 
null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "327fdab26f534c918c41f9b6eb8da61e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_22d2c55179744b4f8d57c15aa60bd0ef", - "placeholder": "​", - "style": "IPY_MODEL_face41511e7c4c748e123d176b219f13", - "value": " 1.18k/1.18k [00:00<00:00, 54.2kB/s]" - } - }, - "3418257fe9114669b00bc242c620bfb3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_640450c5f17646a5b42a1ab13218db23", - "max": 112, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_8a280b8dae964aaea3be0a299e354b17", - "value": 112 - } - }, - "34d9035bbca449fba7a64a7353ce9200": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_cca1ec7ef9174e37a5e17b2a77b39aad", - "placeholder": "​", - "style": "IPY_MODEL_e5616536b1824a388fa9e1ed826307f9", - "value": "Downloading (…)e9125/.gitattributes: 100%" - } - }, - "3897a135ce4d413f95305560e9d5f51d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3b2a66e66c5c4e8a8bafd6d340a779b9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8a909360e5704220987b703ba4cb3dff", - "placeholder": "​", - "style": "IPY_MODEL_cc4c4534d0b54d0187d5a7ed82a0376e", - "value": "Downloading (…)e9125/tokenizer.json: 100%" - } - }, - "3d3d1beb5ed346209eac8f4b38325ae1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3d4d613296054330bedd8eac39978a18": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - 
"_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b11f3b77d8604aed8051678c6f12b172", - "IPY_MODEL_f7f3cae2779d41b9b08d299596c6026c", - "IPY_MODEL_9db4b498a78e435db8afd06c6632a522" - ], - "layout": "IPY_MODEL_9513eacff50642e7b0579c7de57bdacf" - } - }, - "3e4fbf2956b343218d2aec4d867e1c94": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3f50cc3742724ab2bf8434c341eee0bd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": 
"LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "440fcc9ad14241dba8e724aac69803e9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": 
null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "449d71ebd4df4ddbb755d30fdff25358": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "44cd00539c8c487ba2b58819b96d90b8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "483b1513ebcd4e718e69202b30163973": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7c1fc9a54be14954acd59abde2fca97f", - "placeholder": "​", - "style": "IPY_MODEL_92790cf373b14a058b31fc0d1fcc5c94", - "value": " 13.2k/13.2k [00:00<00:00, 402kB/s]" - } - }, - "4bde08a13c41477ba6c7a4697ab599a0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4cbf54c836d74602b94388254b5bf8eb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4d1b0ec8de0f466eba688a33a95b6dc8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - 
"bar_color": null, - "description_width": "" - } - }, - "4d21a6cce39141f594d5b5d71caa5a61": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8c4e637876534783959a3daa9254a65a", - "placeholder": "​", - "style": "IPY_MODEL_1181941459e04ce29dfaf48ae1e6dff7", - "value": " 53.0/53.0 [00:00<00:00, 2.38kB/s]" - } - }, - "4e25e350551e49a2bf50e081fcb0184a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4fd8f1d3cb944c2e8a0a806e8c89a26f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "50d576d87fae494182632cbed50372ac": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": 
null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bff335481e214de5b02478938672df38", - "placeholder": "​", - "style": "IPY_MODEL_4e25e350551e49a2bf50e081fcb0184a", - "value": " 350/350 [00:00<00:00, 20.4kB/s]" - } - }, - "51023e9b26e348278fb5788c4b1d3cf1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9bac318fe82342c4a9a489034d54b85c", - "placeholder": "​", - "style": "IPY_MODEL_6aa05f91d1a24d8fbe6bd0083649c9d9", - "value": " 190/190 [00:00<00:00, 10.5kB/s]" - } - }, - "530177663d7f47d39c43beb3ce7fab78": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - 
"max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "530c9177c7024e98a24d8db80651ab01": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "54062dca2e8743a68da565014c44304c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": 
[ - "IPY_MODEL_79efd9353e4f4ee690de5d38bd98c7aa", - "IPY_MODEL_28451c97f4fa4b8991186907a50738e8", - "IPY_MODEL_50d576d87fae494182632cbed50372ac" - ], - "layout": "IPY_MODEL_a57b74c1c4b6497ba8e0dafad0af4ddf" - } - }, - "57f9e08a916f4c7584d1e5c625adf509": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5a549312926048079d979df5f31f6668": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_8ae23bf62ca44f3e9953ec0b89705a42", - "placeholder": "​", - "style": "IPY_MODEL_6c57b2b268a44ad09c188941cc1fa47a", - "value": " 612/612 [00:00<00:00, 34.3kB/s]" - } - }, - "5c26c4be0a0240499d427e67d89cd869": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "637b076d35c247348d61e924f86c7509": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b999f7e8580d47bf956607b2dbf59604", - "placeholder": "​", - "style": "IPY_MODEL_835c285bb5bf41999c6e845d485ab845", - "value": "Downloading (…)55de9125/config.json: 100%" - } - }, - "640450c5f17646a5b42a1ab13218db23": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": 
null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "69fa56f6dfde42f883b24069f65ed745": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6a664e104c2f462f838738ea966a0115": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6a7bbaad9509474b8b77bfcd216c2e4a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - 
"align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6aa05f91d1a24d8fbe6bd0083649c9d9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6c2891e1aae043afbba8430ae097b882": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8ff8b86348424eda9678a932fbbad132", - "max": 466247, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_d43e683a46ef4d8892635fc1da1bd7c0", - "value": 466247 - } - }, - 
"6c57b2b268a44ad09c188941cc1fa47a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6edf7e9504fe48be802d443b3a0dd3a5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6f25e1c5d19e474aaa3f4d5806a53445": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": 
null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6fad6161615f45688f3c44d6179ba204": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "70e02ff9e19b4a9f991e0777b42b22c4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_84727c7020574c809ca155efa0996217", - "IPY_MODEL_0e0a2c012cdf402a8c7275d42fb47d16", 
- "IPY_MODEL_fe235613377d4443815ad548d7b19a2c" - ], - "layout": "IPY_MODEL_30ad2a5963c04d188b64448513352576" - } - }, - "71e2f0ea4906451f9555a638f7e713b6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_530c9177c7024e98a24d8db80651ab01", - "placeholder": "​", - "style": "IPY_MODEL_a5abdec4f10f4b5385f01722480139e3", - "value": "Downloading (…)5de9125/modules.json: 100%" - } - }, - "73cb794c915e493e9623d5f598a71029": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_440fcc9ad14241dba8e724aac69803e9", - "max": 53, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b5608a8229a34e8e8405bf0420104ae4", - "value": 53 - } - }, - "753703fac8c54e268e0a4eb373580a0b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": 
null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7743a69d92f94e8fa358b458fe8aa4a5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3e4fbf2956b343218d2aec4d867e1c94", - "placeholder": "​", - "style": "IPY_MODEL_4bde08a13c41477ba6c7a4697ab599a0", - "value": " 466k/466k [00:00<00:00, 905kB/s]" - } - }, - "775697558ad74bd0b01b8728617395df": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "79efd9353e4f4ee690de5d38bd98c7aa": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_08a1df059a4b4c26a87ccea8c5d45bbd", - "placeholder": "​", - "style": "IPY_MODEL_8a37a86d8ed44c1cb70eca68215c913c", - "value": "Downloading (…)okenizer_config.json: 100%" - } - }, - "7c1fc9a54be14954acd59abde2fca97f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7c252184b6c449d1bbba4807cde4672a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - 
"model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2450d6be578e4eb6960111c26c55fa43", - "placeholder": "​", - "style": "IPY_MODEL_8d97796bc9964114853ce5803584d159", - "value": " 232k/232k [00:00<00:00, 1.37MB/s]" - } - }, - "7e85d153ec15430689bc77ef6bf2b3c8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_eb16cb52b8cb438984c4c75c3074de84", - "placeholder": "​", - "style": "IPY_MODEL_9ea920d0083f46e2add318c845b3fddf", - "value": "Downloading (…)7e55de9125/vocab.txt: 100%" - } - }, - "7eed45dcf5004dccb734eb315415a535": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": 
null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "835c285bb5bf41999c6e845d485ab845": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "84727c7020574c809ca155efa0996217": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0229a8c31ca04b5aa8f36f049b6333c0", - "placeholder": "​", - "style": "IPY_MODEL_775697558ad74bd0b01b8728617395df", - "value": "Downloading (…)7e55de9125/README.md: 100%" - } - }, - "85667ca054aa431eb50fa26233244e65": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_25081b6df89c4f6890d7e359905b3b8a", - "max": 39265, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_6fad6161615f45688f3c44d6179ba204", - "value": 39265 - } - }, - "862c408691204fb2902f588ea8a2f614": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8a280b8dae964aaea3be0a299e354b17": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "8a37a86d8ed44c1cb70eca68215c913c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8a47d05dd32e452b89c1c62f15cb57a3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_34d9035bbca449fba7a64a7353ce9200", - "IPY_MODEL_230c71ac97db42aeb048c392154c7ad8", - "IPY_MODEL_327fdab26f534c918c41f9b6eb8da61e" - ], - "layout": "IPY_MODEL_6a7bbaad9509474b8b77bfcd216c2e4a" - } - }, - "8a909360e5704220987b703ba4cb3dff": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, 
- "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8ae23bf62ca44f3e9953ec0b89705a42": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8c4e637876534783959a3daa9254a65a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8d97796bc9964114853ce5803584d159": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8ff8b86348424eda9678a932fbbad132": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - 
"flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "914892930f74452d96cd787c128c955b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "92790cf373b14a058b31fc0d1fcc5c94": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "95007200cc10458882a9e81d055978b9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9513eacff50642e7b0579c7de57bdacf": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - 
"order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "98f9d91b220d4c86aaa4a20994e858a8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9adc61965b944edc9abff9d0de7e745a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_530177663d7f47d39c43beb3ce7fab78", - "max": 13156, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_1ad860cd423c4fd893066e6925579eb7", - "value": 13156 - } - }, - "9bac318fe82342c4a9a489034d54b85c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - 
"grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9db4b498a78e435db8afd06c6632a522": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d10b85816ca540c0a5456e4126dc99c6", - "placeholder": "​", - "style": "IPY_MODEL_c686e95be6db4030b77cfcc170aaf722", - "value": " 90.9M/90.9M [00:01<00:00, 85.9MB/s]" - } - }, - "9ea920d0083f46e2add318c845b3fddf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a09c50472f244d15875780d53e4cb680": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - 
"_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a407443370464f68b4f503a5df4b2fd4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a57b74c1c4b6497ba8e0dafad0af4ddf": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - 
"bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a5abdec4f10f4b5385f01722480139e3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a5cd7c8a692f43f7859b4df8334c3f0f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a76ea0a19ea74594a843c62aa6fb03e1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ac2387223ca04438a3e4bb59cd2f7019": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ac4fdd9c828d473384662742dca85aa7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c4f9e573288a46d19efae3d101dbbdef", - "IPY_MODEL_9adc61965b944edc9abff9d0de7e745a", - 
"IPY_MODEL_483b1513ebcd4e718e69202b30163973" - ], - "layout": "IPY_MODEL_04e89773ebeb42db87c64d7b9ec7cb26" - } - }, - "ae251276d3c8482ab768130bde5b9de3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ae834f736104436593dd069cf3bdd5a2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fa8d7e78724341ca84f0f829e33d756d", - "placeholder": "​", - "style": "IPY_MODEL_4cbf54c836d74602b94388254b5bf8eb", - "value": "Downloading (…)cial_tokens_map.json: 100%" - } - }, - "b105661ae91240a9ac645a531f87db35": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c987b5a057dc475bb7cb702afe198f74", - "placeholder": "​", - "style": "IPY_MODEL_b571138b82b146f6add4079c7c4a3fea", - "value": " 116/116 [00:00<00:00, 4.55kB/s]" - } - }, - "b11f3b77d8604aed8051678c6f12b172": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6edf7e9504fe48be802d443b3a0dd3a5", - "placeholder": "​", - "style": "IPY_MODEL_22a5d61c70c545d095c54b4b066d4b87", - "value": "Downloading pytorch_model.bin: 100%" - } - }, - "b27dfc2df0f94537b977baa0211165f7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_27a5814b873b463d8ec884fce5b3002b", - "IPY_MODEL_d7e199cec93c42308436a13f0101798d", - "IPY_MODEL_51023e9b26e348278fb5788c4b1d3cf1" - ], - "layout": "IPY_MODEL_e37dae5f929042eea3b021d81c7e89f9" - } - }, - "b5608a8229a34e8e8405bf0420104ae4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "b571138b82b146f6add4079c7c4a3fea": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b8769ba773c144428df49ed117a4ba2e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b889cd1323134a9cb0b2eb81ceb3cde1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - 
"IPY_MODEL_7e85d153ec15430689bc77ef6bf2b3c8", - "IPY_MODEL_e706e32cd2a642eb9618dd0dd75d7664", - "IPY_MODEL_7c252184b6c449d1bbba4807cde4672a" - ], - "layout": "IPY_MODEL_df86bc204d014d13990917c348b09769" - } - }, - "b999f7e8580d47bf956607b2dbf59604": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bff335481e214de5b02478938672df38": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": 
null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c385ab3c1ffb4a9391feaf8017ac2124": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "c3b2c605e7e94299a5dadf771baf6f0a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c4f9e573288a46d19efae3d101dbbdef": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3f50cc3742724ab2bf8434c341eee0bd", - "placeholder": "​", - "style": "IPY_MODEL_449d71ebd4df4ddbb755d30fdff25358", - "value": "Downloading (…)9125/train_script.py: 100%" - } - }, - "c686e95be6db4030b77cfcc170aaf722": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c7f8a26c31154c8f9bf65dddb4095c04": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c825476eb2a24cc9b8707d23bd0589ed": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_00f331e549504656ac4d8f1f07e95768", - "placeholder": "​", - "style": "IPY_MODEL_e6864110fe5b4caf8e1b869162a6c45a", - "value": "Downloading (…)ce_transformers.json: 100%" - } - }, - "c987b5a057dc475bb7cb702afe198f74": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - 
"align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "cafef97c29564483b15c21b647e2e0d1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - 
"visibility": null, - "width": null - } - }, - "cb4add21dd514bf7a3d2e51726603e36": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "cc4c4534d0b54d0187d5a7ed82a0376e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cca1ec7ef9174e37a5e17b2a77b39aad": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d10b85816ca540c0a5456e4126dc99c6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - 
"min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d290678ad04e431ba4923c0dba777399": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cafef97c29564483b15c21b647e2e0d1", - "max": 116, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_6a664e104c2f462f838738ea966a0115", - "value": 116 - } - }, - "d43e683a46ef4d8892635fc1da1bd7c0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d7b8af67a3a8454c97d9a68caff040c7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d7e199cec93c42308436a13f0101798d": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a09c50472f244d15875780d53e4cb680", - "max": 190, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e1623d2abb5244708dc2d5ae9bce9e6f", - "value": 190 - } - }, - "dceed128f53f4726afe578574c19815d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7eed45dcf5004dccb734eb315415a535", - "placeholder": "​", - "style": "IPY_MODEL_a5cd7c8a692f43f7859b4df8334c3f0f", - "value": " 39.3k/39.3k [00:00<00:00, 228kB/s]" - } - }, - "df86bc204d014d13990917c348b09769": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - 
"grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e0d94012952e43549dd4d158c30c91d6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e1623d2abb5244708dc2d5ae9bce9e6f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "e37dae5f929042eea3b021d81c7e89f9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e41fb5a2fbcf485899df455800c29f66": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - 
"_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_637b076d35c247348d61e924f86c7509", - "IPY_MODEL_fb5eba47d398448a991aaca68e4c33f0", - "IPY_MODEL_5a549312926048079d979df5f31f6668" - ], - "layout": "IPY_MODEL_c7f8a26c31154c8f9bf65dddb4095c04" - } - }, - "e5616536b1824a388fa9e1ed826307f9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e6864110fe5b4caf8e1b869162a6c45a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e706e32cd2a642eb9618dd0dd75d7664": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cb4add21dd514bf7a3d2e51726603e36", - "max": 231508, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ac2387223ca04438a3e4bb59cd2f7019", - "value": 231508 - } - }, - "eb16cb52b8cb438984c4c75c3074de84": { - 
"model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "eb1d39f0a2254be7857baa26432a5e56": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2787caea3cbc4f029e766b35ccaa6013", - "placeholder": "​", - "style": "IPY_MODEL_ae251276d3c8482ab768130bde5b9de3", - "value": "Downloading (…)nce_bert_config.json: 100%" - } - }, - "ee53579d441b4b2a97176d161b479cd1": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ae834f736104436593dd069cf3bdd5a2", - "IPY_MODEL_3418257fe9114669b00bc242c620bfb3", - "IPY_MODEL_24f54307ee3b499ab7519f713ce994e1" - ], - "layout": "IPY_MODEL_e0d94012952e43549dd4d158c30c91d6" - } - }, - "f14f96faaf3444cda3da447b10d57cee": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_143c8f9e754240e6a5a8386da714cb1b", - "placeholder": "​", - "style": "IPY_MODEL_3d3d1beb5ed346209eac8f4b38325ae1", - "value": " 349/349 [00:00<00:00, 14.8kB/s]" - } - }, - "f22a4dfe0f6e44c2a0bc37b3fe488439": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": 
null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f41fc8e604524a118ea2b863a01d5201": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f7f3cae2779d41b9b08d299596c6026c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b8769ba773c144428df49ed117a4ba2e", - "max": 90888945, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_914892930f74452d96cd787c128c955b", - "value": 90888945 - } - }, - "fa8d7e78724341ca84f0f829e33d756d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "face41511e7c4c748e123d176b219f13": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fb5eba47d398448a991aaca68e4c33f0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2083d409adf345e3b6209bf9b4e8d5df", - "max": 612, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4d1b0ec8de0f466eba688a33a95b6dc8", - "value": 612 - } - }, - "fd06e080368345679c280ae286dcf118": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - 
"object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fdb9c374bc1e48688e439dc82103c545": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6f25e1c5d19e474aaa3f4d5806a53445", - "max": 349, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4fd8f1d3cb944c2e8a0a806e8c89a26f", - "value": 349 - } - }, - "fe235613377d4443815ad548d7b19a2c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2a76d9c1c37a45ab8f4d7da6541c11c3", - "placeholder": "​", - "style": "IPY_MODEL_3897a135ce4d413f95305560e9d5f51d", - "value": " 10.6k/10.6k [00:00<00:00, 446kB/s]" - } - } - } } }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file From 8c9187f18701cb4c522e15a792c54c83d725c453 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Thu, 6 Jul 2023 14:37:48 +0200 Subject: [PATCH 28/54] Remove intall log --- .../01-keyword-querying-filtering.ipynb | 868 ++++++++++++++++-- 1 file changed, 769 insertions(+), 99 deletions(-) 
diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 13adeed5..6417b00e 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -4,7 +4,7 @@ "metadata": { "colab": { "provenance": [], - "authorship_tag": "ABX9TyP9TEKGqhDbAR44WkL6Ryvw" + "authorship_tag": "ABX9TyNRZjawSxbwLumvwiVvjXfX" }, "kernelspec": { "name": "python3", @@ -17,7 +17,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": { "id": "cSj1acY8uPR2" }, @@ -27,6 +27,9 @@ "!pip install -qU elasticsearch sentence-transformers==2.2.2\n", "from elasticsearch import Elasticsearch\n", "import pandas as pd\n", + "from google.colab import data_table\n", + "\n", + "data_table.enable_dataframe_formatter()\n", "# Create the client instance\n", "cloud_id = '\\u003Ccloud_id>' #@param {type:\"string\"}\n", "elastic_username = 'elastic' #@param {type:\"string\"}\n", @@ -84,14 +87,7 @@ "* **match**.\n", " The standard query for performing full text queries, including fuzzy matching and phrase or proximity queries.\n", "* **multi-match**.\n", - " The multi-field version of the match query.\n", - "\n", - "### Compound query\n", - "\n", - "Compound queries wrap other compound or leaf queries, either to combine their results and scores, to change their behaviour, or to switch from query to filter context.\n", - "\n", - "* **boosting query**.\n", - " Return documents which match a positive query, but reduce the score of documents which also match a negative query." + " The multi-field version of the match query." 
], "metadata": { "id": "22onltbgxxGm" @@ -128,12 +124,12 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 280 + "height": 197 }, "id": "q_OE0XVx6_qX", - "outputId": "771bf9d6-a770-43e7-9f19-5ab1b40f6960" + "outputId": "6a1d7760-5fb9-4809-e060-e35a398ed3c4" }, - "execution_count": 52, + "execution_count": 41, "outputs": [ { "output_type": "execute_result", @@ -162,7 +158,7 @@ ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "
\n", "\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-cf91b4fa-a3ee-4684-852b-c66ab245eefb button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"3cXgIYkBfxlbyhU5Krfc\",\n\"2019-10-29\",\n{\n 'v': 0.7042277,\n 'f': \"0.7042277\",\n },\n\"The Pragmatic Programmer: Your Journey to Mastery\",\n\"A guide to pragmatic programming for software engineers and developers\"],\n [{\n 'v': 1,\n 'f': \"1\",\n },\n\"3sXgIYkBfxlbyhU5Krfc\",\n\"2019-05-03\",\n{\n 'v': 0.7042277,\n 'f': \"0.7042277\",\n },\n\"Python Crash Course\",\n\"A fast-paced, no-nonsense guide to programming in Python\"],\n [{\n 'v': 2,\n 'f': \"2\",\n },\n\"5MXgIYkBfxlbyhU5Krfd\",\n\"2011-05-13\",\n{\n 'v': 0.6771651,\n 'f': \"0.6771651\",\n },\n\"The Clean Coder: A Code of Conduct for Professional Programmers\",\n\"A guide to professional conduct in the field of software engineering\"],\n [{\n 'v': 3,\n 'f': \"3\",\n },\n\"4MXgIYkBfxlbyhU5Krfc\",\n\"2008-08-11\",\n{\n 'v': 0.62883455,\n 'f': \"0.62883455\",\n },\n\"Clean Code: A Handbook of Agile Software Craftsmanship\",\n\"A guide to writing code that is easy to read, understand and maintain\"],\n [{\n 'v': 4,\n 'f': \"4\",\n },\n\"48XgIYkBfxlbyhU5Krfd\",\n\"1994-10-31\",\n{\n 'v': 0.62883455,\n 'f': \"0.62883455\",\n },\n\"Design Patterns: Elements of Reusable Object-Oriented Software\",\n\"Guide to design patterns that can be used in any object-oriented language\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], 
[\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-fe653608-986c-4ebe-b427-0ec37ccb087b button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " }, "metadata": {}, - "execution_count": 52 + "execution_count": 41 } ] }, @@ -322,7 +318,9 @@ "source": [ "### Multi-match query\n", "\n", - "The `multi_match` query builds on the match query to allow multi-field queries:" + "The `multi_match` query builds on the match query to allow multi-field queries\n", + "\n", + "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html)" ], "metadata": { "id": "H-n6hoVsfAqc" @@ -341,28 +339,37 @@ "pd.DataFrame.from_records(pretty_response_transform(response))" ], "metadata": { - "id": "TRmGYM94gCtb", - "outputId": "bc6f9c42-9bc4-48a8-ddf8-be7be7e37609", "colab": { "base_uri": "https://localhost:8080/", - "height": 111 - } + "height": 154 + }, + "id": "TRmGYM94gCtb", + "outputId": "dc58b19f-e585-4d0a-d065-ac3fc18ae123" }, - "execution_count": 69, + "execution_count": 50, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - " id publication_date score title \\\n", - "0 3sXgIYkBfxlbyhU5Krfc 2019-05-03 2.4939 Python Crash Course \n", + " id publication_date score \\\n", + "0 4sXgIYkBfxlbyhU5Krfc 2018-12-04 2.030753 \n", + "1 5cXgIYkBfxlbyhU5Krfd 2008-05-15 1.706409 \n", + "2 4cXgIYkBfxlbyhU5Krfc 2015-03-27 1.636058 \n", + "\n", + " title \\\n", + "0 Eloquent JavaScript \n", + "1 JavaScript: The Good Parts \n", + "2 You Don't Know JS: Up & Going \n", "\n", " summary \n", - "0 A fast-paced, no-nonsense guide to programming... " + "0 A modern introduction to programming \n", + "1 A deep dive into the parts of JavaScript that ... 
\n", + "2 Introduction to JavaScript and programming as ... " ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "
\n", "\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-72b0a3de-0bf8-4ffd-8ee4-8487e8903803 button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"4sXgIYkBfxlbyhU5Krfc\",\n\"2018-12-04\",\n{\n 'v': 2.0307527,\n 'f': \"2.0307527\",\n },\n\"Eloquent JavaScript\",\n\"A modern introduction to programming\"],\n [{\n 'v': 1,\n 'f': \"1\",\n },\n\"5cXgIYkBfxlbyhU5Krfd\",\n\"2008-05-15\",\n{\n 'v': 1.7064086,\n 'f': \"1.7064086\",\n },\n\"JavaScript: The Good Parts\",\n\"A deep dive into the parts of JavaScript that are essential to writing maintainable code\"],\n [{\n 'v': 2,\n 'f': \"2\",\n },\n\"4cXgIYkBfxlbyhU5Krfc\",\n\"2015-03-27\",\n{\n 'v': 1.6360576,\n 'f': \"1.6360576\",\n },\n\"You Don't Know JS: Up & Going\",\n\"Introduction to JavaScript and programming as a whole\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-d42669b9-d6a4-4402-a5be-2092a0f1ebbd button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " }, "metadata": {}, - "execution_count": 69 + "execution_count": 50 } ] }, { "cell_type": "markdown", "source": [ - "### Boosting query\n", - "\n", - "Returns documents matching a `positive` query while reducing the `relevance score` of documents that also match a `negative` query.\n", - "\n", - "You can use the `boosting` query to demote certain documents without excluding them from the search results." + "Individual fields can be boosted with the caret (^) notation" ], "metadata": { "id": "FnBeBIVKiPnS" @@ -502,52 +521,46 @@ "cell_type": "code", "source": [ "response = client.search(index=\"book_index\", query={\n", - " \"boosting\": {\n", - " \"positive\": {\n", - " \"term\": {\n", - " \"summary\": \"javascript\"\n", - " }\n", - " },\n", - " \"negative\": {\n", - " \"term\": {\n", - " \"summary\": \"introduction\"\n", - " }\n", - " },\n", - " \"negative_boost\": 0.5\n", + " \"multi_match\": {\n", + " \"query\": \"javascript\",\n", + " \"fields\": [\"summary\", \"title^3\"]\n", " }\n", " })\n", "\n", "pd.DataFrame.from_records(pretty_response_transform(response))" ], "metadata": { - "id": "_aI7hnH0ixkG", - "outputId": "f1bb946a-953c-4d63-afa1-42d1d9778b63", "colab": { "base_uri": "https://localhost:8080/", - "height": 166 - } + "height": 154 + }, + "id": "_aI7hnH0ixkG", + "outputId": "2af27f3d-f9fd-4c7a-cab5-7cb06132582c" }, - "execution_count": 63, + "execution_count": 49, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " id publication_date score \\\n", - "0 5cXgIYkBfxlbyhU5Krfd 2008-05-15 1.254593 \n", - "1 4cXgIYkBfxlbyhU5Krfc 2015-03-27 
0.818029 \n", + "0 4sXgIYkBfxlbyhU5Krfc 2018-12-04 6.092258 \n", + "1 5cXgIYkBfxlbyhU5Krfd 2008-05-15 5.119226 \n", + "2 4cXgIYkBfxlbyhU5Krfc 2015-03-27 1.636058 \n", "\n", " title \\\n", - "0 JavaScript: The Good Parts \n", - "1 You Don't Know JS: Up & Going \n", + "0 Eloquent JavaScript \n", + "1 JavaScript: The Good Parts \n", + "2 You Don't Know JS: Up & Going \n", "\n", " summary \n", - "0 A deep dive into the parts of JavaScript that ... \n", - "1 Introduction to JavaScript and programming as ... " + "0 A modern introduction to programming \n", + "1 A deep dive into the parts of JavaScript that ... \n", + "2 Introduction to JavaScript and programming as ... " ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "
\n", "\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-7325ffa0-ded2-46c6-9d3f-b04c2f070c04 button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"4sXgIYkBfxlbyhU5Krfc\",\n\"2018-12-04\",\n{\n 'v': 6.0922585,\n 'f': \"6.0922585\",\n },\n\"Eloquent JavaScript\",\n\"A modern introduction to programming\"],\n [{\n 'v': 1,\n 'f': \"1\",\n },\n\"5cXgIYkBfxlbyhU5Krfd\",\n\"2008-05-15\",\n{\n 'v': 5.1192265,\n 'f': \"5.1192265\",\n },\n\"JavaScript: The Good Parts\",\n\"A deep dive into the parts of JavaScript that are essential to writing maintainable code\"],\n [{\n 'v': 2,\n 'f': \"2\",\n },\n\"4cXgIYkBfxlbyhU5Krfc\",\n\"2015-03-27\",\n{\n 'v': 1.6360576,\n 'f': \"1.6360576\",\n },\n\"You Don't Know JS: Up & Going\",\n\"Introduction to JavaScript and programming as a whole\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-85cba031-2c1b-41fe-95b1-be7944cc54f1 button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " }, "metadata": {}, - "execution_count": 63 + "execution_count": 49 } ] }, @@ -683,7 +704,9 @@ "source": [ "### Prefix search\n", "\n", - "Returns documents that contain a specific prefix in a provided field" + "Returns documents that contain a specific prefix in a provided field\n", + "\n", + "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-prefix-query.html)" ], "metadata": { "id": "yXipv0xSk-nK" @@ -703,14 +726,14 @@ "pd.DataFrame.from_records(pretty_response_transform(response))" ], "metadata": { - "id": "dCr1pwlqlOE7", - "outputId": "61b379bd-20c2-432e-d8a8-83377b72469a", "colab": { "base_uri": "https://localhost:8080/", - "height": 149 - } + "height": 132 + }, + "id": "dCr1pwlqlOE7", + "outputId": "ae55cd66-0ded-4868-dac5-5815ea317c44" }, - "execution_count": 71, + "execution_count": 48, "outputs": [ { "output_type": "execute_result", @@ -726,7 +749,7 @@ ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "
\n", "\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-9c37ee42-e6ce-4a80-a0fa-95ee3eb8a0bd button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"4sXgIYkBfxlbyhU5Krfc\",\n\"2018-12-04\",\n{\n 'v': 1.0,\n 'f': \"1.0\",\n },\n\"Eloquent JavaScript\",\n\"A modern introduction to programming\"],\n [{\n 'v': 1,\n 'f': \"1\",\n },\n\"5cXgIYkBfxlbyhU5Krfd\",\n\"2008-05-15\",\n{\n 'v': 1.0,\n 'f': \"1.0\",\n },\n\"JavaScript: The Good Parts\",\n\"A deep dive into the parts of JavaScript that are essential to writing maintainable code\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-14c8ac74-36d1-4dbe-b3a6-c23015d99d02 button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " }, "metadata": {}, - "execution_count": 71 + "execution_count": 48 } ] }, @@ -870,6 +893,8 @@ "* Removing a character (black → lack)\n", "* Inserting a character (sic → sick)\n", "* Transposing two adjacent characters (act → cat)\n", + "\n", + "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html)\n", "\n" ], "metadata": { @@ -890,14 +915,14 @@ "pd.DataFrame.from_records(pretty_response_transform(response))" ], "metadata": { - "id": "dTMc-IxPmbtC", - "outputId": "57ce94e6-0335-4fc0-ce37-db14568c34a1", "colab": { "base_uri": "https://localhost:8080/", - "height": 149 - } + "height": 132 + }, + "id": "dTMc-IxPmbtC", + "outputId": "9acf74fd-bc16-45df-80f3-49504860b10a" }, - "execution_count": 78, + "execution_count": 47, "outputs": [ { "output_type": "execute_result", @@ -917,7 +942,7 @@ ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "
\n", "\n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"4sXgIYkBfxlbyhU5Krfc\",\n\"2018-12-04\",\n{\n 'v': 1.6246022,\n 'f': \"1.6246022\",\n },\n\"Eloquent JavaScript\",\n\"A modern introduction to programming\"],\n [{\n 'v': 1,\n 'f': \"1\",\n },\n\"5cXgIYkBfxlbyhU5Krfd\",\n\"2008-05-15\",\n{\n 'v': 1.3651271,\n 'f': \"1.3651271\",\n },\n\"JavaScript: The Good Parts\",\n\"A deep dive into the parts of JavaScript that are essential to writing maintainable code\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-39be6dc9-f7bc-45e8-811b-98a2b550ecf2 button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Filtering\n", + "\n", + "In a filter context, a query clause answers the question *“Does this document match this query clause?”* The answer is a simple Yes or No — no scores are calculated. Filter context is mostly used for filtering structured data, e.g.\n", + "* Does this `timestamp` fall into the range 2015 to 2016?\n", + "* Is the `status` field set to `\"published\"`?\n", + "\n", + "Filter context is in effect whenever a query clause is passed to a `filter` parameter, such as the `filter` or `must_not` parameters in the `bool` query.\n", + "\n", + "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html)" + ], + "metadata": { + "id": "PG9TYqL-8H29" + } + }, + { + "cell_type": "markdown", + "source": [ + "### **bool.must**\n", + "The clause (query) must appear in matching documents and will contribute to the score." 
+ ], + "metadata": { + "id": "7do0lmxA_v25" + } + }, + { + "cell_type": "code", + "source": [ + "response = client.search(index=\"book_index\", query={\n", + " \"bool\": {\n", + " \"must\": [{\n", + " \"term\": {\n", + " \"summary\": \"guide\"\n", + " }\n", + " }, {\n", + " \"term\": {\n", + " \"summary\": \"code\"\n", + " }\n", + " }]\n", + " }\n", + " })\n", + "\n", + "pd.DataFrame.from_records(pretty_response_transform(response))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 111 + }, + "id": "8_C-JHRQFDl7", + "outputId": "be59d18b-5e20-4db0-8697-2e7746251742" + }, + "execution_count": 46, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id publication_date score \\\n", + "0 4MXgIYkBfxlbyhU5Krfc 2008-08-11 1.97297 \n", + "\n", + " title \\\n", + "0 Clean Code: A Handbook of Agile Software Craft... \n", + "\n", + " summary \n", + "0 A guide to writing code that is easy to read, ... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idpublication_datescoretitlesummary
04MXgIYkBfxlbyhU5Krfc2008-08-111.97297Clean Code: A Handbook of Agile Software Craft...A guide to writing code that is easy to read, ...
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"4MXgIYkBfxlbyhU5Krfc\",\n\"2008-08-11\",\n{\n 'v': 1.9729705,\n 'f': \"1.9729705\",\n },\n\"Clean Code: A Handbook of Agile Software Craftsmanship\",\n\"A guide to writing code that is easy to read, understand and maintain\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-3d2b4397-5161-4160-a602-92c29337f1d1 button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + }, + "metadata": {}, + "execution_count": 46 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### **bool.should**\n", + "\n", + "The clause (query) should appear in the matching document." + ], + "metadata": { + "id": "eNlncytRIl9h" + } + }, + { + "cell_type": "code", + "source": [ + "response = client.search(index=\"book_index\", query={\n", + " \"bool\": {\n", + " \"should\": [{\n", + " \"term\": {\n", + " \"summary\": \"guide\"\n", + " }\n", + " }, {\n", + " \"term\": {\n", + " \"summary\": \"code\"\n", + " }\n", + " }]\n", + " }\n", + " })\n", + "\n", + "pd.DataFrame.from_records(pretty_response_transform(response))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 219 + }, + "id": "GRm9T1vfIsmF", + "outputId": "d9fb6936-3ffb-4fff-9467-1f7ac7b41490" + }, + "execution_count": 44, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id publication_date score \\\n", + "0 4MXgIYkBfxlbyhU5Krfc 2008-08-11 1.972970 \n", + "1 5cXgIYkBfxlbyhU5Krfd 2008-05-15 1.254593 \n", + "2 3cXgIYkBfxlbyhU5Krfc 2019-10-29 0.704228 \n", + "3 3sXgIYkBfxlbyhU5Krfc 2019-05-03 0.704228 \n", + "4 5MXgIYkBfxlbyhU5Krfd 2011-05-13 0.677165 \n", + "5 48XgIYkBfxlbyhU5Krfd 1994-10-31 0.628835 \n", + "\n", + " title \\\n", + "0 Clean Code: A Handbook of Agile Software Craft... \n", + "1 JavaScript: The Good Parts \n", + "2 The Pragmatic Programmer: Your Journey to Mastery \n", + "3 Python Crash Course \n", + "4 The Clean Coder: A Code of Conduct for Profess... \n", + "5 Design Patterns: Elements of Reusable Object-O... 
\n", + "\n", + " summary \n", + "0 A guide to writing code that is easy to read, ... \n", + "1 A deep dive into the parts of JavaScript that ... \n", + "2 A guide to pragmatic programming for software ... \n", + "3 A fast-paced, no-nonsense guide to programming... \n", + "4 A guide to professional conduct in the field o... \n", + "5 Guide to design patterns that can be used in a... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idpublication_datescoretitlesummary
04MXgIYkBfxlbyhU5Krfc2008-08-111.972970Clean Code: A Handbook of Agile Software Craft...A guide to writing code that is easy to read, ...
15cXgIYkBfxlbyhU5Krfd2008-05-151.254593JavaScript: The Good PartsA deep dive into the parts of JavaScript that ...
23cXgIYkBfxlbyhU5Krfc2019-10-290.704228The Pragmatic Programmer: Your Journey to MasteryA guide to pragmatic programming for software ...
33sXgIYkBfxlbyhU5Krfc2019-05-030.704228Python Crash CourseA fast-paced, no-nonsense guide to programming...
45MXgIYkBfxlbyhU5Krfd2011-05-130.677165The Clean Coder: A Code of Conduct for Profess...A guide to professional conduct in the field o...
548XgIYkBfxlbyhU5Krfd1994-10-310.628835Design Patterns: Elements of Reusable Object-O...Guide to design patterns that can be used in a...
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/881c4a0d49046431/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n\"4MXgIYkBfxlbyhU5Krfc\",\n\"2008-08-11\",\n{\n 'v': 1.9729705,\n 'f': \"1.9729705\",\n },\n\"Clean Code: A Handbook of Agile Software Craftsmanship\",\n\"A guide to writing code that is easy to read, understand and maintain\"],\n [{\n 'v': 1,\n 'f': \"1\",\n },\n\"5cXgIYkBfxlbyhU5Krfd\",\n\"2008-05-15\",\n{\n 'v': 1.2545931,\n 'f': \"1.2545931\",\n },\n\"JavaScript: The Good Parts\",\n\"A deep dive into the parts of JavaScript that are essential to writing maintainable code\"],\n [{\n 'v': 2,\n 'f': \"2\",\n },\n\"3cXgIYkBfxlbyhU5Krfc\",\n\"2019-10-29\",\n{\n 'v': 0.7042277,\n 'f': \"0.7042277\",\n },\n\"The Pragmatic Programmer: Your Journey to Mastery\",\n\"A guide to pragmatic programming for software engineers and developers\"],\n [{\n 'v': 3,\n 'f': \"3\",\n },\n\"3sXgIYkBfxlbyhU5Krfc\",\n\"2019-05-03\",\n{\n 'v': 0.7042277,\n 'f': \"0.7042277\",\n },\n\"Python Crash Course\",\n\"A fast-paced, no-nonsense guide to programming in Python\"],\n [{\n 'v': 4,\n 'f': \"4\",\n },\n\"5MXgIYkBfxlbyhU5Krfd\",\n\"2011-05-13\",\n{\n 'v': 0.6771651,\n 'f': \"0.6771651\",\n },\n\"The Clean Coder: A Code of Conduct for Professional Programmers\",\n\"A guide to professional conduct in the field of software engineering\"],\n [{\n 'v': 5,\n 'f': \"5\",\n },\n\"48XgIYkBfxlbyhU5Krfd\",\n\"1994-10-31\",\n{\n 'v': 0.62883455,\n 'f': \"0.62883455\",\n },\n\"Design Patterns: Elements of Reusable Object-Oriented Software\",\n\"Guide to design patterns that can be used in any object-oriented language\"]],\n columns: [[\"number\", \"index\"], [\"string\", \"id\"], [\"string\", \"publication_date\"], [\"number\", \"score\"], [\"string\", \"title\"], [\"string\", \"summary\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": 
\"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n \n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n
\n \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-7b970c43-3212-4654-a3ff-1fbcd54e4493 button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### **bool.filter**\n", + "\n", + "The clause (query) must appear in matching documents. **However unlike `must` the `score` of the query will be ignored.** Filter clauses are executed in filter context, meaning that scoring is ignored and clauses are considered for caching." + ], + "metadata": { + "id": "PGTFXUIkJG4t" + } + }, + { + "cell_type": "code", + "source": [ + "response = client.search(index=\"book_index\", query={\n", + " \"bool\": {\n", + " \"filter\": [{\n", + " \"term\": {\n", + " \"summary\": \"guide\"\n", + " }\n", + " }]\n", + " }\n", + " })\n", + "\n", + "pd.DataFrame.from_records(pretty_response_transform(response))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "id": "6RH0OALLJPHv", + "outputId": "338419b0-3e60-4ac9-ddeb-67cac6202ca2" + }, + "execution_count": 45, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id publication_date score \\\n", + "0 3cXgIYkBfxlbyhU5Krfc 2019-10-29 0.0 \n", + "1 3sXgIYkBfxlbyhU5Krfc 2019-05-03 0.0 \n", + "2 4MXgIYkBfxlbyhU5Krfc 2008-08-11 0.0 \n", + "3 48XgIYkBfxlbyhU5Krfd 1994-10-31 0.0 \n", + "4 5MXgIYkBfxlbyhU5Krfd 2011-05-13 0.0 \n", + "\n", + " title \\\n", + "0 The Pragmatic Programmer: Your Journey to Mastery \n", + "1 Python Crash Course \n", + "2 Clean Code: A Handbook of Agile Software Craft... \n", + "3 Design Patterns: Elements of Reusable Object-O... \n", + "4 The Clean Coder: A Code of Conduct for Profess... 
\n", + "\n", + " summary \n", + "0 A guide to pragmatic programming for software ... \n", + "1 A fast-paced, no-nonsense guide to programming... \n", + "2 A guide to writing code that is easy to read, ... \n", + "3 Guide to design patterns that can be used in a... \n", + "4 A guide to professional conduct in the field o... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idpublication_datescoretitlesummary
03cXgIYkBfxlbyhU5Krfc2019-10-290.0The Pragmatic Programmer: Your Journey to MasteryA guide to pragmatic programming for software ...
13sXgIYkBfxlbyhU5Krfc2019-05-030.0Python Crash CourseA fast-paced, no-nonsense guide to programming...
24MXgIYkBfxlbyhU5Krfc2008-08-110.0Clean Code: A Handbook of Agile Software Craft...A guide to writing code that is easy to read, ...
348XgIYkBfxlbyhU5Krfd1994-10-310.0Design Patterns: Elements of Reusable Object-O...Guide to design patterns that can be used in a...
45MXgIYkBfxlbyhU5Krfd2011-05-130.0The Clean Coder: A Code of Conduct for Profess...A guide to professional conduct in the field o...
\n", + "
\n", + " \n
\n\n\n\n \n`;\n parentElement.appendChild(quickchartButtonContainerElement);\n \nfunction displayQuickchartButton(domScope) {\n let quickchartButtonEl =\n domScope.querySelector('#df-7984fec6-fcda-46b7-9b9e-3e31a15d21ad button.colab-df-quickchart');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n}\n\n displayQuickchartButton(parentElement);\n }\n " }, "metadata": {}, - "execution_count": 78 + "execution_count": 45 } ] } From b97743759e7b376b02ce3290119ca8c8a86874d2 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Thu, 6 Jul 2023 15:31:42 +0200 Subject: [PATCH 29/54] Fix link in nav --- .../search/01-keyword-querying-filtering.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 6417b00e..94d8a4c8 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -4,7 +4,7 @@ "metadata": { "colab": { "provenance": [], - "authorship_tag": "ABX9TyNRZjawSxbwLumvwiVvjXfX" + "authorship_tag": "ABX9TyNRIISv0/ilwAn8BxsNs0U0" }, "kernelspec": { "name": "python3", @@ -55,7 +55,7 @@ { "cell_type": "markdown", "source": [ - "[Quick Start](https://github.com/yansavitski/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb) || **Keyword Quering Filtering** || [Hubrid search with RRF](https://github.com/yansavitski/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb) || [ELSER](https://github.com/yansavitski/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/03-ELSER.ipynb)" + "[Quick Start](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb) || **Keyword Querying Filtering** || [Hybrid
search with RRF](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb) || [ELSER](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/03-ELSER.ipynb)" ], "metadata": { "id": "0wgbLWl2udLQ" @@ -728,7 +728,7 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 132 + "height": 133 }, "id": "dCr1pwlqlOE7", "outputId": "ae55cd66-0ded-4868-dac5-5815ea317c44" @@ -917,7 +917,7 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 132 + "height": 133 }, "id": "dTMc-IxPmbtC", "outputId": "9acf74fd-bc16-45df-80f3-49504860b10a" From 8a7d66ba1967e2dbb26bc71baa461fb4348016b9 Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Fri, 7 Jul 2023 11:27:47 +0200 Subject: [PATCH 30/54] Add RRF example, plus toy example walkthru --- .../search/02-hybrid-search-with-rrf.ipynb | 956 ++++++++++++++++++ 1 file changed, 956 insertions(+) diff --git a/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb b/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb index e69de29b..fa2c53d1 100644 --- a/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb +++ b/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb @@ -0,0 +1,956 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "s49gpkvZ7q53" + }, + "source": [ + "# Hybrid Search using RRF\n", + "\n", + "In this example we'll use the reciprocal rank fusion algorithm to combine the results of BM25 and kNN semantic search.\n", + "We'll use the same dataset we used in our [quickstart](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb) guide." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Y01AXpELkygt" + }, + "source": [ + "# 🧰 Requirements\n", + "\n", + "For this example, you will need:\n", + "\n", + "- Python 3.6 or later\n", + "- An Elastic deployment with minimum **4GB machine learning node**\n", + " - We'll be using [Elastic Cloud](https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html) for this example (available with a [free trial](https://cloud.elastic.co/registration?elektra=en-ess-sign-up-page))\n", + "- The [ELSER](https://www.elastic.co/guide/en/machine-learning/8.8/ml-nlp-elser.html) model installed on your Elastic deployment\n", + "- The [Elastic Python client](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/installation.html)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "N4pI1-eIvWrI" + }, + "source": [ + "# Create Elastic Cloud deployment\n", + "\n", + "If you don't have an Elastic Cloud deployment, sign up [here](https://cloud.elastic.co/registration?fromURI=%2Fhome) for a free trial.\n", + "\n", + "- Go to the [Create deployment](https://cloud.elastic.co/deployments/create) page\n", + " - Under **Advanced settings**, go to **Machine Learning instances**\n", + " - You'll need at least **4GB** RAM per zone for this tutorial\n", + " - Select **Create deployment**" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "gaTFHLJC-Mgi" + }, + "source": [ + "# Install packages and initialize the Elasticsearch Python client\n", + "\n", + "To get started, we'll need to connect to our Elastic deployment using the Python client.\n", + "Because we're using an Elastic Cloud deployment, we'll use the **Cloud ID** to identify our deployment.\n", + "\n", + "First we need to `pip` install the packages we need for this example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K9Q1p2C9-wce", + "outputId": "204d5aee-571e-4363-be6e-f87d058f2d29" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fatal: destination path 'elasticsearch-py' already exists and is not an empty directory.\n", + "/Users/liamthompson/notebook-tests/elasticsearch-py\n", + "HEAD is now at 825e642b Bumps 8.8 to 8.8.2\n", + "zsh:1: parse error near `-m'\n", + "Requirement already satisfied: sentence_transformers in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (2.2.2)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.6.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (4.30.2)\n", + "Requirement already satisfied: tqdm in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (4.65.0)\n", + "Requirement already satisfied: torch>=1.6.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (2.0.1)\n", + "Requirement already satisfied: torchvision in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (0.15.2)\n", + "Requirement already satisfied: numpy in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (1.25.0)\n", + "Requirement already satisfied: scikit-learn in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (1.3.0)\n", + "Requirement already satisfied: scipy in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (1.11.1)\n", + "Requirement already satisfied: nltk in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (3.8.1)\n", + "Requirement already satisfied: sentencepiece 
in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (0.1.99)\n", + "Requirement already satisfied: huggingface-hub>=0.4.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (0.15.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (4.6.3)\n", + "Requirement already satisfied: packaging>=20.9 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (23.1)\n", + "Requirement already satisfied: requests in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2.31.0)\n", + "Requirement already satisfied: fsspec in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2023.6.0)\n", + "Requirement already satisfied: filelock in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (3.12.2)\n", + "Requirement already satisfied: jinja2 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch>=1.6.0->sentence_transformers) (3.1.2)\n", + "Requirement already satisfied: networkx in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch>=1.6.0->sentence_transformers) (3.1)\n", + "Requirement already satisfied: sympy in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch>=1.6.0->sentence_transformers) (1.12)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in 
/Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (0.13.3)\n", + "Requirement already satisfied: regex!=2019.12.17 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (2023.6.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (0.3.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from jinja2->torch>=1.6.0->sentence_transformers) (2.1.3)\n", + "Requirement already satisfied: click in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from nltk->sentence_transformers) (8.1.3)\n", + "Requirement already satisfied: joblib in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from nltk->sentence_transformers) (1.3.1)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2023.5.7)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.1.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.4)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from 
scikit-learn->sentence_transformers) (3.1.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sympy->torch>=1.6.0->sentence_transformers) (1.3.0)\n", + "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torchvision->sentence_transformers) (10.0.0)\n", + "\u001b[33mWARNING: You are using pip version 21.2.3; however, version 23.1.2 is available.\n", + "You should consider upgrading via the '/Users/liamthompson/.pyenv/versions/3.9.7/bin/python3.9 -m pip install --upgrade pip' command.\u001b[0m\n", + "Requirement already satisfied: torch in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (2.0.1)\n", + "Requirement already satisfied: jinja2 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch) (3.1.2)\n", + "Requirement already satisfied: networkx in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch) (3.1)\n", + "Requirement already satisfied: filelock in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch) (3.12.2)\n", + "Requirement already satisfied: typing-extensions in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch) (4.6.3)\n", + "Requirement already satisfied: sympy in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch) (1.12)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from jinja2->torch) (2.1.3)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sympy->torch) (1.3.0)\n", + "\u001b[33mWARNING: You are using pip version 21.2.3; however, version 23.1.2 is available.\n", + "You should consider upgrading via the '/Users/liamthompson/.pyenv/versions/3.9.7/bin/python3.9 
-m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "!git clone https://github.com/elastic/elasticsearch-py.git\n", + "%cd elasticsearch-py\n", + "!git checkout v8.8.2\n", + "!{sys.executable} -m pip install .\n", + "!pip install sentence_transformers\n", + "!pip install torch\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "gEzq2Z1wBs3M" + }, + "source": [ + "Next we need to import the modules used in this example.\n", + "These are the `elasticsearch` module, the `getpass` module, and the `sentence_transformers` and `torch` modules that we use to create embeddings.\n", + "`getpass` is part of the Python standard library and is used to securely prompt for credentials." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "uP_GTVRi-d96" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from elasticsearch import Elasticsearch, helpers\n", + "from urllib.request import urlopen\n", + "import getpass\n", + "from sentence_transformers import SentenceTransformer\n", + "import torch\n", + "\n", + "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", + "\n", + "model = SentenceTransformer('all-MiniLM-L6-v2', device=device)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "AMSePFiZCRqX" + }, + "source": [ + "Now we can instantiate the Python Elasticsearch client.\n", + "First we prompt the user for their password and Cloud ID.\n", + "\n", + "🔐 NOTE: `getpass` enables us to securely prompt the user for credentials without echoing them to the terminal or saving them in the notebook.\n", + "\n", + "Then we create a `client` object, which is an instance of the `Elasticsearch` class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "h0MdAZ53CdKL", + "outputId": "96ea6f81-f935-4d51-c4a7-af5a896180f1" + }, + "outputs": [], + "source": [ + "# Found in the 'Manage Deployment' page\n", + "CLOUD_ID = getpass.getpass('Enter Elastic Cloud ID: ')\n", + "\n", + "# Password for the 'elastic' user generated by Elasticsearch\n", + "ELASTIC_PASSWORD = getpass.getpass('Enter Elastic password: ')\n", + "\n", + "# Create the client instance\n", + "client = Elasticsearch(\n", + " cloud_id=CLOUD_ID,\n", + " basic_auth=(\"elastic\", ELASTIC_PASSWORD)\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "bRHbecNeEDL3" + }, + "source": [ + "Confirm that the client has connected with this test" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rdiUKqZbEKfF", + "outputId": "43b6f1cd-a43e-4dbe-caa5-7fd170464881" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'name': 'instance-0000000000', 'cluster_name': '9dd1e5c0b0d64796b8cf0746cf63d734', 'cluster_uuid': 'VeYvw6JhQcC3P-Q1-L9P_w', 'version': {'number': '8.9.0-SNAPSHOT', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'ac7d79178c3e57c935358453331efe9e9cc5104d', 'build_date': '2023-06-21T09:08:25.219504984Z', 'build_snapshot': True, 'lucene_version': '9.7.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0', 'transport_version': '8500019'}, 'tagline': 'You Know, for Search'}\n" + ] + } + ], + "source": [ + "print(client.info())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "enHQuT57DhD1" + }, + "source": [ + "Refer to https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html#connect-self-managed-new to learn how to connect to 
a self-managed deployment.\n", + "\n", + "Read https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html#auth-apikey to learn how to connect using API keys.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "TF_wxIAhD07a" + }, + "source": [ + "# Create Elasticsearch index with required mappings\n", + "\n", + "We need to add a field to support dense vector storage and search.\n", + "Note the `title_vector` field below, which is used to store the dense vector representation of the `title` field." + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cvYECABJJs_2", + "outputId": "18fb51e4-c4f6-4d1b-cb2d-bc6f8ec1aa84" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/vz/v2f6_x6s0kg51j2vbm5rlhww0000gn/T/ipykernel_2383/1628078329.py:22: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", + " client.indices.create(index='rrf_book_index', body=mapping)\n" + ] + }, + { + "ename": "BadRequestError", + "evalue": "BadRequestError(400, 'resource_already_exists_exception', 'index [rrf_book_index/Ip8zitwhSMe0OJtEwpuqzQ] already exists')", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mBadRequestError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[59], line 22\u001b[0m\n\u001b[1;32m 2\u001b[0m mapping \u001b[39m=\u001b[39m {\n\u001b[1;32m 3\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mmappings\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[1;32m 4\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mproperties\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 18\u001b[0m }\n\u001b[1;32m 19\u001b[0m }\n\u001b[1;32m 21\u001b[0m \u001b[39m# Create the index\u001b[39;00m\n\u001b[0;32m---> 22\u001b[0m client\u001b[39m.\u001b[39;49mindices\u001b[39m.\u001b[39;49mcreate(index\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mrrf_book_index\u001b[39;49m\u001b[39m'\u001b[39;49m, body\u001b[39m=\u001b[39;49mmapping)\n", + "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/utils.py:414\u001b[0m, in \u001b[0;36m_rewrite_parameters..wrapper..wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[1;32m 412\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[39mreturn\u001b[39;00m api(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", + "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/indices.py:517\u001b[0m, in \u001b[0;36mcreate\u001b[0;34m(self, index, aliases, error_trace, filter_path, human, mappings, master_timeout, pretty, settings, timeout, 
wait_for_active_shards)\u001b[0m\n", + "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/_base.py:389\u001b[0m, in \u001b[0;36mNamespacedClient.perform_request\u001b[0;34m(self, method, path, params, headers, body)\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mperform_request\u001b[39m(\n\u001b[1;32m 379\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 380\u001b[0m method: \u001b[39mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[39m# Use the internal clients .perform_request() implementation\u001b[39;00m\n\u001b[1;32m 388\u001b[0m \u001b[39m# so we take advantage of their transport options.\u001b[39;00m\n\u001b[0;32m--> 389\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49mperform_request(\n\u001b[1;32m 390\u001b[0m method, path, params\u001b[39m=\u001b[39;49mparams, headers\u001b[39m=\u001b[39;49mheaders, body\u001b[39m=\u001b[39;49mbody\n\u001b[1;32m 391\u001b[0m )\n", + "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/_base.py:320\u001b[0m, in \u001b[0;36mBaseClient.perform_request\u001b[0;34m(self, method, path, params, headers, body)\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mValueError\u001b[39;00m, \u001b[39mKeyError\u001b[39;00m, \u001b[39mTypeError\u001b[39;00m):\n\u001b[1;32m 318\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 320\u001b[0m \u001b[39mraise\u001b[39;00m HTTP_EXCEPTIONS\u001b[39m.\u001b[39mget(meta\u001b[39m.\u001b[39mstatus, ApiError)(\n\u001b[1;32m 321\u001b[0m message\u001b[39m=\u001b[39mmessage, meta\u001b[39m=\u001b[39mmeta, body\u001b[39m=\u001b[39mresp_body\n\u001b[1;32m 322\u001b[0m )\n\u001b[1;32m 324\u001b[0m \u001b[39m# 'X-Elastic-Product: Elasticsearch' should be on every 2XX response.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[39mif\u001b[39;00m 
\u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_verified_elasticsearch:\n\u001b[1;32m 326\u001b[0m \u001b[39m# If the header is set we mark the server as verified.\u001b[39;00m\n", + "\u001b[0;31mBadRequestError\u001b[0m: BadRequestError(400, 'resource_already_exists_exception', 'index [rrf_book_index/Ip8zitwhSMe0OJtEwpuqzQ] already exists')" + ] + } + ], + "source": [ + "# Define the mapping\n", + "mapping = {\n", + " \"mappings\": {\n", + " \"properties\": {\n", + " \"title\": {\"type\": \"text\"},\n", + " \"authors\": {\"type\": \"keyword\"},\n", + " \"summary\": {\"type\": \"text\"},\n", + " \"publish_date\": {\"type\": \"date\"},\n", + " \"num_reviews\": {\"type\": \"integer\"},\n", + " \"publisher\": {\"type\": \"keyword\"},\n", + " \"title_vector\": { \n", + " \"type\": \"dense_vector\", \n", + " \"dims\": 384, \n", + " \"index\": \"true\", \n", + " \"similarity\": \"dot_product\" \n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "# Create the index\n", + "client.indices.create(index='rrf_book_index', body=mapping)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataset\n", + "\n", + "Let's index some data.\n", + "Note that we are embedding the `title` field using the sentence transformer model.\n", + "Once indexed, you'll see that your documents contain a `title_vector` field (`\"type\": \"dense_vector\"`) which contains a vector of floating point values.\n", + "This is the embedding of the `title` field in vector space.\n", + "We'll use this field to perform semantic search using kNN." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'took': 29, 'errors': False, 'items': [{'index': {'_index': 'rrf_book_index', '_id': '7c-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 10, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '7s-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 11, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '78-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 12, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '8M-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 13, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '8c-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 14, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '8s-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 15, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '88-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 16, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '9M-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 17, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '9c-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', 
'_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 18, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '9s-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 19, '_primary_term': 1, 'status': 201}}]})" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books = [\n", + " {\n", + " \"title\": \"The Pragmatic Programmer: Your Journey to Mastery\",\n", + " \"authors\": [\"andrew hunt\", \"david thomas\"],\n", + " \"summary\": \"A guide to pragmatic programming for software engineers and developers\",\n", + " \"publish_date\": \"2019-10-29\",\n", + " \"num_reviews\": 30,\n", + " \"publisher\": \"addison-wesley\"\n", + " },\n", + " {\n", + " \"title\": \"Python Crash Course\",\n", + " \"authors\": [\"eric matthes\"],\n", + " \"summary\": \"A fast-paced, no-nonsense guide to programming in Python\",\n", + " \"publish_date\": \"2019-05-03\",\n", + " \"num_reviews\": 42,\n", + " \"publisher\": \"no starch press\"\n", + " },\n", + " {\n", + " \"title\": \"Artificial Intelligence: A Modern Approach\",\n", + " \"authors\": [\"stuart russell\", \"peter norvig\"],\n", + " \"summary\": \"Comprehensive introduction to the theory and practice of artificial intelligence\",\n", + " \"publish_date\": \"2020-04-06\",\n", + " \"num_reviews\": 39,\n", + " \"publisher\": \"pearson\"\n", + " },\n", + " {\n", + " \"title\": \"Clean Code: A Handbook of Agile Software Craftsmanship\",\n", + " \"authors\": [\"robert c. 
martin\"],\n", + " \"summary\": \"A guide to writing code that is easy to read, understand and maintain\",\n", + " \"publish_date\": \"2008-08-11\",\n", + " \"num_reviews\": 55,\n", + " \"publisher\": \"prentice hall\"\n", + " },\n", + " {\n", + " \"title\": \"You Don't Know JS: Up & Going\",\n", + " \"authors\": [\"kyle simpson\"],\n", + " \"summary\": \"Introduction to JavaScript and programming as a whole\",\n", + " \"publish_date\": \"2015-03-27\",\n", + " \"num_reviews\": 36,\n", + " \"publisher\": \"oreilly\"\n", + " },\n", + " {\n", + " \"title\": \"Eloquent JavaScript\",\n", + " \"authors\": [\"marijn haverbeke\"],\n", + " \"summary\": \"A modern introduction to programming\",\n", + " \"publish_date\": \"2018-12-04\",\n", + " \"num_reviews\": 38,\n", + " \"publisher\": \"no starch press\"\n", + " },\n", + " {\n", + " \"title\": \"Design Patterns: Elements of Reusable Object-Oriented Software\",\n", + " \"authors\": [\"erich gamma\", \"richard helm\", \"ralph johnson\", \"john vlissides\"],\n", + " \"summary\": \"Guide to design patterns that can be used in any object-oriented language\",\n", + " \"publish_date\": \"1994-10-31\",\n", + " \"num_reviews\": 45,\n", + " \"publisher\": \"addison-wesley\"\n", + " },\n", + " {\n", + " \"title\": \"The Clean Coder: A Code of Conduct for Professional Programmers\",\n", + " \"authors\": [\"robert c. 
martin\"],\n", + " \"summary\": \"A guide to professional conduct in the field of software engineering\",\n", + " \"publish_date\": \"2011-05-13\",\n", + " \"num_reviews\": 20,\n", + " \"publisher\": \"prentice hall\"\n", + " },\n", + " {\n", + " \"title\": \"JavaScript: The Good Parts\",\n", + " \"authors\": [\"douglas crockford\"],\n", + " \"summary\": \"A deep dive into the parts of JavaScript that are essential to writing maintainable code\",\n", + " \"publish_date\": \"2008-05-15\",\n", + " \"num_reviews\": 51,\n", + " \"publisher\": \"oreilly\"\n", + " },\n", + " {\n", + " \"title\": \"Introduction to the Theory of Computation\",\n", + " \"authors\": [\"michael sipser\"],\n", + " \"summary\": \"Introduction to the theory of computation and complexity theory\",\n", + " \"publish_date\": \"2012-06-27\",\n", + " \"num_reviews\": 33,\n", + " \"publisher\": \"cengage learning\"\n", + " },\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Index documents\n", + "\n", + "Our dataset is a Python list that contains dictionaries of book titles, summaries, and other metadata.\n", + "We'll use the client's `bulk` method to index our documents in a single request.\n", + "\n", + "The following code iterates over the list of books and creates a list of actions to be performed.\n", + "Each action is a dictionary containing an \"index\" operation on our Elasticsearch index.\n", + "The book's title is encoded using our selected model, and the encoded vector is added to the book document.\n", + "The book document is then added to the list of actions.\n", + "\n", + "Finally, we call the `bulk` method, specifying the index name and the list of actions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'took': 25, 'errors': False, 'items': [{'index': {'_index': 'rrf_book_index', '_id': 'KM-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 30, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'Kc-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 31, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'Ks-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 32, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'K8-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 33, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'LM-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 34, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'Lc-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 35, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'Ls-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 36, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'L8-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 37, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'MM-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', 
'_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 38, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'Mc-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 39, '_primary_term': 1, 'status': 201}}]})" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "actions = []\n", + "for book in books:\n", + " actions.append({\"index\": {\"_index\": \"rrf_book_index\"}})\n", + " titleEmbedding = model.encode(book[\"title\"]).tolist()\n", + " book[\"title_vector\"] = titleEmbedding\n", + " actions.append(book)\n", + "\n", + "client.bulk(index=\"rrf_book_index\", operations=actions)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "WgWDMgf9NkHL" + }, + "source": [ + "## Pretty printing Elasticsearch responses\n", + "\n", + "This is a helper function to print Elasticsearch responses in a readable format." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "def pretty_response(response):\n", + " for hit in response['hits']['hits']:\n", + " id = hit['_id']\n", + " publication_date = hit['_source']['publish_date']\n", + " score = hit['_score']\n", + " title = hit['_source']['title']\n", + " summary = hit['_source']['summary']\n", + " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nScore: {score}\")\n", + " print(pretty_output)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "MrBCHdH1u8Wd" + }, + "source": [ + "# Hybrid search using RRF\n", + "\n", + "## RRF overview\n", + "\n", + "[Reciprocal Rank Fusion (RRF)](https://www.elastic.co/guide/en/elasticsearch/reference/current/rrf.html) is a state-of-the-art ranking algorithm for combining results from different information retrieval strategies.\n", + "RRF consistently improves the combined results of different search algorithms.\n", + "It outperforms all other ranking algorithms, and often surpasses the best individual results, without calibration.\n", + "In brief, it enables best-in-class hybrid search out of the box.\n", + "\n", + "## How RRF works in Elasticsearch\n", + "\n", + "You can use RRF as part of a search to combine and rank documents using result sets from a combination of query and/or kNN searches.\n", + "A minimum of two result sets is required for ranking from the specified sources.\n", + "Check out the [RRF API reference](https://www.elastic.co/guide/en/elasticsearch/reference/master/rrf.html#rrf-api) for full details.\n", + "\n", + "In the following example, we'll use RRF to combine the results of a `match` query and a kNN semantic search.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/var/folders/vz/v2f6_x6s0kg51j2vbm5rlhww0000gn/T/ipykernel_2383/2934485565.py:22: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + " response = client.search(index=\"rrf_book_index\", body=body)\n" + ] + }, + { + "ename": "TypeError", + "evalue": "search() got an unexpected keyword argument 'rank'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[76], line 22\u001b[0m\n\u001b[1;32m 1\u001b[0m body \u001b[39m=\u001b[39m {\n\u001b[1;32m 2\u001b[0m \u001b[39m\"\u001b[39m\u001b[39msize\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m5\u001b[39m,\n\u001b[1;32m 3\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mquery\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 19\u001b[0m }\n\u001b[1;32m 20\u001b[0m }\n\u001b[0;32m---> 22\u001b[0m response \u001b[39m=\u001b[39m client\u001b[39m.\u001b[39;49msearch(index\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mrrf_book_index\u001b[39;49m\u001b[39m\"\u001b[39;49m, body\u001b[39m=\u001b[39;49mbody)\n\u001b[1;32m 24\u001b[0m \u001b[39mprint\u001b[39m(response)\n", + "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/utils.py:414\u001b[0m, in \u001b[0;36m_rewrite_parameters..wrapper..wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[1;32m 412\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[39mreturn\u001b[39;00m api(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", + "\u001b[0;31mTypeError\u001b[0m: search() got an unexpected keyword argument 'rank'" + ] + } + ], + "source": [ + "body = {\n", + " \"size\": 5,\n", + " \"query\": 
{\n", + " \"match\": {\n", + " \"summary\": \"shoes\"\n", + " },\n", + " \n", + " },\n", + " \"knn\": {\n", + " \"field\": \"title_vector\",\n", + " \"query_vector\" : model.encode(\"python programming\").tolist(), # generate embedding for query so it can be compared to `title_vector`\n", + " \"k\": 5,\n", + " \"num_candidates\": 10},\n", + " \"rank\": {\n", + " \"rrf\": {\n", + " \"window_size\": 5,\n", + " \"rank_constant\": 20\n", + " }\n", + " }\n", + "}\n", + "\n", + "response = client.search(index=\"rrf_book_index\", body=body)\n", + "\n", + "print(response)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above example, we first execute the kNN search to get its global top 5 results.\n", + "Then we execute the match query to get its global top 5 results.\n", + "Then we combine the knn search and match query results and rank them based on the RRF method to get the final top 5 results.\n", + "\n", + "ℹ️ Note that if `k` from a knn search is larger than `window_size`, the results are truncated to `window_size`.\n", + "If `k` is smaller than `window_size`, the results will be `k` size." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## RRF toy example\n", + "\n", + "This very simple example demonstrates how RRF ranks documents from different search strategies.\n", + "We begin by creating a mapping for an index with a text field, a vector field, and an integer field along with indexing several documents. For this example we are going to use a vector with only a single dimension to make the ranking easier to explain." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "body = {\n", + " \"mappings\": {\n", + " \"properties\": {\n", + " \"text\" : {\n", + " \"type\" : \"text\"\n", + " },\n", + " \"vector\": {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 1,\n", + " \"similarity\": \"l2_norm\",\n", + " \"index\": \"true\"\n", + "\n", + " },\n", + " \"integer\" : {\n", + " \"type\" : \"integer\"\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "client.indices.create(index=\"example-index\", body=body)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next let's index some documents." + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'took': 7, 'errors': False, 'items': [{'index': {'_index': 'example-index', '_id': 'UM8cLYkBaD3r4jKCTUjQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'example-index', '_id': 'Uc8cLYkBaD3r4jKCTUjQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'example-index', '_id': 'Us8cLYkBaD3r4jKCTUjQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'example-index', '_id': 'U88cLYkBaD3r4jKCTUjQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 3, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'example-index', '_id': 'VM8cLYkBaD3r4jKCTUjQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 4, '_primary_term': 1, 'status': 201}}]})" + ] + }, + "execution_count": 80, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "doc1 = {\n", + " \"text\" : \"rrf\",\n", + " \"vector\" : [5],\n", + " \"integer\": 1\n", + "}\n", + "\n", + "doc2 ={\n", + " \"text\" : \"rrf rrf\",\n", + " \"vector\" : [4],\n", + " \"integer\": 2\n", + "}\n", + "\n", + "doc3 = {\n", + " \"text\" : \"rrf rrf rrf\",\n", + " \"vector\" : [3],\n", + " \"integer\": 1\n", + "}\n", + "\n", + "doc4 = {\n", + " \"text\" : \"rrf rrf rrf rrf\",\n", + " \"integer\": 2\n", + "}\n", + "\n", + "doc5 ={\n", + " \"vector\" : [0],\n", + " \"integer\": 1\n", + "}\n", + "\n", + "docs = [doc1, doc2, doc3, doc4, doc5]\n", + "\n", + "actions = []\n", + "for doc in docs:\n", + " actions.append({\"index\": {\"_index\": \"example-index\"}})\n", + " actions.append(doc)\n", + "\n", + "client.bulk(index=\"example-index\", operations=actions)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now execute a search using RRF with a query, a kNN search, and a terms aggregation." + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/vz/v2f6_x6s0kg51j2vbm5rlhww0000gn/T/ipykernel_2383/3671365121.py:29: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", + " response = client.search(index=\"example-index\", body=body)\n" + ] + }, + { + "ename": "TypeError", + "evalue": "search() got an unexpected keyword argument 'rank'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[81], line 29\u001b[0m\n\u001b[1;32m 1\u001b[0m body \u001b[39m=\u001b[39m {\n\u001b[1;32m 2\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mquery\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[1;32m 3\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mterm\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 26\u001b[0m }\n\u001b[1;32m 27\u001b[0m }\n\u001b[0;32m---> 29\u001b[0m response \u001b[39m=\u001b[39m client\u001b[39m.\u001b[39;49msearch(index\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mexample-index\u001b[39;49m\u001b[39m\"\u001b[39;49m, body\u001b[39m=\u001b[39;49mbody)\n", + "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/utils.py:414\u001b[0m, in \u001b[0;36m_rewrite_parameters..wrapper..wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[1;32m 412\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[39mreturn\u001b[39;00m api(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", + "\u001b[0;31mTypeError\u001b[0m: search() got an unexpected keyword argument 'rank'" + ] + } + ], + "source": [ + "body = {\n", + " \"query\": {\n", + " \"term\": {\n", + " \"text\": \"rrf\"\n", + " }\n", + " },\n", + " \"knn\": {\n", + " \"field\": \"vector\",\n", + " \"query_vector\": [3],\n", + " \"k\": 5,\n", + " \"num_candidates\": 5\n", + " },\n", + " \"rank\": {\n", + " \"rrf\": {\n", + " \"window_size\": 5,\n", + " 
\"rank_constant\": 1\n", + " }\n", + " },\n", + " \"size\": 3,\n", + " \"aggs\": {\n", + " \"int_count\": {\n", + " \"terms\": {\n", + " \"field\": \"integer\"\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "response = client.search(index=\"example-index\", body=body)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We receive a response with ranked hits and the terms aggregation result.\n", + "Note that _score is null, and we instead use _rank to show our top-ranked documents.\n", + "\n", + "Let’s break down how these hits were ranked.\n", + "We start by running the query and the kNN search separately to collect what their individual hits are.\n", + "\n", + "First, we look at the hits for the query.\n", + "\n", + "```json\n", + "\"hits\" : [\n", + " {\n", + " \"_index\" : \"example-index\",\n", + " \"_id\" : \"4\",\n", + " \"_score\" : 0.16152832, (1) \n", + " \"_source\" : {\n", + " \"integer\" : 2,\n", + " \"text\" : \"rrf rrf rrf rrf\"\n", + " }\n", + " },\n", + " {\n", + " \"_index\" : \"example-index\",\n", + " \"_id\" : \"3\", (2) \n", + " \"_score\" : 0.15876243,\n", + " \"_source\" : {\n", + " \"integer\" : 1,\n", + " \"vector\" : [3],\n", + " \"text\" : \"rrf rrf rrf\"\n", + " }\n", + " },\n", + " {\n", + " \"_index\" : \"example-index\",\n", + " \"_id\" : \"2\", (3) \n", + " \"_score\" : 0.15350538,\n", + " \"_source\" : {\n", + " \"integer\" : 2,\n", + " \"vector\" : [4],\n", + " \"text\" : \"rrf rrf\"\n", + " }\n", + " },\n", + " {\n", + " \"_index\" : \"example-index\",\n", + " \"_id\" : \"1\", (4)\n", + " \"_score\" : 0.13963442,\n", + " \"_source\" : {\n", + " \"integer\" : 1,\n", + " \"vector\" : [5],\n", + " \"text\" : \"rrf\"\n", + " }\n", + " }\n", + "]\n", + "```\n", + "\n", + "```markdown\n", + "<1> rank 1, `_id` 4\n", + "<2> rank 2, `_id` 3\n", + "<3> rank 3, `_id` 2\n", + "<4> rank 4, `_id` 1\n", + "```" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "\n", + "Note that our first hit doesn’t have a value for the vector field.\n", + "\n", + "Now, we look at the results for the kNN search.\n", + "\n", + "```json\n", + "\"hits\" : [\n", + " {\n", + " \"_index\" : \"example-index\",\n", + " \"_id\" : \"3\", (1)\n", + " \"_score\" : 1.0,\n", + " \"_source\" : {\n", + " \"integer\" : 1,\n", + " \"vector\" : [3],\n", + " \"text\" : \"rrf rrf rrf\"\n", + " }\n", + " },\n", + " {\n", + " \"_index\" : \"example-index\",\n", + " \"_id\" : \"2\", (2)\n", + " \"_score\" : 0.5,\n", + " \"_source\" : {\n", + " \"integer\" : 2,\n", + " \"vector\" : [4],\n", + " \"text\" : \"rrf rrf\"\n", + " }\n", + " },\n", + " {\n", + " \"_index\" : \"example-index\",\n", + " \"_id\" : \"1\", (3)\n", + " \"_score\" : 0.2,\n", + " \"_source\" : {\n", + " \"integer\" : 1,\n", + " \"vector\" : [5],\n", + " \"text\" : \"rrf\"\n", + " }\n", + " },\n", + " {\n", + " \"_index\" : \"example-index\",\n", + " \"_id\" : \"5\", (4)\n", + " \"_score\" : 0.1,\n", + " \"_source\" : {\n", + " \"integer\" : 1,\n", + " \"vector\" : [0]\n", + " }\n", + " }\n", + "]\n```\n", + "\n", + "```markdown\n", + "<1> rank 1, `_id` 3\n", + "<2> rank 2, `_id` 2\n", + "<3> rank 3, `_id` 1\n", + "<4> rank 4, `_id` 5\n", + "```\n", + "\n", + "We can now take the two individually ranked result sets and apply the RRF formula to them to get our final ranking.\n", + "\n", + "```python\n", + "# doc | query | knn | score\n", + "_id: 1 = 1.0/(1+4) + 1.0/(1+3) = 0.4500\n", + "_id: 2 = 1.0/(1+3) + 1.0/(1+2) = 0.5833\n", + "_id: 3 = 1.0/(1+2) + 1.0/(1+1) = 0.8333\n", + "_id: 4 = 1.0/(1+1) = 0.5000\n", + "_id: 5 = 1.0/(1+4) = 0.2000\n", + "```\n", + "\n", + "We rank the documents based on the RRF formula with a `window_size` of `5`\n", + "truncating the bottom `2` docs in our RRF result set with a `size` of `3`.\n", + "We end with `_id: 3` as `_rank: 1`, `_id: 2` as `_rank: 2`, and\n", + "`_id: 4` as `_rank: 3`. 
This ranking matches the result set from the\n", + "original RRF search as expected." + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 1ff03e4c3957e9f11c45346454c0823f8de70ec0 Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Fri, 7 Jul 2023 11:40:49 +0200 Subject: [PATCH 31/54] Clear output, add button --- .../search/02-hybrid-search-with-rrf.ipynb | 219 +++--------------- 1 file changed, 26 insertions(+), 193 deletions(-) diff --git a/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb b/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb index fa2c53d1..312bbdb1 100644 --- a/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb +++ b/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb @@ -9,8 +9,13 @@ "source": [ "# Hybrid Search using RRF\n", "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/leemthompo/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb)\n", + "\n", "In this example we'll use the reciprocal rank fusion algorithm to combine the results of BM25 and kNN semantic search.\n", - "We'll use the same dataset we used in our [quickstart](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb) guide." 
+ "We'll use the same dataset we used in our [quickstart](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb) guide.\n", + "You can use RRF for hybrid search out of the box, without any additional configuration.\n", + "\n", + "We also provide a walkthrough of a toy example, which demonstrates how RRF ranking works at a basic level." ] }, { @@ -65,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -73,63 +78,7 @@ "id": "K9Q1p2C9-wce", "outputId": "204d5aee-571e-4363-be6e-f87d058f2d29" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "fatal: destination path 'elasticsearch-py' already exists and is not an empty directory.\n", - "/Users/liamthompson/notebook-tests/elasticsearch-py\n", - "HEAD is now at 825e642b Bumps 8.8 to 8.8.2\n", - "zsh:1: parse error near `-m'\n", - "Requirement already satisfied: sentence_transformers in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (2.2.2)\n", - "Requirement already satisfied: transformers<5.0.0,>=4.6.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (4.30.2)\n", - "Requirement already satisfied: tqdm in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (4.65.0)\n", - "Requirement already satisfied: torch>=1.6.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (2.0.1)\n", - "Requirement already satisfied: torchvision in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (0.15.2)\n", - "Requirement already satisfied: numpy in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (1.25.0)\n", - "Requirement already satisfied: scikit-learn in 
/Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (1.3.0)\n", - "Requirement already satisfied: scipy in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (1.11.1)\n", - "Requirement already satisfied: nltk in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (3.8.1)\n", - "Requirement already satisfied: sentencepiece in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (0.1.99)\n", - "Requirement already satisfied: huggingface-hub>=0.4.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sentence_transformers) (0.15.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (6.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (4.6.3)\n", - "Requirement already satisfied: packaging>=20.9 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (23.1)\n", - "Requirement already satisfied: requests in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2.31.0)\n", - "Requirement already satisfied: fsspec in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2023.6.0)\n", - "Requirement already satisfied: filelock in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (3.12.2)\n", - "Requirement already satisfied: jinja2 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from 
torch>=1.6.0->sentence_transformers) (3.1.2)\n", - "Requirement already satisfied: networkx in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch>=1.6.0->sentence_transformers) (3.1)\n", - "Requirement already satisfied: sympy in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch>=1.6.0->sentence_transformers) (1.12)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (0.13.3)\n", - "Requirement already satisfied: regex!=2019.12.17 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (2023.6.3)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (0.3.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from jinja2->torch>=1.6.0->sentence_transformers) (2.1.3)\n", - "Requirement already satisfied: click in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from nltk->sentence_transformers) (8.1.3)\n", - "Requirement already satisfied: joblib in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from nltk->sentence_transformers) (1.3.1)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2023.5.7)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in 
/Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.1.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.4)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from scikit-learn->sentence_transformers) (3.1.0)\n", - "Requirement already satisfied: mpmath>=0.19 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sympy->torch>=1.6.0->sentence_transformers) (1.3.0)\n", - "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torchvision->sentence_transformers) (10.0.0)\n", - "\u001b[33mWARNING: You are using pip version 21.2.3; however, version 23.1.2 is available.\n", - "You should consider upgrading via the '/Users/liamthompson/.pyenv/versions/3.9.7/bin/python3.9 -m pip install --upgrade pip' command.\u001b[0m\n", - "Requirement already satisfied: torch in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (2.0.1)\n", - "Requirement already satisfied: jinja2 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch) (3.1.2)\n", - "Requirement already satisfied: networkx in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch) (3.1)\n", - "Requirement already satisfied: filelock in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch) (3.12.2)\n", - "Requirement already satisfied: typing-extensions in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch) (4.6.3)\n", - "Requirement already satisfied: sympy in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from torch) (1.12)\n", - "Requirement already 
satisfied: MarkupSafe>=2.0 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from jinja2->torch) (2.1.3)\n", - "Requirement already satisfied: mpmath>=0.19 in /Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages (from sympy->torch) (1.3.0)\n", - "\u001b[33mWARNING: You are using pip version 21.2.3; however, version 23.1.2 is available.\n", - "You should consider upgrading via the '/Users/liamthompson/.pyenv/versions/3.9.7/bin/python3.9 -m pip install --upgrade pip' command.\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "!git clone https://github.com/elastic/elasticsearch-py.git\n", "%cd elasticsearch-py\n", @@ -153,20 +102,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "id": "uP_GTVRi-d96" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/liamthompson/.pyenv/versions/3.9.7/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "from elasticsearch import Elasticsearch, helpers\n", "from urllib.request import urlopen\n", @@ -196,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -231,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -239,15 +179,7 @@ "id": "rdiUKqZbEKfF", "outputId": "43b6f1cd-a43e-4dbe-caa5-7fd170464881" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'name': 'instance-0000000000', 'cluster_name': '9dd1e5c0b0d64796b8cf0746cf63d734', 'cluster_uuid': 'VeYvw6JhQcC3P-Q1-L9P_w', 'version': {'number': '8.9.0-SNAPSHOT', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'ac7d79178c3e57c935358453331efe9e9cc5104d', 'build_date': '2023-06-21T09:08:25.219504984Z', 'build_snapshot': True, 'lucene_version': '9.7.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0', 'transport_version': '8500019'}, 'tagline': 'You Know, for Search'}\n" - ] - } - ], + "outputs": [], "source": [ "print(client.info())" ] @@ -279,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -287,31 +219,7 @@ "id": "cvYECABJJs_2", "outputId": "18fb51e4-c4f6-4d1b-cb2d-bc6f8ec1aa84" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/vz/v2f6_x6s0kg51j2vbm5rlhww0000gn/T/ipykernel_2383/1628078329.py:22: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", - " client.indices.create(index='rrf_book_index', body=mapping)\n" - ] - }, - { - "ename": "BadRequestError", - "evalue": "BadRequestError(400, 'resource_already_exists_exception', 'index [rrf_book_index/Ip8zitwhSMe0OJtEwpuqzQ] already exists')", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mBadRequestError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[59], line 22\u001b[0m\n\u001b[1;32m 2\u001b[0m mapping \u001b[39m=\u001b[39m {\n\u001b[1;32m 3\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mmappings\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[1;32m 4\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mproperties\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 18\u001b[0m }\n\u001b[1;32m 19\u001b[0m }\n\u001b[1;32m 21\u001b[0m \u001b[39m# Create the index\u001b[39;00m\n\u001b[0;32m---> 22\u001b[0m client\u001b[39m.\u001b[39;49mindices\u001b[39m.\u001b[39;49mcreate(index\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mrrf_book_index\u001b[39;49m\u001b[39m'\u001b[39;49m, body\u001b[39m=\u001b[39;49mmapping)\n", - "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/utils.py:414\u001b[0m, in \u001b[0;36m_rewrite_parameters..wrapper..wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[1;32m 412\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[39mreturn\u001b[39;00m api(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/indices.py:517\u001b[0m, in \u001b[0;36mcreate\u001b[0;34m(self, index, aliases, error_trace, filter_path, human, mappings, master_timeout, pretty, settings, timeout, 
wait_for_active_shards)\u001b[0m\n", - "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/_base.py:389\u001b[0m, in \u001b[0;36mNamespacedClient.perform_request\u001b[0;34m(self, method, path, params, headers, body)\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mperform_request\u001b[39m(\n\u001b[1;32m 379\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 380\u001b[0m method: \u001b[39mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[39m# Use the internal clients .perform_request() implementation\u001b[39;00m\n\u001b[1;32m 388\u001b[0m \u001b[39m# so we take advantage of their transport options.\u001b[39;00m\n\u001b[0;32m--> 389\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49mperform_request(\n\u001b[1;32m 390\u001b[0m method, path, params\u001b[39m=\u001b[39;49mparams, headers\u001b[39m=\u001b[39;49mheaders, body\u001b[39m=\u001b[39;49mbody\n\u001b[1;32m 391\u001b[0m )\n", - "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/_base.py:320\u001b[0m, in \u001b[0;36mBaseClient.perform_request\u001b[0;34m(self, method, path, params, headers, body)\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mValueError\u001b[39;00m, \u001b[39mKeyError\u001b[39;00m, \u001b[39mTypeError\u001b[39;00m):\n\u001b[1;32m 318\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 320\u001b[0m \u001b[39mraise\u001b[39;00m HTTP_EXCEPTIONS\u001b[39m.\u001b[39mget(meta\u001b[39m.\u001b[39mstatus, ApiError)(\n\u001b[1;32m 321\u001b[0m message\u001b[39m=\u001b[39mmessage, meta\u001b[39m=\u001b[39mmeta, body\u001b[39m=\u001b[39mresp_body\n\u001b[1;32m 322\u001b[0m )\n\u001b[1;32m 324\u001b[0m \u001b[39m# 'X-Elastic-Product: Elasticsearch' should be on every 2XX response.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[39mif\u001b[39;00m 
\u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_verified_elasticsearch:\n\u001b[1;32m 326\u001b[0m \u001b[39m# If the header is set we mark the server as verified.\u001b[39;00m\n", - "\u001b[0;31mBadRequestError\u001b[0m: BadRequestError(400, 'resource_already_exists_exception', 'index [rrf_book_index/Ip8zitwhSMe0OJtEwpuqzQ] already exists')" - ] - } - ], + "outputs": [], "source": [ "# Define the mapping\n", "mapping = {\n", @@ -353,20 +261,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ObjectApiResponse({'took': 29, 'errors': False, 'items': [{'index': {'_index': 'rrf_book_index', '_id': '7c-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 10, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '7s-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 11, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '78-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 12, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '8M-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 13, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '8c-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 14, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '8s-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 15, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', 
'_id': '88-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 16, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '9M-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 17, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '9c-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 18, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': '9s-QKokBaD3r4jKCZkdN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 19, '_primary_term': 1, 'status': 201}}]})" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "books = [\n", " {\n", @@ -472,20 +369,9 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ObjectApiResponse({'took': 25, 'errors': False, 'items': [{'index': {'_index': 'rrf_book_index', '_id': 'KM-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 30, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'Kc-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 31, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'Ks-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 32, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'K8-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 
33, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'LM-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 34, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'Lc-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 35, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'Ls-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 36, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'L8-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 37, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'MM-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 38, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'rrf_book_index', '_id': 'Mc-gK4kBaD3r4jKC2Ejk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 39, '_primary_term': 1, 'status': 201}}]})" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "actions = []\n", "for book in books:\n", @@ -511,7 +397,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -553,30 +439,9 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/vz/v2f6_x6s0kg51j2vbm5rlhww0000gn/T/ipykernel_2383/2934485565.py:22: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future 
version. Instead use individual parameters.\n", - " response = client.search(index=\"rrf_book_index\", body=body)\n" - ] - }, - { - "ename": "TypeError", - "evalue": "search() got an unexpected keyword argument 'rank'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[76], line 22\u001b[0m\n\u001b[1;32m 1\u001b[0m body \u001b[39m=\u001b[39m {\n\u001b[1;32m 2\u001b[0m \u001b[39m\"\u001b[39m\u001b[39msize\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m5\u001b[39m,\n\u001b[1;32m 3\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mquery\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 19\u001b[0m }\n\u001b[1;32m 20\u001b[0m }\n\u001b[0;32m---> 22\u001b[0m response \u001b[39m=\u001b[39m client\u001b[39m.\u001b[39;49msearch(index\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mrrf_book_index\u001b[39;49m\u001b[39m\"\u001b[39;49m, body\u001b[39m=\u001b[39;49mbody)\n\u001b[1;32m 24\u001b[0m \u001b[39mprint\u001b[39m(response)\n", - "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/utils.py:414\u001b[0m, in \u001b[0;36m_rewrite_parameters..wrapper..wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[1;32m 412\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[39mreturn\u001b[39;00m api(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "\u001b[0;31mTypeError\u001b[0m: search() got an unexpected keyword argument 'rank'" - ] - } - ], + "outputs": [], "source": [ "body = {\n", " \"size\": 5,\n", @@ -667,20 +532,9 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"ObjectApiResponse({'took': 7, 'errors': False, 'items': [{'index': {'_index': 'example-index', '_id': 'UM8cLYkBaD3r4jKCTUjQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'example-index', '_id': 'Uc8cLYkBaD3r4jKCTUjQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'example-index', '_id': 'Us8cLYkBaD3r4jKCTUjQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'example-index', '_id': 'U88cLYkBaD3r4jKCTUjQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 3, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'example-index', '_id': 'VM8cLYkBaD3r4jKCTUjQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 4, '_primary_term': 1, 'status': 201}}]})" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "doc1 = {\n", " \"text\" : \"rrf\",\n", @@ -730,30 +584,9 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/vz/v2f6_x6s0kg51j2vbm5rlhww0000gn/T/ipykernel_2383/3671365121.py:29: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", - " response = client.search(index=\"example-index\", body=body)\n" - ] - }, - { - "ename": "TypeError", - "evalue": "search() got an unexpected keyword argument 'rank'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[81], line 29\u001b[0m\n\u001b[1;32m 1\u001b[0m body \u001b[39m=\u001b[39m {\n\u001b[1;32m 2\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mquery\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[1;32m 3\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mterm\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 26\u001b[0m }\n\u001b[1;32m 27\u001b[0m }\n\u001b[0;32m---> 29\u001b[0m response \u001b[39m=\u001b[39m client\u001b[39m.\u001b[39;49msearch(index\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mexample-index\u001b[39;49m\u001b[39m\"\u001b[39;49m, body\u001b[39m=\u001b[39;49mbody)\n", - "File \u001b[0;32m~/.pyenv/versions/3.9.7/lib/python3.9/site-packages/elasticsearch/_sync/client/utils.py:414\u001b[0m, in \u001b[0;36m_rewrite_parameters..wrapper..wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[1;32m 412\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[39mreturn\u001b[39;00m api(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "\u001b[0;31mTypeError\u001b[0m: search() got an unexpected keyword argument 'rank'" - ] - } - ], + "outputs": [], "source": [ "body = {\n", " \"query\": {\n", From 866227e01085fad648d1d4f56923fc8e2d383f78 Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Fri, 7 Jul 2023 12:00:13 +0200 Subject: [PATCH 32/54] Cleanup --- .../search/02-hybrid-search-with-rrf.ipynb | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 
deletions(-) diff --git a/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb b/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb index 312bbdb1..ba55bdcf 100644 --- a/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb +++ b/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb @@ -676,12 +676,12 @@ "]\n", "```\n", "\n", - "```markdown\n", - "<1> rank 1, `_id` 4\n", - "<2> rank 2, `_id` 3\n", - "<3> rank 3, `_id` 2\n", - "<4> rank 4, `_id` 1\n", - "```" + "Note the following information about the hits:\n", + "\n", + "- **(1)** rank 1, `_id` 4\n", + "- **(2)** rank 2, `_id` 3\n", + "- **(3)** rank 3, `_id` 2\n", + "- **(4)** rank 4, `_id` 1\n" ] }, { @@ -698,7 +698,7 @@ "\"hits\" : [\n", " {\n", " \"_index\" : \"example-index\",\n", - " \"_id\" : \"3\", \n", + " \"_id\" : \"3\", (1)\n", " \"_score\" : 1.0,\n", " \"_source\" : {\n", " \"integer\" : 1,\n", @@ -708,7 +708,7 @@ " },\n", " {\n", " \"_index\" : \"example-index\",\n", - " \"_id\" : \"2\", \n", + " \"_id\" : \"2\", (2)\n", " \"_score\" : 0.5,\n", " \"_source\" : {\n", " \"integer\" : 2,\n", @@ -718,7 +718,7 @@ " },\n", " {\n", " \"_index\" : \"example-index\",\n", - " \"_id\" : \"1\", \n", + " \"_id\" : \"1\", (3)\n", " \"_score\" : 0.2,\n", " \"_source\" : {\n", " \"integer\" : 1,\n", @@ -728,22 +728,24 @@ " },\n", " {\n", " \"_index\" : \"example-index\",\n", - " \"_id\" : \"5\", \n", + " \"_id\" : \"5\", (4)\n", " \"_score\" : 0.1,\n", " \"_source\" : {\n", " \"integer\" : 1,\n", " \"vector\" : [0]\n", " }\n", " }\n", - "]```\n", - "\n", - "```markdown\n", - "<1> rank 1, `_id` 3\n", - "<2> rank 2, `_id` 2\n", - "<3> rank 3, `_id` 1\n", - "<4> rank 4, `_id` 5\n", + "]\n", "```\n", "\n", + "Note the following information about the hits:\n", + "\n", + "- **(1)** rank 1, `_id` 3\n", + "- **(2)** rank 2, `_id` 2\n", + "- **(3)** rank 3, `_id` 1\n", + "- **(4)** rank 4, `_id` 5\n", + "\n", + "\n", "We can now take the two individually ranked result sets and 
apply the RRF formula to them to get our final ranking.\n", "\n", "```python\n", @@ -757,8 +759,11 @@ "\n", "We rank the documents based on the RRF formula with a `window_size` of `5`\n", "truncating the bottom `2` docs in our RRF result set with a `size` of `3`.\n", - "We end with `_id: 3` as `_rank: 1`, `_id: 2` as `_rank: 2`, and\n", - "`_id: 4` as `_rank: 3`. This ranking matches the result set from the\n", + "\n", + "We end up with `_id: 3` as `_rank: 1`, `_id: 2` as `_rank: 2`, and\n", + "`_id: 4` as `_rank: 3`.\n", + "\n", + "This ranking matches the result set from the\n", "original RRF search as expected." ] } From 862f0217a8f78195bbf5f4571b83ba03a820a4f8 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:38:12 +0200 Subject: [PATCH 33/54] Update colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/01-keyword-querying-filtering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 94d8a4c8..e247c5a2 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -64,7 +64,7 @@ { "cell_type": "markdown", "source": [ - "# Keyword Quering Filtering\n", + "# Keyword querying and filtering\n", "\n", "\"Open\n", "\n", From bca6a07289dbe499e223f92385b4e2d544a6763f Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:38:40 +0200 Subject: [PATCH 34/54] Update colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/01-keyword-querying-filtering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index e247c5a2..3b4baa36 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -68,7 +68,7 @@ "\n", "\"Open\n", "\n", - "This interactive notebook will introduce you to the Elasticsearch queries, using the official Elasticsearch Python client. Before getting start this section we highly recomented firstly finish [quick start](https://github.com/yansavitski/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb)" + "This interactive notebook will introduce you to the basic Elasticsearch queries, using the official Elasticsearch Python client. Before getting start this section we recommend working through our [quick start](https://github.com/yansavitski/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb)." 
], "metadata": { "id": "83LdOUCwwHzs" From 44e8d699ab9419b8699e074a004ab228f7bce157 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:39:03 +0200 Subject: [PATCH 35/54] Update colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/01-keyword-querying-filtering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 3b4baa36..d569a2d6 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -78,7 +78,7 @@ "cell_type": "markdown", "source": [ "## Querying\n", - "In the query context, a query clause answers the question “How well does this document match this query clause?” Besides deciding whether or not the document matches, the query clause also calculates a relevance score in the _score metadata field.\n", + "In the query context, a query clause answers the question _“How well does this document match this query clause?”_. 
In addition to deciding whether or not the document matches, the query clause also calculates a relevance score in the `_score `metadata field.\n", "\n", "### Full text queries\n", "\n", From 93fc1e43e2a71095093832847691bd12de38c185 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:39:10 +0200 Subject: [PATCH 36/54] Update colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/01-keyword-querying-filtering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index d569a2d6..1b3121b9 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -82,7 +82,7 @@ "\n", "### Full text queries\n", "\n", - "The full text queries enable you to search analyzed text fields such as the body of an email. The query string is processed using the same analyzer that was applied to the field during indexing.\n", + "Full text queries enable you to search analyzed text fields such as the body of an email. 
The query string is processed using the same analyzer that was applied to the field during indexing.\n", "\n", "* **match**.\n", " The standard query for performing full text queries, including fuzzy matching and phrase or proximity queries.\n", From a897c24c704c29c64e24e916ae5363467dd7fbec Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:39:18 +0200 Subject: [PATCH 37/54] Update colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/01-keyword-querying-filtering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 1b3121b9..90f6cf45 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -101,7 +101,7 @@ "\n", "The `match` query is the standard query for performing a full-text search, including options for fuzzy matching.\n", "\n", - "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html#match-query-ex-request)\n", + "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html#match-query-ex-request).\n", "\n" ], "metadata": { From 1a0dd44a6476d7b9007ae5168804a2eb8261e908 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:39:26 +0200 Subject: [PATCH 38/54] Update colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/01-keyword-querying-filtering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 
90f6cf45..0b18676b 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -1078,7 +1078,7 @@ "source": [ "## Filtering\n", "\n", - "In a filter context, a query clause answers the question *“Does this document match this query clause?”* The answer is a simple Yes or No — no scores are calculated. Filter context is mostly used for filtering structured data, e.g.\n", + "In a filter context, a query clause answers the question *“Does this document match this query clause?”* The answer is a simple Yes or No — no scores are calculated. Filter context is mostly used for filtering structured data, for example:\n", "* Does this `timestamp` fall into the range 2015 to 2016?\n", "* Is the `status` field set to `\"published\"`?\n", "\n", From 2691ee419d05b4606ba2133a8bfd8dee248e0b90 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:39:35 +0200 Subject: [PATCH 39/54] Update colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/01-keyword-querying-filtering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 0b18676b..52f6b734 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -320,7 +320,7 @@ "\n", "The `multi_match` query builds on the match query to allow multi-field queries\n", "\n", - "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html)" + "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html)." 
], "metadata": { "id": "H-n6hoVsfAqc" From 1ec54ab0fd3a73d748dac97b9e0d82da8d9f608d Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:39:41 +0200 Subject: [PATCH 40/54] Update colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/01-keyword-querying-filtering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 52f6b734..ac7a96fb 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -704,7 +704,7 @@ "source": [ "### Prefix search\n", "\n", - "Returns documents that contain a specific prefix in a provided field\n", + "Returns documents that contain a specific prefix in a provided field.\n", "\n", "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-prefix-query.html)" ], From d52cd1a9ec33a8088a0956a4d5f84ac2c791ff11 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:39:52 +0200 Subject: [PATCH 41/54] Update colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/01-keyword-querying-filtering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index ac7a96fb..0fb4a019 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -318,7 +318,7 @@ "source": [ "### Multi-match query\n", "\n", - "The `multi_match` query builds on the match query to allow multi-field queries\n", + "The 
`multi_match` query builds on the match query to allow multi-field queries.\n", "\n", "[Read more](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html)." ], From 57d2481065f7b8c6e88682c5033d094eeafa6997 Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:40:01 +0200 Subject: [PATCH 42/54] Update colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/01-keyword-querying-filtering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 0fb4a019..e7a97930 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -511,7 +511,7 @@ { "cell_type": "markdown", "source": [ - "Individual fields can be boosted with the caret (^) notation" + "Individual fields can be boosted with the caret (^) notation." 
], "metadata": { "id": "FnBeBIVKiPnS" From 87ff8f8f22a8565b6be53f55bb676589310f9caf Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:41:32 +0200 Subject: [PATCH 43/54] Update 01-keyword-querying-filtering.ipynb Remove unused library --- .../search/01-keyword-querying-filtering.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index e7a97930..34bc6a35 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -24,7 +24,7 @@ "outputs": [], "source": [ "#@title Prepare elasticsearch client { display-mode: \"form\" }\n", - "!pip install -qU elasticsearch sentence-transformers==2.2.2\n", + "!pip install elasticsearch\n", "from elasticsearch import Elasticsearch\n", "import pandas as pd\n", "from google.colab import data_table\n", @@ -1719,4 +1719,4 @@ ] } ] -} \ No newline at end of file +} From aad146266243dec59d2a1f88f40959550557689f Mon Sep 17 00:00:00 2001 From: Yan Savitski Date: Fri, 7 Jul 2023 15:44:52 +0200 Subject: [PATCH 44/54] Update 03-ELSER.ipynb Import json library --- colab-notebooks-examples/search/03-ELSER.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/03-ELSER.ipynb b/colab-notebooks-examples/search/03-ELSER.ipynb index b8c6cc8a..8956d6d2 100644 --- a/colab-notebooks-examples/search/03-ELSER.ipynb +++ b/colab-notebooks-examples/search/03-ELSER.ipynb @@ -132,7 +132,8 @@ "source": [ "from elasticsearch import Elasticsearch, helpers\n", "from urllib.request import urlopen\n", - "import getpass" + "import getpass", + "import json" ] }, { From eeb21cfd092c3fb0d040108b051960f0d3215dc0 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Fri, 7 Jul 2023 21:07:40 +0100 Subject: [PATCH 45/54] typos --- 
.../01-keyword-querying-filtering.ipynb | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb index 34bc6a35..deb44e47 100644 --- a/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb +++ b/colab-notebooks-examples/search/01-keyword-querying-filtering.ipynb @@ -3,8 +3,7 @@ "nbformat_minor": 0, "metadata": { "colab": { - "provenance": [], - "authorship_tag": "ABX9TyNRIISv0/ilwAn8BxsNs0U0" + "provenance": [] }, "kernelspec": { "name": "python3", @@ -55,7 +54,7 @@ { "cell_type": "markdown", "source": [ - "[Quick Start](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb) || **Keyword Quering Filtering** || [Hubrid search with RRF](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb) || [ELSER](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/03-ELSER.ipynb)" + "[Quick Start](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/00-quick-start.ipynb) || **Keyword Querying Filtering** || [Hybrid search with RRF](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb) || [ELSER](https://github.com/joemcelroy/elasticsearch-labs/blob/notebooks-guides/colab-notebooks-examples/search/03-ELSER.ipynb)" ], "metadata": { "id": "0wgbLWl2udLQ" @@ -129,7 +128,7 @@ "id": "q_OE0XVx6_qX", "outputId": "6a1d7760-5fb9-4809-e060-e35a398ed3c4" }, - "execution_count": 41, + "execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -346,7 +345,7 @@ "id": "TRmGYM94gCtb", "outputId": "dc58b19f-e585-4d0a-d065-ac3fc18ae123" }, - "execution_count": 50, + 
"execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -537,7 +536,7 @@ "id": "_aI7hnH0ixkG", "outputId": "2af27f3d-f9fd-4c7a-cab5-7cb06132582c" }, - "execution_count": 49, + "execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -733,7 +732,7 @@ "id": "dCr1pwlqlOE7", "outputId": "ae55cd66-0ded-4868-dac5-5815ea317c44" }, - "execution_count": 48, + "execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -922,7 +921,7 @@ "id": "dTMc-IxPmbtC", "outputId": "9acf74fd-bc16-45df-80f3-49504860b10a" }, - "execution_count": 47, + "execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -1127,7 +1126,7 @@ "id": "8_C-JHRQFDl7", "outputId": "be59d18b-5e20-4db0-8697-2e7746251742" }, - "execution_count": 46, + "execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -1305,7 +1304,7 @@ "id": "GRm9T1vfIsmF", "outputId": "d9fb6936-3ffb-4fff-9467-1f7ac7b41490" }, - "execution_count": 44, + "execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -1534,7 +1533,7 @@ "id": "6RH0OALLJPHv", "outputId": "338419b0-3e60-4ac9-ddeb-67cac6202ca2" }, - "execution_count": 45, + "execution_count": null, "outputs": [ { "output_type": "execute_result", @@ -1719,4 +1718,4 @@ ] } ] -} +} \ No newline at end of file From ee5705cccc5cbff1204f9f814836b59cfb091d89 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 10 Jul 2023 05:03:11 -0400 Subject: [PATCH 46/54] updates --- .../generative-ai/question-answering.ipynb | 232 +++++++++--------- ...-with-rrf.ipynb => 02-hybrid-search.ipynb} | 0 2 files changed, 118 insertions(+), 114 deletions(-) rename colab-notebooks-examples/search/{02-hybrid-search-with-rrf.ipynb => 02-hybrid-search.ipynb} (100%) diff --git a/colab-notebooks-examples/generative-ai/question-answering.ipynb b/colab-notebooks-examples/generative-ai/question-answering.ipynb index b98a4e11..6a8313d3 100644 --- 
a/colab-notebooks-examples/generative-ai/question-answering.ipynb +++ b/colab-notebooks-examples/generative-ai/question-answering.ipynb @@ -1,7 +1,11 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "tZnIXBfrRpex" + }, "source": [ "# Question Answering with Langchain and OpenAI\n", "\n", @@ -13,13 +17,14 @@ "![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAf4AAACiCAIAAAAvNjhKAAAMQWlDQ1BJQ0MgUHJvZmlsZQAASImVVwdYU8kWnluSkEBCCV1K6E0QqQGkhNACSC+CqIQkQCgxBoKKHV1UcO1iARu6KqLYAbGgiGJhUex9saCirIsFu/ImBXTdV7433zd3/vvPmf+cOXfm3jsAqJ/gisV5qAYA+aJCSVxoIGNMSiqD9BSQAApoQBvYcXkFYlZMTCSAZbD9e3l3HSCy9oqjTOuf/f+1aPIFBTwAkBiIM/gFvHyIDwKAV/HEkkIAiDLeYnKhWIZhBdoSGCDEC2Q4S4GrZDhDgffKbRLi2BC3AqCixuVKsgCgXYI8o4iXBTVofRA7i/hCEQDqDIj98vMn8iFOh9gW2oghlukzM37QyfqbZsaQJpebNYQVc5EXlSBhgTiPO/X/TMf/Lvl50kEf1rCqZUvC4mRzhnm7mTsxQobVIO4VZURFQ6wF8QchX24PMUrJloYlKuxRI14BG+YM6ELszOcGRUBsBHGIKC8qUslnZApDOBDDFYJOERZyEiDWh3iBoCA4XmmzSTIxTukLrc+UsFlK/ixXIvcr83VfmpvIUuq/zhZwlPoYrTg7IRliCsSWRcKkKIhpEDsV5MZHKG1GFWezowZtJNI4WfyWEMcJRKGBCn2sKFMSEqe0L8svGJwvtilbyIlS4v2F2QlhivxgrTyuPH44F+ySQMRKHNQRFIyJHJwLXxAUrJg79kwgSoxX6nwQFwbGKcbiFHFejNIeNxfkhcp4c4jdCorilWPxpEK4IBX6eKa4MCZBESdenMMNj1HEgy8FkYANggADSGHNABNBDhB29Db0wjtFTwjgAgnIAgLgqGQGRyTLe0TwGg+KwZ8QCUDB0LhAea8AFEH+6xCruDqCTHlvkXxELngCcT6IAHnwXiofJRrylgQeQ0b4D+9cWHkw3jxYZf3/nh9kvzMsyEQqGemgR4b6oCUxmBhEDCOGEO1wQ9wP98Ej4TUAVheciXsNzuO7PeEJoZPwkHCN0EW4NUFYIvkpytGgC+qHKHOR8WMucGuo6Y4H4r5QHSrjurghcMTdoB8W7g89u0OWrYxblhXGT9p/m8EPT0NpR3Ymo2Q9cgDZ9ueRNHua+5CKLNc/5kcRa8ZQvtlDPT/7Z/+QfT5sI362xBZgB7A27CR2DjuKNQAG1ow1Yu3YMRkeWl2P5atr0FucPJ5cqCP8h7/BJyvLZIFzrXOP8xdFX6FgiuwdDdgTxVMlwqzsQgYLfhEEDI6I5zSc4eLs4gqA7PuieH29iZV/NxDd9u/c3D8A8G0eGBg48p0LbwZgnyfc/oe/c7ZM+OlQBeDsYZ5UUqTgcNmFAN8S6nCnGQATYAFs4XxcgAfwAQEgGISDaJAAUsB4GH02XOcSMBlMB3NAKSgHS8EqsA5sBFvADrAb7AcN4Cg4Cc6AC+ASuAbuwNXTDV6APvAOfEYQhIRQETpigJgiVogD4oIwET8kGIlE4pAUJB3JQkSIFJmOzEXKkeXIOmQzUoPsQw4jJ5FzSCdyC3mA9CCvkU8ohqqh2qgxao2OQJkoC41AE9BxaBY6CS1G56GL0TVoNboLrUdPohfQa2gX
+gLtxwCmiuliZpgjxsTYWDSWimViEmwmVoZVYNVYHdYEn/MVrAvrxT7iRJyOM3BHuILD8ESch0/CZ+KL8HX4Drweb8Wv4A/wPvwbgUowIjgQvAkcwhhCFmEyoZRQQdhGOEQ4DfdSN+EdkUjUJdoQPeFeTCHmEKcRFxHXE/cQTxA7iY+I/SQSyYDkQPIlRZO4pEJSKWktaRepmXSZ1E36oKKqYqriohKikqoiUilRqVDZqXJc5bLKU5XPZA2yFdmbHE3mk6eSl5C3kpvIF8nd5M8UTYoNxZeSQMmhzKGsodRRTlPuUt6oqqqaq3qpxqoKVWerrlHdq3pW9YHqRzUtNXs1tlqamlRtsdp2tRNqt9TeUKlUa2oANZVaSF1MraGeot6nfqDRaU40Do1Pm0WrpNXTLtNeqpPVrdRZ6uPVi9Ur1A+oX1Tv1SBrWGuwNbgaMzUqNQ5r3NDo16RrjtSM1szXXKS5U/Oc5jMtkpa1VrAWX2ue1hatU1qP6Bjdgs6m8+hz6Vvpp+nd2kRtG22Odo52ufZu7Q7tPh0tHTedJJ0pOpU6x3S6dDFda12Obp7uEt39utd1P+kZ67H0BHoL9er0Luu91x+mH6Av0C/T36N/Tf+TAcMg2CDXYJlBg8E9Q9zQ3jDWcLLhBsPThr3DtIf5DOMNKxu2f9htI9TI3ijOaJrRFqN2o35jE+NQY7HxWuNTxr0muiYBJjkmK02Om/SY0k39TIWmK02bTZ8zdBgsRh5jDaOV0WdmZBZmJjXbbNZh9tncxjzRvMR8j/k9C4oF0yLTYqVFi0WfpanlaMvplrWWt63IVkyrbKvVVm1W761trJOt51s3WD+z0bfh2BTb1NrctaXa+ttOsq22vWpHtGPa5dqtt7tkj9q722fbV9pfdEAdPByEDusdOocThnsNFw2vHn7DUc2R5VjkWOv4wEnXKdKpxKnB6eUIyxGpI5aNaBvxzdndOc95q/OdkVojw0eWjGwa+drF3oXnUuly1ZXqGuI6y7XR9ZWbg5vAbYPbTXe6+2j3+e4t7l89PD0kHnUePZ6WnumeVZ43mNrMGOYi5lkvgleg1yyvo14fvT28C733e//l4+iT67PT59kom1GCUVtHPfI19+X6bvbt8mP4pftt8uvyN/Pn+lf7PwywCOAHbAt4yrJj5bB2sV4GOgdKAg8Fvmd7s2ewTwRhQaFBZUEdwVrBicHrgu+HmIdkhdSG9IW6h04LPRFGCIsIWxZ2g2PM4XFqOH3hnuEzwlsj1CLiI9ZFPIy0j5RENo1GR4ePXjH6bpRVlCiqIRpEc6JXRN+LsYmZFHMklhgbE1sZ+yRuZNz0uLZ4evyE+J3x7xICE5Yk3Em0TZQmtiSpJ6Ul1SS9Tw5KXp7cNWbEmBljLqQYpghTGlNJqUmp21L7xwaPXTW2O809rTTt+jibcVPGnRtvOD5v/LEJ6hO4Ew6kE9KT03emf+FGc6u5/RmcjKqMPh6bt5r3gh/AX8nvEfgKlgueZvpmLs98luWbtSKrJ9s/uyK7V8gWrhO+ygnL2ZjzPjc6d3vuQF5y3p58lfz0/MMiLVGuqHWiycQpEzvFDuJScdck70mrJvVJIiTbCpCCcQWNhdrwR75daiv9RfqgyK+osujD5KTJB6ZoThFNaZ9qP3Xh1KfFIcW/TcOn8aa1TDebPmf6gxmsGZtnIjMzZrbMspg1b1b37NDZO+ZQ5uTO+b3EuWR5ydu5yXOb5hnPmz3v0S+hv9SW0kolpTfm+8zfuABfIFzQsdB14dqF38r4ZefLncsryr8s4i06/+vIX9f8OrA4c3HHEo8lG5YSl4qWXl/mv2zHcs3lxcsfrRi9on4lY2XZyrerJqw6V+FWsXE1ZbV0ddeayDWNay3XLl37ZV32umuVgZV7qoyqFla9X89ff3lDwIa6jcYbyzd+2iTcdHNz6Ob6auvqii3ELUVbnmxN2tr2G/O3mm2G28q3fd0u2t61I25Ha41nTc1Oo51LatFa
aW3PrrRdl3YH7W6sc6zbvEd3T/lesFe69/m+9H3X90fsbznAPFB30Opg1SH6obJ6pH5qfV9DdkNXY0pj5+Hwwy1NPk2Hjjgd2X7U7GjlMZ1jS45Tjs87PtBc3Nx/Qnyi92TWyUctE1runBpz6mprbGvH6YjTZ8+EnDnVxmprPut79ug573OHzzPPN1zwuFDf7t5+6Hf33w91eHTUX/S82HjJ61JT56jO45f9L5+8EnTlzFXO1QvXoq51Xk+8fvNG2o2um/ybz27l3Xp1u+j25zuz7xLult3TuFdx3+h+9R92f+zp8ug69iDoQfvD+Id3HvEevXhc8PhL97wn1CcVT02f1jxzeXa0J6Tn0vOxz7tfiF987i39U/PPqpe2Lw/+FfBXe9+Yvu5XklcDrxe9MXiz/a3b25b+mP777/LffX5f9sHgw46PzI9tn5I/Pf08+Qvpy5qvdl+bvkV8uzuQPzAg5kq48l8BDFY0MxOA19sBoKYAQIfnM8pYxflPXhDFmVWOwH/CijOivHgAUAf/32N74d/NDQD2boXHL6ivngZADBWABC+AuroO1cGzmvxcKStEeA7YFP01Iz8D/JuiOHP+EPfPLZCpuoGf238Bk9B8XQo0or4AAAA4ZVhJZk1NACoAAAAIAAGHaQAEAAAAAQAAABoAAAAAAAKgAgAEAAAAAQAAAf6gAwAEAAAAAQAAAKIAAAAAGoXaLQAAJsNJREFUeAHtnXfYFcX1x1FBKbHgg6KigMZEsKMoMSoqalBUxBIhWGOMsfcSEwvGhr0QBPNYiCZKRIEQwPAEFRGNogRRihgLigJBjRpRVCy/T5gn6/5umbt3Z+fu7rvf/eO+80495zuzZ2fOnDmzyjfffNNMjxAQAkJACBQJgVWLxKx4FQJCQAgIgf8iINGvcSAEhIAQKBwCEv2F63IxLASEgBCQ6NcYEAJCQAgUDgGJ/sJ1uRgWAkJACEj0awwIASEgBAqHgER/4bpcDAsBISAEJPo1BoSAEBAChUNAor9wXS6GhYAQEAIS/RoDQkAICIHCISDRX7guF8NCQAgIAYl+jQEhIASEQOEQkOgvXJeLYSEgBISARL/GgBAQAkKgcAhI9Beuy8WwEBACQkCiX2NACAgBIVA4BCT6C9flYlgICAEhINGvMSAEhIAQKBwCEv2F63IxLASEgBCQ6NcYEAJCQAgUDgGJ/sJ1uRgWAkJACEj0awwIASEgBAqHgER/4bpcDAsBISAEJPo1BoSAEBAChUNAor9wXS6GhYAQEAIS/RoDQkAICIHCISDRX7guF8NCQAgIAYl+jQEhIASEQOEQkOgvXJeLYSEgBISARL/GgBAQAkKgcAhI9Beuy8WwEBACQqB5PAhWWWWVeAXzW+qbb77JL/HZpDzFUaTezOaQEFUNQyCm6Ie+Qr08KQqphg2FVBpKZRSpN1PpazWaKQSk8MlUd9RHzPvvv//BBx/UV0a5hUCzZp9++um8efPee+89gVFYBCT6c9n1kydP3nfffdu1a7fuuuvutttu48ePr8bG1VdfffTRR/OqM9V99dVXP//887vvvpt/w/kHDRp03HHHhWOCcMX8QaoCuUOAuUL//v3btGmz5ZZbrrfeevvtt9+iRYvicaGxEQ+3jJSS6M9IR9RBBoIbuf+jH/1o2bJlb731FqL/oIMOqjb9R6Py9ddft2zZ8oknnthoo40++eSTn/3sZx9++GG4vWOPPfaXv/xlOCYIV8wfpCqQLwQYDH379n3nnXdee+01wi+//DJD6LDDDovHhcZGPNwyUkqiPyMdUQcZZprWvXt35m6bbLLJZZdddumlly5fvvzRRx894YQTzjzzzPbt2++1117z588PKl2xYsWvf/3rf//73+Y933///d99990g9bHHHnv44Yc/++yzHj163HXXXVtttdV3v/vdO++8kwzh/KgIqHbNNdck21NPPRUUVyAvCEydOnXatGl08WabbQbNW2yxxR133LHTTjshxO+5
5x5GEdOCX/ziFySNGDFiu+22Yxgwur788kti/vGPfxx++OEMrQMPPPDJJ58kJjw2lixZwmKCVCYlM2fOJFVP1hHg4x/jgasYpfJbJFP8Isd5db/zne+cccYZY8eORaAbYB944AHo/PnPf86byTqgX79+xF955ZUDBw7k3SbplVdemTRpEoHRo0ezWg+6g3cepZDJ8/3vf588iACy8TkJ8v/nP//ZeOON+WZQ+cUXX0zr//rXv4Ia4gVoIl5Bx1JptetItnvxYcOGrb/++hXrYZwAS+/evZkHTJw4kfBtt932t7/9jU6//PLLKcLAOPHEE5977rlzzz0XZRFLyWBsMGnYeeed99lnH/Kb5SNr0IqtKDI7CMR894r28mSNXyZZ11133bbbbgthPGefffZXX31lRD/vIcMLRRDxSPMS0c/OMPEs+cNDMCz6efNJQtCTjWl+kP+RRx4hhnhTENF///33hyuJEabCGKXci6TVrjvljjWw8YMEr1gJ44SvAqOIVOYNfPtNNpYIpgij66OPPmIA3HfffQDItCAYG88//zwxb7zxBkX4JFDPQw89ZIrrN7MIxDfupLP1pIIAwh1Vz/krH74BaGYuueSSPffcE2L4GKyxxhoEOnTowO+LL77Ib10PGiTyo9Xhl5VBUPbNN99krmfiiUQbwKw/SFUgFwhsuummrPxYJmIdYAhGlP/ud787/vjj+Zdp+6qrrkoAVeFf/vIXhL7Jw2eeAKW+973vLV261CiLTJL5ff311wlQeRAZVicGkQpkCoH/9rSefCHwxz/+ESWsoXmDDTZA/cKq3Lx+vJ8mnjecAPOvellbbbXVKhbp2rUrKwlmhaQykZk1axZr/Io5FZlZBOhEaGPDP6AQhSFaoHXWWSeIIcCH4cILL2TJyPP2229PmTKFbeFTTz31+uuvZ7I/ZsyYcGbCa6+9Nr/MQkwRlEKxt45Lata//hDwMutvkkdm7EwhDf11UknNu+yyC5OvW2+99eSTT27evDkqV95P5vtEEkAzs8cee9x+++3I/Y4dO5aUNdM6VLFY+5QkVfw3yE+jZGCFgSoAlS5h36LfDnhFaqNH2itvZG9Gp9k9J2u1H//4x+eddx6LQgYMMhqBjqrH9HJQP9P/Bx98kE0jFnmnnXYa5mFoFEklHtx++9vfEkaxE4yNHXbYgRgUQWw+MTbYB549ezaWo0GFuQssXLhwwYIFrHRRjfJJ483iDAR2cai8zOcNK7svvvgiGCdAsfrqq7M8atWq1VprrcW3kM8nCLDvzYvGzKxTp06slko+senC4kX0w1IASrrsNaZ1uxxJnAYUL4hgjHnOOussKmdgocPt1avXyJEjGXxY6DNSiUQ5y4cB2oIXmzCDb9ddd916660Z02HpH+QJU1uSf/DgwSeddBKCA4tAtAFUHs7sI5zKKGpwb/rAzVLnkCFDkOnYaJk8CGs2bwmHxwkbuU8//fTmm29OPB/4UaNGoQY84IADjBaRUcesgkpYfQZjibE3YMAAdJAUYTRiJGbqz8UvAp3tCgyTUJDOnTsXm1fEN5IaeQ3L/GIExRmatm3bItN5xVq3bs3nsEWLFmaJzChlNcyXgCURnwR0aHwkWH+j9UIpyvqb/TMW5TyUxaqK93ebbbbZfvvtd9xxRz4YaUG0Sry3i4FiKWhPTYtVf+2mwi+DjMHEvIyhaVjj9bv55pt5aZmwoHitKM1NTmb9jOPogAT5GdMsLHgrGP3Ri1fLacfNnlqtTvf4tNp1pzx6DezQMpllkFTrR95u5rxs9rDHCyCmZubCTGbZZ2K3icfMYYOxwXSYnV5kZV1DKzrNyeZE3GMMjS4La9c5c+bwhUMQsyrio4VaLNjTSrZRzLLZR2FJxDeGL82MGTNYTP/whz/s2bMnUzc+DMk2Z6/NJsEtJe2vhz3VUm1OkzLCrxH9zz77bF5gtONmT/XHY1rt+uNINQcI8D1je4PT78zEsWTlnApiN1gDBdkaE8BK++9/
/zvW0nyBOJ/PIU2WVpy569y5cwMIkOhPAOSMCAvWqtOnT6/mkiEBPpOuwo6bPTVpWr6tL612v6VAoaQRQBWDLTJzIybahxxyCOK1T58+aGySbid+fSyw2KXDsIpddNYfP/nJT4466iiv6iCJ/vi9FZSUsAigqCtgx82eWldDdWVOq926iFTmiAjgtwrrVfbGsH3gbCO73BELppiNUxHsmbMgYBuG3TX27XwQI+NOH6iqTiEgBFJGAFvk008/nW1V9i3wQsFsOhdyH9RwmPHnP/8ZXylsSGCkxwFpNlQSR1OiP3FIU66QgW6crqRMh5rPMAJYpPDUS2D5uMLEszyy3mp95L/hhhvYoMbUEqHJcYTGaM+TZYQDdJhjYVXBBwCjoOCEXVKtSPQnhWRW6sFuIXyusi6y0DZi5FBXEWVOFwG6Gw1VyVPTDzPW+sZpT3TisUspV46jSzniiCMwNoMAVCvRa/OXk41cNkvZxX3hhRdwdoKZpr+2GlAzFticucPpFlAfc8wxQJ1UoxL9SSGpeoRACggYG+uVVuPf/nCSqJGkBC7BG9loxbbYxcVK8gc/+AEe6Lp06VIxTx4jsQHFcg9rWk7VYWKbCAspiP6SGUoT+JeesHORSFdVq4RNIRSavO0cz8HammycLvnVr37FMRxMBYxrraFDhwazPE5jXnvtteQsd9EcNIH9PlYQnOEihszUT23UYARNkC3FgB1wl1SYshdPketqTbPOCz8cNcIbMyezmCfScQTMIKEfJ0yYYCrBmH333XfHgP3II480qmQs/csdL1OQUcRQ+cMf/mAKouTBlydV4Z/ZWBIHLsExMKOG3/zmN2bsPfPMMxRBs3TFFVeQH9Oaq666ygwqNO/GATgUcgaqGl91xfPpO/TQQy+66CJcWtVVMC+Z8aWKYRI7ARyhSIBmXuYYDw1bSrmkWqrNaZIdDUemXnrpJepnTKCrQbmJYYDxssBrhqEYMyDcutEELlkCX4yETznlFDN6KBJ20cxcifP9HNbllCb+mfk88PZydBNjA84Gc+SEV92R4HBxOzIuqeFWkg3bqUq2rSi1me7GcjF4Hn/8cQrijRVS+UVRQIBOpAdx08ZxPFKxGyESGc2AIYl/Eegca2JSSYWB42XOBpKNKcXvf/97shGmrBkJN910kxHijDQzlji2Gow9AgwhLhEi/7hx4xg5eIagOWrAzMY4dyOGbIy3G2+8kWzuDzeOXXPNNe71ZLwGLuTgbiV3Ir2fxaez9fhDgMP0vGBYMtAEU3vjq5kwrzSnLpn9oYq1tD58+HAmXywn2UQy7zlyH0f/zOOY7uEElApxC4GGkbkGe025dsxiwSHvSexqBiwwHowbV4Qv03PicfDHxx5HC+wccj0nnUskWhEzO2bSzWwAvyDM2TmOy47o3nvvTTZ2EUhCNOMtivyEzTBjfoDRofHqU/H84L333ousZ/vXWNSQHzJM2OQ3CmucHDC9YIKC0X1AfOzAn/70J4ZutcvmYlebwYK33HILSyh6h25yIS8FhY8LuSpbggCLXJyBmEimbMb7LhM05D6RLOd5H0qKoA4KYliYm2z8GhfNVMjBQj4DxnKDFTRTDAYZp/P5PBgfjUFxBTKCADru4DGSGsKMEx4C6IiR4ATMESFj28NoMcSzsc8gMZu0aI1QduEChO89c3M2SznsarLhx8YE0NUYX378ixMCExn8MvaMQwL2V83YY4sSUWUymIIYLPKt4kPC+OSbZPE4ElRbM8AcyCxlauZsAhmAjnWYIyMS/Y4AplwclyOYfxkicMTIa0m44iHAwJiPmV1AdLmLZl7dxYsXU4NZnlM563p8UTEN5DAkL1hQVoGMIxD2r4dAL6EWV5Qm5p///CcLBeN4p8TxMh+PYHQFBjx8KvD9Z8oa3+DhmsvHHl8CrG5MHuongAehgw8+GHUinxYsFwcNGhSuIV4YnWTwMYtXQ45KofFHredIsES/I4ApF2cQMEnn9AevE7MeXqSK
BDHP4l5WhD5nW/BhUjGPieQCAB62gtHA4mSKzMYzBB7Y2Rgw28iW4kpKBQF6KvxEUaGwbYs0Ryhz0hVryMDxMh8MxApzfCb+xDO6+JfvBLf1GtYYCah0KMtGMSePavKL9okpKmOPdYmphEa7devG9wNlI9tR1QZtzZqDDCi+obDBdk1B640PsGGDgs6xXYl+RwBTLs4bixaeWRtOZXGaiD63IkG4UEfPw4hhtkXminnCE0OyMYcyCmLecPw1ourhBcMapGJZRaaFgOk1ZuLhh4l8CT3hzjVhdm5Q/aPZ//jjj9HpMX5Y1eF1mQ0eRotxvMzowscZ0pk9HqPGoVrGGDMAyuL6my9EoK4JNxFuHcMw6mHsMU2BSJYF/IuWn+uGENY0ijVaOH+MsGm6ONfGwan7UikFHz70E0IkRgfntEgD+GXKj+rGONGthhImHGhyOCFS7RWtVpA9ADYAUAiwGqiWJ168HRmX1Hj0RCllpypKDdnJgwRHA4PT72BIVHS8zMYPlvvh3mcfiHu7GEso62uyw5KUkYnuiI8KK0g+GFz5QCnm/kxH+ITUrCFKBlhgHVOQqQl6V5y7OUrRmFLY/gK4pEbp5nzlsaORL16SpdaOjEtqsnSGa7NTFc6psEHALCa44IXDIlinYC7s4yIX+oXlLA6QiwA7BzK4ZsBR9EvhU4ShIh6FQGoIYF6MKwJWnCwduEfIh9w3vKGSMkcNUmO1IQ3DY6B8c2lQs34X9CKV1TyxGkx2ZBxTqzXqO95xLuabvKZaP6MFHRT7xhxWcN8CzSxK7MlzNJqNd3ZKHEda88wyKcKEgAsCji9GvKYRQPEKqpQ7Auwkc4aARQYrjCbpywGhj4kUPMKpO1xS+LhjqBqEgBDIBAK4G+KsANsJWBNxu3omaEqCCHiBI/iCO3hMospmEv2JwFjoSi677LJC8y/m60HA92jBYAkXdRikcsr9ggsuSMo3XD0sJpkX+uECXuAIvuAuqdol+pNCsqD1cPSfc7/48Coo/2K7HgQaNlqwH+WkMUfbMErmsILxT1UPpennhWYoh364gBdjEZsgWRL9CYJZxKo4E4R/GM58Gi8xRYRAPEdGoJGjhRPs3HI1f/58NmA4RMbsZNSoUZEpTTMjdEItNEM59MMFvCROkER/4pAWrkLO+uLkh0OhnPk0TqQLB4EYjoxAg0cLR8m4qwunERxRxn81reO9HP8Txn1pZKq9Z4QeqII2KIROqIVmKA988CVOgUR/4pAWsUKu7sNJCybbKCVvv/32IkIgniMj0PjRwiW9SFWuB5g5cybLU1xUcciAjVOu7a3odzoyK64ZaR0aoAR6oAraoBA6oRaaXWu3lpddvxWeJBJZtaViaJgE7bXrwJKaTIEfQdz+cH6HgYsBcs3CdmT8pdYkLHYGO82xq20yBV1Gix2EepHHZxwu76dMmcKxWLxUcU0NljO4GGL60rVr1ygOKuz0VExlZTxv3jyamzVrFs7suCCB5jiEjIc7/KLX5RG9Xn7L6YkplewNu6SWk5j3GDsaeeeu5GWGHdw7c5UHN4HwUtm5syPjL9VOlUuqnWaXmptGWZfRYkfABXk+A9xjynQb76dz587FkpLLBrCdx5wGl4i4KsKTOe7t2HFFOnOStnXr1kzSWbsEPs/xiI7GBp9I3ELDLQVUyJ2XOBPF++miRYtQ3eCwCEdYmOtwYzCO85jd47u0e/fudYn7MAIu/Jp6JPrDeHoJu3eSF7ISqrT8ZaZizlVyiQfuuniXLE7lQMZOhWW1ZEe1Zqq9XX+pFo78NZqdml1Gi50Le4/by5ancvU5BjbIa6Q2Hu6Q4MhxfBAh0/Fwx4OIx/dc0Ju0jkdSPglckcSDQGfY87Xgm4HbO74ffEXY4jY3I5U3FyPGnV+d5o0Bu4rUQIBT5viVxURhww035M5YzBUqFgjenIqpDO6K8YlE2ptOpInySrxyVN5cXmIijpZGsoOM5sFLWs1GGUg57VZt89bsXGWI
iQDXwvTt25crHotwY2pMjFTsfwjkdLTkVO6Dumb9/xt6+usBAS7LxuaHuzhmz549fvx4Dy2oyqaDgEZLQ/uSBUuMBxItpVxSLdXmNMmORpgpl5t3MBKgoVQemg5zUR7GjoJ9M3a3ypMsMfCSSqqlUcckO0f1Vq7REkYsWWzDNWcz7M6vtnnB0O/DkpDRE6WNyy+/nGy+nZxEocRHHq72Za/sr3/9K4Z0Ueq34+YvNQpt8fLYaa63To2WMGLJYhuuOZthd36l689mzzZBqvBDguk0Rp8333xzE2RPLCWKgEZLonBWqEyivwIoivKEAMcUzzzzzHPOOeeYY47x1ISqbTIIaLR47UqJfq/wqvJSBJjyc6k0G3o9evQoTdP/QuD/I6DR8v/xSPI/if4k0VRdURAYOHAgl04899xz99xzT5T8ylNkBDRaPPW+RL8nYFVtVQQ45IXGf6eddvrpT39aNdPKBPayqj32gi6p1Vp0j4cqeyUuZDfVstFHS1NFwBNfEv2egFW1lRE4++yzjzzySC5Qrekx0W5UV7n2hGLtTXtKTYj2JlVN9NHSpNhuCDM60tUQmNXISgT23Xdf3CXedNNNvNKCRAjYEdBosePjmCrR7wigikdFwNj1o+WPaNcftV7la4oIaLT47lUpfHwjXF/9HNG0q4OrpeITsVqS73jjjtHC5xNPPIEvQzzccpmX5L4FqHqTNFrqRUz5v0UgnuKS8paCLqmjR48OiMM1du/evfHrZGmrYUk4477rrrvw11pvi3Y06q0ta/nx4mB35HD11VeDwAEHHJAs5XZU/aUmy0W4NjvN4Zz5DfsbLUVAL9zv7vxmbtYPe3CFy18eDMDxgo3vX+5PIDLdB6HP5VP47E6XjHy13r9/f3y3XXjhhfLdlq+OS4VajZZGwp450W+Y54oDHlz+jhs3DkNANgaJ524zdAvcncZpoKeeesrkfOihh7bYYov27dufccYZTMyffvppSpkk8hx++OGE77jjjnPPPZcTpPjgJmCKUGrChAkm54gRI7ibDb/h+M/58ssvqYcmmONzfRqR5q7Bww47jMz777//u+++O2bMGEMJdXLzjqlEvyUI8M3mpmmWcYMHDy5J0r9CoAQBjZYSQLz/G15ERA9DliWzS+rDDz9cUhyFJgpixPHGG2+M5H3yyScvvvhidEEsC1566SUy33bbbY888gj7Qvfdd9/EiRPJZmhjpskta4QvvfRSsvHLVfcE+Kg88MADxx9/vEmliKmEg+OUxSsWE3xiqHDSpEnM9AkvX76cMAEEGRf3EHjwwQfJz0VrN954owUKkshsz5Dr1IpL+FdffZXLifjQck2dJ+7sqPpL9cQO1dpp9tduI2v2N1qKgF64p9z5jSmV7A27pJaL/iFDhuDvF+FOtUyxDf+Ifs56cAcIlxqbGAQx8/Rqon/XXXc12ZD7fEsIcxsnFXLL2kEHHYR8N6nUgMQ3ov+xxx4j0kzqWXC8//775DfXbBKAKi5Z5go3bvI0Zav9krlaUhOIL3+ZwZAdXS5o9MqdHVV/qf6YstPsr91G1uxvtBQBvXBPufObUYUPjAUP1xnj8REhywcAbY+JRz/DrJ8kbgIxMfvssw8T+aAUAVQ3wb+bb765CXNnJlN1wuwi8Mt9yvPnz0daGTMYvgFco2xyMmklYFr8/PPPTSS/HTt2vOGGG04//XSSTjnllFVXzQGGAfG+A6eeeioYHnfccYFGzl+LFsulVBq10BMlCZrt2fwxlVbNjRwtafGY2XazLrbQtLDZi+jv2rXrW2+9haQGSr5+s2bNQguELv7tt9824OITBhU8YSbyJob8JsBv8+bfnmDgBQviCay77rrsQzLT56E27hUxqUxdw9mCMNP/gw8+GAUUywKuaWYNESQVPMD+x/Dhw4cOHWp2R7yiEZ4BlYdTbLqcmERivHKUSuWNHC2pMJjxRjMq+pmJ8zBz5EQfup1jjz12l112AUpkCnN5rvsgjOjv06fP
5MmTyYY4PumkkxDEHTp0WLp06dSpU7kVBDEUBX2WC3wzFi9evGzZstNOO40ZfcVSZnaP8pr1R7du3dD8MHZ79epFoxXzFypyxYoVm266KbcwvvDCC6yECsW7mK0XAY2WehHzkT9zot9MybusfPbbbz92YlHfo6Vp0aIFhiLI97Zt2yLxb731VibyWAVgw4O6v127dgj9AQMGoBRClO+xxx5IIrT2FSELz/oJY/PD7i4aIcyE0PZce+215aXIBg1sGGy99dZsYLLbjOUP+UeOHInxYnn+QsVwUIuVE+ovbJ+22WabQvEuZutFIJXRwtvKyj4gFcUAw/W6664LYiwBpnqjRo2yZLAkuZS1VJtMUrzVKG1bCrqkWqoliUk3Vj1oZsLZmOCXWJIwhUc7H85jD3/99ddvvPEGG7YE7DkZuCbDggULuEjIntmk2tGIUkOW83Tu3BkG+/Xrlyki7Zj7S/UHgp1mf+0mW7O/0WLHB7M9pnfB241JCPmxQ4vCHfblxhQwSuaSPC5lS6oq+dfOb0nmiv/aJHjFAibS3rBLqqXRnCbZ0cgpU2GysZoN/5uFsB1zf6n+eLfT7K/dxGv2NFrs+JgzodOnTzfsnHzyyZwWIoz7WHQGWP0dffTRzCCJQRnF4R5i0B9gxYchCXKfyjnWQyrnVIhHBc1cZ+HChcTcfffdl1xyCQYmJ554IvNCTheZsgj9uspSFddXYIFCc0AEGcRYHju/loJBkkR/AIWvgHsn+aKs6dZrx9xfqj9E7TT7azcvNdfEB+UwtuCwg1RFdg8bNmzJkiWUwlQPH1M4GjFm4mwQksqRIA4JkTpnzhw8kSDNZ8yYQZiYs846CxeE5Ef9yzLiyiuvJBJ/Mxh9XH/99QSwGjeRfAmily0/XWRHnkbtGWqmxixvb9gltSbFuctgRyN37OSCYDvm/lL9gWOn2V+7eam5Jj633HKL0fkg6MmMaTi3PwZaIJS9RKIoxngE7ZDhms8DmQOlzUUXXWTWCqRy0If8qPKR8nwYsDwkEgMHPjBopPkkcPIUvXT0suWni+zI1+TXXpzUzG3zwpIeISAEhECyCGAPgun2zJkzx44dy5wdef3aa68Rg+UeRhyYldAcOh9UQ9iRm6YxKunZs2dABvmNnSExnTp14he7Bn6xKzHmf3wbWrVqhaUDNiOYurVu3Tp6WQwaK54uCmpIPCDRnzikqlAICIHMIYAFIKbY+O/inNBRRx0FfdjsMcf/73GeTz7BLeO0adOwCeSIaHCoE68t5sy/YQaZHhwVYouYSOz9wnxSCe5eWAqgFEKPZFYGEctWO10Urj/ZsER/sniqtqwgwFSu2pMVEkVHYxFgL/eaa67h3A+zflrefffd2fhFuDNOMBY/9NBDmbzze++992K/h2afrV3umSCSEz8cJ8KmHMnOF4KyeJFBRbPGGmuEOWC/F9PwDTfckCtIiefUZ/SyEU8XhZtzDddUCVXMQKsV402kS6ql2pwm2dHIKVO5JtveIy6p/mCxU+Wv3bzUHAUf9l3JxpQ/YOqcc84hhgf9D1eHEs8eAPp6YtjsxU0kMRzeJJUFARp8Tg6ZJGKwDiL1qquuCipkp9ekUta4BYteFvN0FiUU52EtwtIhILJigGwV46NHrkLWlc3V98N30lLQJbU+OvKQ245GHjhoajTae8Ql1R9Sdqr8tZuXmmPjg74ecY92vmXLlgGz7AEg3I2bLyKZ8uNRhlNghFEH4beRM2Jh3zBBQbw9smIgtU2bNiYyelkkKhKfA0l8ZmAnqLNiIDa/QW02CR5kKg/YG3ZJLW8r7zF2NPLOXR7pt/eIS6o/NOxU+Ws3LzUXDR93fqXrz8vYFp1CQAgIgcQQkOhPDEpVJASEgBDICwIS/XnpKdEpBISAEEgMAYn+xKBURUJACAiBvCAg0Z+XnhKdQkAICIHEEJDoTwxKVSQEhIAQyAsC315bWC/FWBfVW0T5hUBGENDozUhHiIy0EIgp+i3nueBE
71Va3al2oyCg0RsFJeVp2ghI4dO0+1fcCQEhIAQqICDRXwEURQkBISAEmjYCEv1Nu3/FnRAQAkKgAgIS/RVAUZQQEAL5QmDQoEH5ItiRWnd+Y7pvs9Ntdy1kT7XXnMfUovGbxz4K02zvL3tquJ5kw2m1mywX/mpbb731Zs+e3b59e39NZKdmXI1yS4y5Iyw2VZr1x4ZOBYWAEMgKAlydOHny5KxQ45kOOA2uiozdlER/bOhUUAgIgawgwMVYw4cPzwo1numAU3MRmEs7Ev0u6KmsEBACmUCgf//+3I01ePDgTFDjkwh4hFP4dWwk5pGumq2imqyZRxmEQDYR0OjNZr/YqRo6dOhee+3Vrl27E044wZ4zv6l33nnnsGHDHn/8cXcWvIh+nZZ07xjVkBYCGr1pIe/Y7mabbTZ69Ogjjjhi8eLF3JDuWFsGi19xxRUjRoyARzh1J08KH3cMVYMQEAKZQGDHHXfkbvRnnnmmT58+L7/8ciZoSoIIeIEj+II7eEyiymYS/YnAqEqEgBDIBAKdOnWaMGFCr169tt9++wsuuIB70jNBVlwioB8u4AWO4Avu4tZUWk6ivxQR/S8EhEDeETjvvPNeeeWV5cuXt23b9vzzz1+wYEHuOIJmKId+uIAXOEqWBYn+ZPFUbUJACGQCgY4dOw4ZMmT+/Pls2u+www6HHHLIqFGjMkFZLSKgE2qhGcqhHy7gpVahutO9nOa1UwE/9p00e/HcpRaN39x1UF0Ep9WbabVbFziZzczE+f777x85cuSMGTOQqn379kV13qJFi+wQvGLFiokTJ44bN27MmDFo8wcMGDBw4MBWrVr5ozAFKVy0QVw0fv0N1izUnFZvptVuFjBPkIY333xz7Nix48ePZ7+0d+/eGIP27NmzR48eCTZRV1XPPvvs1KlTMdacNGkS2vwDDzywX79+CSr0LcRI9FvASSZJL20yOGajlrR6M612s4F68lR89NFHjz766JQpU6ZNmzZnzpydd96ZufZ222231VZbde3adc0110y+yWbNPv7443nz5tHcrFmzWH9Mnz6d5nbbbbc999xz7733XnvttX00Wq1Oif5qyCQWr5c2MSgzUFFavZlWuxmA3DsJfAaef/75mTNnvvjii3PnzsWScq211sJ2ntl3hw4dNtpoo/XXX5+TYuy4Ip05Sdu6deuWLVuiL1pttdUMcV999RUam88+++zTTz9dtmwZFX7wwQfvvffe0qVLFy1a9M4777DaeP311zHX6dKly5Zbbrntttt269ate/fuDRb3YSjTEf1hCooQLtTeRtPuUERwWgxqFDUM+YULF2Jgg7xGai9ZsgQJjhz/8MMPkemfrHwQ8V988UXQI4yK1VdfnU9Cm5UPAn2dddbha8E3Y4MNNuD7wVekc+fOm2yyScNYqNlQCqK/Jk3KIASEgBDIBQJI/xRnAy4QSfS7oKeyQkAICIFcIiC7/lx2m4gWAkJACLggINHvgp7KCgEhIARyiYBEfy67TUQLASEgBFwQkOh3QU9lhYAQEAK5RECiP5fdJqKFgBAQAi4ISPS7oKeyQkAICIFcIiDRn8tuE9FCQAgIARcEJPpd0FNZISAEhEAuEZDoz2W3iWghIASEgAsCEv0u6KmsEBACQiCXCEj057LbRLQQEAJCwAUBiX4X9FRWCAgBIZBLBCT6c9ltIloICAEh4IKARL8LeiorBISAEMglAhL9uew2ES0EhIAQcEFAot8FPZUVAkJACOQSAYn+XHabiBYCQkAIuCAg0e+CnsoKASEgBHKJgER/LrtNRAsBISAEXBCQ6HdBT2WFgBAQArlEQKI/l90mooWAEBACLghI9Lugp7JCQAgIgVwiINGfy24T0UJACAgBFwQk+l3QU1khIASEQC4RkOjPZbeJaCEgBISACwIS/S7oqawQEAJCIJcISPTnsttEtBAQAkLABYH/AzFaA2FwdiyIAAAAAElFTkSuQmCC)\n", "\n", "Then 
when we ask a question, we retrieve the relevant passages from the vector store and use langchain and OpenAI to provide a summary for the question." - ], - "metadata": { - "id": "tZnIXBfrRpex" - } + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "GyAst2W-VpHb" + }, "source": [ "## Install packages and import modules\n", "\n", @@ -27,24 +32,26 @@ "Because we're using an Elastic Cloud deployment, we'll use the **Cloud ID** to identify our deployment.\n", "\n", "First we need to install the `elasticsearch` Python client." - ], - "metadata": { - "id": "GyAst2W-VpHb" - } + ] }, { "cell_type": "code", - "source": [ - "!pip install -qU langchain jq openai elasticsearch tiktoken" - ], + "execution_count": null, "metadata": { "id": "33A-cP-XvFCr" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!pip install -qU langchain jq openai elasticsearch tiktoken" + ] }, { "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "J8-93TiJsNyK" + }, + "outputs": [], "source": [ "import json\n", "from urllib.request import urlopen\n", @@ -54,15 +61,14 @@ "response = urlopen(url)\n", "\n", "workplace_docs = json.loads(response.read())\n" - ], - "metadata": { - "id": "J8-93TiJsNyK" - }, - "execution_count": 2, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "qtEOCsCLWCZp" + }, "source": [ "## Create Elastic Cloud deployment\n", "\n", @@ -72,13 +78,15 @@ "- Select **Create deployment**\n", "\n", "Now we can instantiate the [Elasticsearch python client](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/index.html), providing the cloud id and password in your deployment." 
- ], - "metadata": { - "id": "qtEOCsCLWCZp" - } + ] }, { "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "a-t1mglib54F" + }, + "outputs": [], "source": [ "from elasticsearch import Elasticsearch\n", "\n", @@ -86,36 +94,34 @@ " cloud_id=\"CLOUD_ID\",\n", " basic_auth=(\"elastic\", \"PASSWORD\")\n", ")\n" - ], - "metadata": { - "id": "a-t1mglib54F" - }, - "execution_count": 3, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "p0cQFDl1b9v4" + }, "source": [ "### Split Documents into Passages\n", "\n", - "With the workplace dataset, we need to split the content of each document into smaller passages. Models have a limit number of tokens length that they can handle. By splitting up long documents into smaller chunks, we can get round that limitation.\n", + "We’ll chunk documents into passages in order to improve the retrieval specificity and to ensure that we can provide multiple passages within the context window of the final question answering prompt.\n", "\n", - "Also if the whole document was represented by a single vector, it may lead to the inability to surface relevant content.\n", - "\n", - "Here we are chunking documents into 500 token passages.\n", + "Here we are chunking documents into 800 token passages with an overlap of 400 tokens.\n", "\n", "Here we are using a simple splitter but Langchain offers more advanced splitters to reduce the chace of context being lost." 
- ], - "metadata": { - "id": "p0cQFDl1b9v4" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dbHEoTF6vBXE" + }, + "outputs": [], "source": [ "from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.text_splitter import CharacterTextSplitter\n", - "from langchain.vectorstores import ElasticVectorSearch\n", "from langchain.embeddings import OpenAIEmbeddings\n", "\n", "metadata = []\n", @@ -128,29 +134,30 @@ " \"summary\": doc[\"summary\"]\n", " })\n", "\n", - "text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", + "text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=400)\n", "docs = text_splitter.create_documents(content, metadatas=metadata)\n", "\n", "embeddings = OpenAIEmbeddings(openai_api_key=\"OPENAI_KEY\")" - ], - "metadata": { - "id": "dbHEoTF6vBXE" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "kRPxrJnXWfMD" + }, "source": [ "### Setup the Index\n", "Next define the mapping for the passages. Langchain relies on two fields: text and vector." - ], - "metadata": { - "id": "kRPxrJnXWfMD" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BfhBEjLsOr0l" + }, + "outputs": [], "source": [ "# Define the mapping\n", "mapping = {\n", @@ -169,26 +176,27 @@ "\n", "# Create the index\n", "client.indices.create(index='workplace_index', body=mapping)" - ], - "metadata": { - "id": "BfhBEjLsOr0l" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "RmCUl0hxW4lG" + }, "source": [ "### Enrich Passages with OpenAI Model\n", "\n", "Next we are going to enrich each passage with an embedding from OpenAI." 
- ], - "metadata": { - "id": "RmCUl0hxW4lG" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qc1LXk-rOzNR" + }, + "outputs": [], "source": [ "# get the embeddings from openAI\n", "\n", @@ -210,54 +218,22 @@ " })\n", "\n", "client.bulk(operations=actions)\n" - ], - "metadata": { - "id": "Qc1LXk-rOzNR" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "rXJH_MiWejv7" + }, "source": [ "## Asking a question\n", "Now that we have the passages stored in Elasticsearch, we can now ask a question to get the relevant passages." - ], - "metadata": { - "id": "rXJH_MiWejv7" - } + ] }, { "cell_type": "code", - "source": [ - "from langchain.vectorstores.elastic_vector_search import ElasticKnnSearch\n", - "from langchain.llms import OpenAI\n", - "from langchain.chains import ConversationalRetrievalChain, RetrievalQA\n", - "\n", - "db = ElasticKnnSearch(\n", - " es_connection=client, index_name=\"workplace_index\", embedding=embeddings\n", - ")\n", - "\n", - "retriever = db.as_retriever()\n", - "\n", - "llm = OpenAI(openai_api_key=\"OPENAI_KEY\")\n", - "\n", - "qa = RetrievalQA.from_chain_type(\n", - " llm=llm,\n", - " chain_type=\"stuff\",\n", - " retriever=retriever,\n", - " return_source_documents=True\n", - ")\n", - "\n", - "ans = qa({\"query\": \"what is the nasa sales team?\"})\n", - "\n", - "print(\"---- answer ----\")\n", - "print(ans[\"result\"])\n", - "print(\"---- sources ----\")\n", - "for doc in ans[\"source_documents\"]:\n", - " print(doc.metadata[\"name\"])\n", - " print(doc.page_content)" - ], + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -265,11 +241,10 @@ "id": "OobeBT6rek7Q", "outputId": "ba7b3a7a-253e-4e7f-83b9-cec07ebdac09" }, - "execution_count": 12, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "---- answer ----\n", " The NASA Sales Team is a regional 
sales team consisting of dedicated account managers, sales representatives, and support staff, led by their respective Area Vice-Presidents (Laura Martinez and Gary Johnson). They are responsible for identifying and pursuing new business opportunities, nurturing existing client relationships, and ensuring customer satisfaction.\n", @@ -289,13 +264,40 @@ "The sales team is responsible for promoting and selling the company's products and services to potential clients. Their role involves establishing relationships with customers, understanding their needs, and ensuring that the offered solutions align with their requirements.\n" ] } + ], + "source": [ + "from langchain.vectorstores.elastic_vector_search import ElasticKnnSearch\n", + "from langchain.llms import OpenAI\n", + "from langchain.chains import RetrievalQA\n", + "\n", + "db = ElasticKnnSearch(\n", + " es_connection=client, index_name=\"workplace_index\", embedding=embeddings\n", + ")\n", + "\n", + "retriever = db.as_retriever()\n", + "\n", + "llm = OpenAI(openai_api_key=\"OPENAI_KEY\")\n", + "\n", + "qa = RetrievalQA.from_chain_type(\n", + " llm=llm,\n", + " chain_type=\"stuff\",\n", + " retriever=retriever,\n", + " return_source_documents=True\n", + ")\n", + "\n", + "ans = qa({\"query\": \"what is the nasa sales team?\"})\n", + "\n", + "print(\"---- answer ----\")\n", + "print(ans[\"result\"])\n", + "print(\"---- sources ----\")\n", + "for doc in ans[\"source_documents\"]:\n", + " print(doc.metadata[\"name\"])\n", + " print(doc.page_content)" ] }, { "cell_type": "code", - "source": [ - "client.indices.delete(index=\"workplace_index\")" - ], + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -303,34 +305,36 @@ "id": "SOeP4-DLnwDB", "outputId": "acff2056-eec6-46e5-cddc-9af9050c046e" }, - "execution_count": 6, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "ObjectApiResponse({'acknowledged': True})" ] }, + "execution_count": 6, "metadata": {}, 
- "execution_count": 6 + "output_type": "execute_result" } + ], + "source": [ + "client.indices.delete(index=\"workplace_index\")" ] } ], "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4, "colab": { "provenance": [] }, "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb b/colab-notebooks-examples/search/02-hybrid-search.ipynb similarity index 100% rename from colab-notebooks-examples/search/02-hybrid-search-with-rrf.ipynb rename to colab-notebooks-examples/search/02-hybrid-search.ipynb From 9541cf31242ba51e5419141a7f25bd26ba529855 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 10 Jul 2023 05:04:10 -0400 Subject: [PATCH 47/54] example json --- colab-notebooks-examples/search/data.json | 87 +++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 colab-notebooks-examples/search/data.json diff --git a/colab-notebooks-examples/search/data.json b/colab-notebooks-examples/search/data.json new file mode 100644 index 00000000..ebc81684 --- /dev/null +++ b/colab-notebooks-examples/search/data.json @@ -0,0 +1,87 @@ +[ + { + "title": "The Pragmatic Programmer: Your Journey to Mastery", + "authors": ["andrew hunt", "david thomas"], + "summary": "A guide to pragmatic programming for software engineers and developers", + "publish_date": "2019-10-29", + "num_reviews": 30, + "publisher": "addison-wesley" + }, + { + "title": "Python Crash Course", + "authors": ["eric matthes"], + "summary": "A fast-paced, no-nonsense guide to programming in Python", + "publish_date": "2019-05-03", + "num_reviews": 42, + "publisher": "no starch press" + }, + { + "title": "Artificial Intelligence: A Modern Approach", + "authors": ["stuart russell", "peter 
norvig"], + "summary": "Comprehensive introduction to the theory and practice of artificial intelligence", + "publish_date": "2020-04-06", + "num_reviews": 39, + "publisher": "pearson" + }, + { + "title": "Clean Code: A Handbook of Agile Software Craftsmanship", + "authors": ["robert c. martin"], + "summary": "A guide to writing code that is easy to read, understand and maintain", + "publish_date": "2008-08-11", + "num_reviews": 55, + "publisher": "prentice hall" + }, + { + "title": "You Don't Know JS: Up & Going", + "authors": ["kyle simpson"], + "summary": "Introduction to JavaScript and programming as a whole", + "publish_date": "2015-03-27", + "num_reviews": 36, + "publisher": "oreilly" + }, + { + "title": "Eloquent JavaScript", + "authors": ["marijn haverbeke"], + "summary": "A modern introduction to programming", + "publish_date": "2018-12-04", + "num_reviews": 38, + "publisher": "no starch press" + }, + { + "title": "Design Patterns: Elements of Reusable Object-Oriented Software", + "authors": [ + "erich gamma", + "richard helm", + "ralph johnson", + "john vlissides" + ], + "summary": "Guide to design patterns that can be used in any object-oriented language", + "publish_date": "1994-10-31", + "num_reviews": 45, + "publisher": "addison-wesley" + }, + { + "title": "The Clean Coder: A Code of Conduct for Professional Programmers", + "authors": ["robert c. 
martin"], + "summary": "A guide to professional conduct in the field of software engineering", + "publish_date": "2011-05-13", + "num_reviews": 20, + "publisher": "prentice hall" + }, + { + "title": "JavaScript: The Good Parts", + "authors": ["douglas crockford"], + "summary": "A deep dive into the parts of JavaScript that are essential to writing maintainable code", + "publish_date": "2008-05-15", + "num_reviews": 51, + "publisher": "oreilly" + }, + { + "title": "Introduction to the Theory of Computation", + "authors": ["michael sipser"], + "summary": "Introduction to the theory of computation and complexity theory", + "publish_date": "2012-06-27", + "num_reviews": 33, + "publisher": "cengage learning" + } +] From 59e3c99ebffce1aead02e1988e454d1d79e5d16c Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 10 Jul 2023 06:29:57 -0400 Subject: [PATCH 48/54] updates to quick start --- .../search/00-quick-start.ipynb | 255 ++++++------------ 1 file changed, 82 insertions(+), 173 deletions(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index deabf786..534a8318 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -58,29 +58,6 @@ "!pip install -qU elasticsearch sentence-transformers==2.2.2" ] }, - { - "cell_type": "markdown", - "id": "d9cb4609", - "metadata": { - "id": "d9cb4609" - }, - "source": [ - "Next we need to import the modules we need." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "099415ba", - "metadata": { - "id": "099415ba" - }, - "outputs": [], - "source": [ - "from elasticsearch import Elasticsearch\n", - "from urllib.request import urlopen" - ] - }, { "cell_type": "markdown", "id": "28AH8LhI-0UD", @@ -88,7 +65,8 @@ "id": "28AH8LhI-0UD" }, "source": [ - "and add the sentence transformer" + "# Setup the Embedding Model\n", + "For this example, we're using the all-MiniLM-L6-v2, part of the sentence_transformers library. You can read more about this model in [hugging face](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)." ] }, { @@ -130,10 +108,16 @@ }, "outputs": [], "source": [ + "from elasticsearch import Elasticsearch\n", + "from getpass import getpass\n", + "\n", + "CLOUD_ID = getpass(\"Elastic Cloud ID\")\n", + "CLOUD_PASSWORD = getpass(\"Elastic Password\")\n", + "\n", "# Create the client instance\n", "client = Elasticsearch(\n", - " cloud_id=\"\",\n", - " basic_auth=(\"elastic\", \"\")\n", + " cloud_id=CLOUD_ID,\n", + " basic_auth=(\"elastic\", CLOUD_PASSWORD)\n", ")" ] }, @@ -216,7 +200,7 @@ " \"type\": \"dense_vector\",\n", " \"dims\": 384,\n", " \"index\": \"true\",\n", - " \"similarity\": \"dot_product\"\n", + " \"similarity\": \"cosine\"\n", " }\n", " }\n", " }\n", @@ -235,7 +219,7 @@ "source": [ "### Index test data\n", "\n", - "Run the following command to upload some test data, containing information about 10 popular programming books." + "Run the following command to upload some test data, containing information about 10 popular programming books from this [dataset](https://raw.githubusercontent.com/joemcelroy/elasticsearch-labs/notebooks-guides/colab-notebooks-examples/search/data.json)." 
] }, { @@ -247,96 +231,19 @@ }, "outputs": [], "source": [ - "books = [\n", - " {\n", - " \"title\": \"The Pragmatic Programmer: Your Journey to Mastery\",\n", - " \"authors\": [\"andrew hunt\", \"david thomas\"],\n", - " \"summary\": \"A guide to pragmatic programming for software engineers and developers\",\n", - " \"publish_date\": \"2019-10-29\",\n", - " \"num_reviews\": 30,\n", - " \"publisher\": \"addison-wesley\"\n", - " },\n", - " {\n", - " \"title\": \"Python Crash Course\",\n", - " \"authors\": [\"eric matthes\"],\n", - " \"summary\": \"A fast-paced, no-nonsense guide to programming in Python\",\n", - " \"publish_date\": \"2019-05-03\",\n", - " \"num_reviews\": 42,\n", - " \"publisher\": \"no starch press\"\n", - " },\n", - " {\n", - " \"title\": \"Artificial Intelligence: A Modern Approach\",\n", - " \"authors\": [\"stuart russell\", \"peter norvig\"],\n", - " \"summary\": \"Comprehensive introduction to the theory and practice of artificial intelligence\",\n", - " \"publish_date\": \"2020-04-06\",\n", - " \"num_reviews\": 39,\n", - " \"publisher\": \"pearson\"\n", - " },\n", - " {\n", - " \"title\": \"Clean Code: A Handbook of Agile Software Craftsmanship\",\n", - " \"authors\": [\"robert c. 
martin\"],\n", - " \"summary\": \"A guide to writing code that is easy to read, understand and maintain\",\n", - " \"publish_date\": \"2008-08-11\",\n", - " \"num_reviews\": 55,\n", - " \"publisher\": \"prentice hall\"\n", - " },\n", - " {\n", - " \"title\": \"You Don't Know JS: Up & Going\",\n", - " \"authors\": [\"kyle simpson\"],\n", - " \"summary\": \"Introduction to JavaScript and programming as a whole\",\n", - " \"publish_date\": \"2015-03-27\",\n", - " \"num_reviews\": 36,\n", - " \"publisher\": \"oreilly\"\n", - " },\n", - " {\n", - " \"title\": \"Eloquent JavaScript\",\n", - " \"authors\": [\"marijn haverbeke\"],\n", - " \"summary\": \"A modern introduction to programming\",\n", - " \"publish_date\": \"2018-12-04\",\n", - " \"num_reviews\": 38,\n", - " \"publisher\": \"no starch press\"\n", - " },\n", - " {\n", - " \"title\": \"Design Patterns: Elements of Reusable Object-Oriented Software\",\n", - " \"authors\": [\"erich gamma\", \"richard helm\", \"ralph johnson\", \"john vlissides\"],\n", - " \"summary\": \"Guide to design patterns that can be used in any object-oriented language\",\n", - " \"publish_date\": \"1994-10-31\",\n", - " \"num_reviews\": 45,\n", - " \"publisher\": \"addison-wesley\"\n", - " },\n", - " {\n", - " \"title\": \"The Clean Coder: A Code of Conduct for Professional Programmers\",\n", - " \"authors\": [\"robert c. 
martin\"],\n", - " \"summary\": \"A guide to professional conduct in the field of software engineering\",\n", - " \"publish_date\": \"2011-05-13\",\n", - " \"num_reviews\": 20,\n", - " \"publisher\": \"prentice hall\"\n", - " },\n", - " {\n", - " \"title\": \"JavaScript: The Good Parts\",\n", - " \"authors\": [\"douglas crockford\"],\n", - " \"summary\": \"A deep dive into the parts of JavaScript that are essential to writing maintainable code\",\n", - " \"publish_date\": \"2008-05-15\",\n", - " \"num_reviews\": 51,\n", - " \"publisher\": \"oreilly\"\n", - " },\n", - " {\n", - " \"title\": \"Introduction to the Theory of Computation\",\n", - " \"authors\": [\"michael sipser\"],\n", - " \"summary\": \"Introduction to the theory of computation and complexity theory\",\n", - " \"publish_date\": \"2012-06-27\",\n", - " \"num_reviews\": 33,\n", - " \"publisher\": \"cengage learning\"\n", - " },\n", - "]\n", + "import json\n", + "from urllib.request import urlopen\n", + "\n", + "url = \"https://raw.githubusercontent.com/joemcelroy/elasticsearch-labs/notebooks-guides/colab-notebooks-examples/search/data.json\"\n", + "response = urlopen(url)\n", + "books = json.loads(response.read())\n", "\n", "actions = []\n", "for book in books:\n", " actions.append({\"index\": {\"_index\": \"book_index\"}})\n", - " titleEmbedding = model.encode(book[\"title\"]).tolist()\n", - " book[\"title_vector\"] = titleEmbedding\n", + " # Transforming the title into an embedding using the model\n", + " book[\"title_vector\"] = model.encode(book[\"title\"]).tolist()\n", " actions.append(book)\n", - "\n", "client.bulk(index=\"book_index\", operations=actions)\n" ] }, @@ -355,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "f12ce2c9", "metadata": { "id": "f12ce2c9" @@ -380,95 +287,95 @@ "id": "39bdefe0" }, "source": [ - "##Querying\n", + "##Making Queries\n", "\n", - "Let's start by looking at simple queries which search for a particular value in a 
particular field." + "Now that we have indexed the books, we want to perform a semantic search for books that similarly match the query. We embed the query and perform a search." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "Df7hwcIjYwMT", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Df7hwcIjYwMT", - "outputId": "a5569fa1-163a-45a0-d9d6-bc779feb59db" + "outputId": "e12d312f-812b-4c1f-8a0e-eadfc01e3321" }, "outputs": [ { - "name": "stderr", "output_type": "stream", - "text": [ - ":11: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", - " response = client.search(index=\"book_index\", body={\n" - ] - }, - { "name": "stdout", - "output_type": "stream", "text": [ "\n", - "ID: fGsDIIkB6SgI-NN4Uquf\n", - "Publication date: 2008-08-11\n", - "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", - "Summary: A guide to writing code that is easy to read, understand and maintain\n", - "Score: 0.6285683\n", + "ID: OOlWP4kB-GB5Evg6zHVx\n", + "Publication date: 2008-05-15\n", + "Title: JavaScript: The Good Parts\n", + "Summary: A deep dive into the parts of JavaScript that are essential to writing maintainable code\n", + "Score: 0.8075247\n", + "\n", + "ID: NOlWP4kB-GB5Evg6zHVx\n", + "Publication date: 2015-03-27\n", + "Title: You Don't Know JS: Up & Going\n", + "Summary: Introduction to JavaScript and programming as a whole\n", + "Score: 0.6946182\n", + "\n", + "ID: NelWP4kB-GB5Evg6zHVx\n", + "Publication date: 2018-12-04\n", + "Title: Eloquent JavaScript\n", + "Summary: A modern introduction to programming\n", + "Score: 0.66179085\n", "\n", - "ID: eWsDIIkB6SgI-NN4Uquf\n", + "ID: MOlWP4kB-GB5Evg6zHVx\n", "Publication date: 2019-10-29\n", "Title: The Pragmatic Programmer: Your Journey to Mastery\n", "Summary: A guide to pragmatic programming for software engineers and developers\n", - "Score: 0.62295747\n", + 
"Score: 0.61159486\n", "\n", - "ID: gGsDIIkB6SgI-NN4Uquf\n", + "ID: OelWP4kB-GB5Evg6zHVx\n", + "Publication date: 2012-06-27\n", + "Title: Introduction to the Theory of Computation\n", + "Summary: Introduction to the theory of computation and complexity theory\n", + "Score: 0.58697784\n", + "\n", + "ID: N-lWP4kB-GB5Evg6zHVx\n", "Publication date: 2011-05-13\n", "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", "Summary: A guide to professional conduct in the field of software engineering\n", - "Score: 0.5932041\n", + "Score: 0.57042736\n", "\n", - "ID: f2sDIIkB6SgI-NN4Uquf\n", + "ID: NulWP4kB-GB5Evg6zHVx\n", "Publication date: 1994-10-31\n", "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", "Summary: Guide to design patterns that can be used in any object-oriented language\n", - "Score: 0.5909667\n", - "\n", - "ID: gmsDIIkB6SgI-NN4Uquf\n", - "Publication date: 2012-06-27\n", - "Title: Introduction to the Theory of Computation\n", - "Summary: Introduction to the theory of computation and complexity theory\n", - "Score: 0.5843217\n", + "Score: 0.56175697\n", "\n", - "ID: gWsDIIkB6SgI-NN4Uquf\n", - "Publication date: 2008-05-15\n", - "Title: JavaScript: The Good Parts\n", - "Summary: A deep dive into the parts of JavaScript that are essential to writing maintainable code\n", - "Score: 0.5775348\n", + "ID: M-lWP4kB-GB5Evg6zHVx\n", + "Publication date: 2008-08-11\n", + "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", + "Summary: A guide to writing code that is easy to read, understand and maintain\n", + "Score: 0.55407417\n", "\n", - "ID: e2sDIIkB6SgI-NN4Uquf\n", + "ID: MulWP4kB-GB5Evg6zHVx\n", "Publication date: 2020-04-06\n", "Title: Artificial Intelligence: A Modern Approach\n", "Summary: Comprehensive introduction to the theory and practice of artificial intelligence\n", - "Score: 0.5705365\n", + "Score: 0.5461982\n", "\n", - "ID: emsDIIkB6SgI-NN4Uquf\n", + "ID: MelWP4kB-GB5Evg6zHVx\n", 
"Publication date: 2019-05-03\n", "Title: Python Crash Course\n", "Summary: A fast-paced, no-nonsense guide to programming in Python\n", - "Score: 0.55375147\n", - "\n", - "ID: fmsDIIkB6SgI-NN4Uquf\n", - "Publication date: 2018-12-04\n", - "Title: Eloquent JavaScript\n", - "Summary: A modern introduction to programming\n", - "Score: 0.531436\n", - "\n", - "ID: fWsDIIkB6SgI-NN4Uquf\n", - "Publication date: 2015-03-27\n", - "Title: You Don't Know JS: Up & Going\n", - "Summary: Introduction to JavaScript and programming as a whole\n", - "Score: 0.52609706\n" + "Score: 0.536102\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + " response = client.search(index=\"book_index\", body={\n" ] } ], @@ -476,7 +383,7 @@ "response = client.search(index=\"book_index\", body={\n", " \"knn\": {\n", " \"field\": \"title_vector\",\n", - " \"query_vector\": model.encode(\"Best Project management books?\"),\n", + " \"query_vector\": model.encode(\"Best javascript books?\"),\n", " \"k\": 10,\n", " \"num_candidates\": 100\n", " }\n", @@ -486,16 +393,18 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "pWYkmofeaSk8", + "cell_type": "markdown", + "source": [ + "## Next Steps\n", + "\n", + "In the next guide, we will learn more on how to apply metadata filtering and querying to your search\n", + "\n", + "[Metadata Filtering and Querying](https://)" + ], "metadata": { - "id": "pWYkmofeaSk8" + "id": "nPUBPjzNV0cE" }, - "outputs": [], - "source": [ - "client.indices.delete(index=\"book_index\")" - ] + "id": "nPUBPjzNV0cE" } ], "metadata": { From 9f329807d1ecef9c3f8f3b8f2bbe04201bf8c27c Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 10 Jul 2023 07:34:11 -0400 Subject: [PATCH 49/54] updates to quick start --- .../search/00-quick-start.ipynb | 352 +++++++++++++++++- 1 file changed, 336 insertions(+), 
16 deletions(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index 534a8318..54a44777 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -101,12 +101,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "f38e0397", "metadata": { - "id": "f38e0397" + "id": "f38e0397", + "outputId": "ad6df489-d242-4229-a42a-39c5ca19d124", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Elastic Cloud ID··········\n", + "Elastic Password··········\n" + ] + } + ], "source": [ "from elasticsearch import Elasticsearch\n", "from getpass import getpass\n", @@ -143,12 +156,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "25c618eb", "metadata": { - "id": "25c618eb" + "id": "25c618eb", + "outputId": "30a6ba5b-5109-4457-ddfe-5633a077ca9b", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'name': 'instance-0000000000', 'cluster_name': '1a56ad21587c44d3930932eb9fa1d8e8', 'cluster_uuid': 'gX4zlwtlR4qhZpp1SPm4Yg', 'version': {'number': '8.8.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '98e1271edf932a480e4262a471281f1ee295ce6b', 'build_date': '2023-06-26T05:16:16.196344851Z', 'build_snapshot': False, 'lucene_version': '9.6.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n" + ] + } + ], "source": [ "print(client.info())" ] @@ -262,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 40, "id": "f12ce2c9", "metadata": { "id": "f12ce2c9" @@ -276,7 +301,10 @@ " score = hit['_score']\n", " title = 
hit['_source']['title']\n", " summary = hit['_source']['summary']\n", - " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nScore: {score}\")\n", + " publisher = hit[\"_source\"][\"publisher\"]\n", + " num_reviews = hit[\"_source\"][\"num_reviews\"]\n", + " authors = hit[\"_source\"][\"authors\"]\n", + " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nPublisher: {publisher}\\nReviews: {num_reviews}\\nAuthors: {authors}\\nScore: {score}\")\n", " print(pretty_output)" ] }, @@ -294,14 +322,14 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 41, "id": "Df7hwcIjYwMT", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Df7hwcIjYwMT", - "outputId": "e12d312f-812b-4c1f-8a0e-eadfc01e3321" + "outputId": "e63884d7-d4a5-4f5d-ea43-fc2f0793f040" }, "outputs": [ { @@ -313,60 +341,90 @@ "Publication date: 2008-05-15\n", "Title: JavaScript: The Good Parts\n", "Summary: A deep dive into the parts of JavaScript that are essential to writing maintainable code\n", + "Publisher: oreilly\n", + "Reviews: 51\n", + "Authors: ['douglas crockford']\n", "Score: 0.8075247\n", "\n", "ID: NOlWP4kB-GB5Evg6zHVx\n", "Publication date: 2015-03-27\n", "Title: You Don't Know JS: Up & Going\n", "Summary: Introduction to JavaScript and programming as a whole\n", + "Publisher: oreilly\n", + "Reviews: 36\n", + "Authors: ['kyle simpson']\n", "Score: 0.6946182\n", "\n", "ID: NelWP4kB-GB5Evg6zHVx\n", "Publication date: 2018-12-04\n", "Title: Eloquent JavaScript\n", "Summary: A modern introduction to programming\n", + "Publisher: no starch press\n", + "Reviews: 38\n", + "Authors: ['marijn haverbeke']\n", "Score: 0.66179085\n", "\n", "ID: MOlWP4kB-GB5Evg6zHVx\n", "Publication date: 2019-10-29\n", "Title: The Pragmatic Programmer: Your Journey to Mastery\n", "Summary: A guide to pragmatic programming for software engineers and developers\n", + 
"Publisher: addison-wesley\n", + "Reviews: 30\n", + "Authors: ['andrew hunt', 'david thomas']\n", "Score: 0.61159486\n", "\n", "ID: OelWP4kB-GB5Evg6zHVx\n", "Publication date: 2012-06-27\n", "Title: Introduction to the Theory of Computation\n", "Summary: Introduction to the theory of computation and complexity theory\n", + "Publisher: cengage learning\n", + "Reviews: 33\n", + "Authors: ['michael sipser']\n", "Score: 0.58697784\n", "\n", "ID: N-lWP4kB-GB5Evg6zHVx\n", "Publication date: 2011-05-13\n", "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", "Summary: A guide to professional conduct in the field of software engineering\n", + "Publisher: prentice hall\n", + "Reviews: 20\n", + "Authors: ['robert c. martin']\n", "Score: 0.57042736\n", "\n", "ID: NulWP4kB-GB5Evg6zHVx\n", "Publication date: 1994-10-31\n", "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", "Summary: Guide to design patterns that can be used in any object-oriented language\n", + "Publisher: addison-wesley\n", + "Reviews: 45\n", + "Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n", "Score: 0.56175697\n", "\n", "ID: M-lWP4kB-GB5Evg6zHVx\n", "Publication date: 2008-08-11\n", "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", "Summary: A guide to writing code that is easy to read, understand and maintain\n", + "Publisher: prentice hall\n", + "Reviews: 55\n", + "Authors: ['robert c. 
martin']\n", "Score: 0.55407417\n", "\n", "ID: MulWP4kB-GB5Evg6zHVx\n", "Publication date: 2020-04-06\n", "Title: Artificial Intelligence: A Modern Approach\n", "Summary: Comprehensive introduction to the theory and practice of artificial intelligence\n", + "Publisher: pearson\n", + "Reviews: 39\n", + "Authors: ['stuart russell', 'peter norvig']\n", "Score: 0.5461982\n", "\n", "ID: MelWP4kB-GB5Evg6zHVx\n", "Publication date: 2019-05-03\n", "Title: Python Crash Course\n", "Summary: A fast-paced, no-nonsense guide to programming in Python\n", + "Publisher: no starch press\n", + "Reviews: 42\n", + "Authors: ['eric matthes']\n", "Score: 0.536102\n" ] }, @@ -374,7 +432,7 @@ "output_type": "stream", "name": "stderr", "text": [ - ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", " response = client.search(index=\"book_index\", body={\n" ] } @@ -395,16 +453,278 @@ { "cell_type": "markdown", "source": [ - "## Next Steps\n", + "## Filtering\n", + "\n", + "Filter context is mostly used for filtering structured data, for example:\n", + "\n", + "Does this timestamp fall into the range 2015 to 2016?\n", + "Is the status field set to \"published\"?\n", + "Filter context is in effect whenever a query clause is passed to a filter parameter, such as the filter or must_not parameters in the bool query.\n", + "\n", + "[Read more](https://)" + ], + "metadata": { + "id": "LdJCpbQMeml5" + }, + "id": "LdJCpbQMeml5" + }, + { + "cell_type": "markdown", + "source": [ + "### Example: Keyword Filtering\n", "\n", - "In the next guide, we will learn more on how to apply metadata filtering and querying to your search\n", + "This is an example of adding a keyword filter to the query.\n", + "\n", + "It narrows down the results by including only documents where the 
\"publisher\" field is equal to \"addison-wesley\".\n", + "\n", + "Overall, the code retrieves the top books that are similar to \"Best javascript books?\" based on their title vectors and have \"addison-wesley\" as the publisher." + ], + "metadata": { + "id": "dRSrPMyFf7w7" + }, + "id": "dRSrPMyFf7w7" + }, + { + "cell_type": "code", + "source": [ + "response = client.search(index=\"book_index\", body={\n", + " \"knn\": {\n", + " \"field\": \"title_vector\",\n", + " \"query_vector\": model.encode(\"Best javascript books?\"),\n", + " \"k\": 10,\n", + " \"num_candidates\": 100,\n", + " \"filter\": {\n", + " \"term\": {\n", + " \"publisher\": \"addison-wesley\"\n", + " }\n", + " }\n", + " }\n", + "})\n", + "\n", + "pretty_response(response)" + ], + "metadata": { + "id": "WoE0yTchfj3A" + }, + "id": "WoE0yTchfj3A", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Example: Optional Filtering\n", + "\n", + "TODO: Help the developer understand more about the different types of filtering they can do.\n", + "\n", + "Provide a link to show more advanced use cases of filtering on date-range, geo-location etc." + ], + "metadata": { + "id": "YY2SrWDtgnF3" + }, + "id": "YY2SrWDtgnF3" + }, + { + "cell_type": "code", + "source": [ + "response = client.search(index=\"book_index\", body={\n", + " \"knn\": {\n", + " \"field\": \"title_vector\",\n", + " \"query_vector\": model.encode(\"Best javascript books?\"),\n", + " \"k\": 10,\n", + " \"num_candidates\": 100,\n", + " \"filter\": {\n", + " \"bool\": {\n", + " \"should\": [\n", + " {\n", + " \"term\": {\n", + " \"publisher\": \"addison-wesley\"\n", + " }\n", + " },\n", + " {\n", + " \"term\": {\n", + " \"authors\": \"robert c. 
martin\"\n", + " }\n", + " }\n", + " ],\n", + "\n", + " }\n", + " }\n", + " }\n", + "})\n", + "\n", + "pretty_response(response)" + ], + "metadata": { + "id": "fcDfiJC9g6AX", + "outputId": "0909515a-5b94-4863-94c6-e67015baeadf", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "fcDfiJC9g6AX", + "execution_count": 43, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "ID: MOlWP4kB-GB5Evg6zHVx\n", + "Publication date: 2019-10-29\n", + "Title: The Pragmatic Programmer: Your Journey to Mastery\n", + "Summary: A guide to pragmatic programming for software engineers and developers\n", + "Publisher: addison-wesley\n", + "Reviews: 30\n", + "Authors: ['andrew hunt', 'david thomas']\n", + "Score: 0.61159486\n", + "\n", + "ID: N-lWP4kB-GB5Evg6zHVx\n", + "Publication date: 2011-05-13\n", + "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", + "Summary: A guide to professional conduct in the field of software engineering\n", + "Publisher: prentice hall\n", + "Reviews: 20\n", + "Authors: ['robert c. martin']\n", + "Score: 0.57042736\n", + "\n", + "ID: NulWP4kB-GB5Evg6zHVx\n", + "Publication date: 1994-10-31\n", + "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", + "Summary: Guide to design patterns that can be used in any object-oriented language\n", + "Publisher: addison-wesley\n", + "Reviews: 45\n", + "Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n", + "Score: 0.56175697\n", + "\n", + "ID: M-lWP4kB-GB5Evg6zHVx\n", + "Publication date: 2008-08-11\n", + "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", + "Summary: A guide to writing code that is easy to read, understand and maintain\n", + "Publisher: prentice hall\n", + "Reviews: 55\n", + "Authors: ['robert c. 
martin']\n", + "Score: 0.55407417\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + " response = client.search(index=\"book_index\", body={\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Hybrid Search\n", + "\n" + ], + "metadata": { + "id": "IUMOK8h-iYrq" + }, + "id": "IUMOK8h-iYrq" + }, + { + "cell_type": "code", + "source": [ + "response = client.search(index=\"book_index\", body={\n", + " \"size\": 5,\n", + " \"query\": {\n", + " \"match\": {\n", + " \"summary\": \"python\"\n", + " }\n", + " },\n", + " \"knn\": {\n", + " \"field\": \"title_vector\",\n", + " # generate embedding for query so it can be compared to `title_vector`\n", + " \"query_vector\" : model.encode(\"python programming\").tolist(),\n", + " \"k\": 5,\n", + " \"num_candidates\": 10\n", + " },\n", + " \"rank\": {\n", + " \"rrf\": {\n", + " \"window_size\": 5,\n", + " \"rank_constant\": 20\n", + " }\n", + " }\n", + "})\n", "\n", - "[Metadata Filtering and Querying](https://)" + "pretty_response(response)" ], "metadata": { - "id": "nPUBPjzNV0cE" + "id": "1BwZ-yjli7xA", + "outputId": "dd2cc266-acd3-4cef-f7e7-3866a486aa04", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "id": "nPUBPjzNV0cE" + "id": "1BwZ-yjli7xA", + "execution_count": 48, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "ID: MelWP4kB-GB5Evg6zHVx\n", + "Publication date: 2019-05-03\n", + "Title: Python Crash Course\n", + "Summary: A fast-paced, no-nonsense guide to programming in Python\n", + "Publisher: no starch press\n", + "Reviews: 42\n", + "Authors: ['eric matthes']\n", + "Score: None\n", + "\n", + "ID: MOlWP4kB-GB5Evg6zHVx\n", + "Publication date: 2019-10-29\n", + "Title: The Pragmatic Programmer: Your Journey to Mastery\n", + "Summary: A guide to pragmatic programming for software 
engineers and developers\n", + "Publisher: addison-wesley\n", + "Reviews: 30\n", + "Authors: ['andrew hunt', 'david thomas']\n", + "Score: None\n", + "\n", + "ID: OelWP4kB-GB5Evg6zHVx\n", + "Publication date: 2012-06-27\n", + "Title: Introduction to the Theory of Computation\n", + "Summary: Introduction to the theory of computation and complexity theory\n", + "Publisher: cengage learning\n", + "Reviews: 33\n", + "Authors: ['michael sipser']\n", + "Score: None\n", + "\n", + "ID: N-lWP4kB-GB5Evg6zHVx\n", + "Publication date: 2011-05-13\n", + "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", + "Summary: A guide to professional conduct in the field of software engineering\n", + "Publisher: prentice hall\n", + "Reviews: 20\n", + "Authors: ['robert c. martin']\n", + "Score: None\n", + "\n", + "ID: MulWP4kB-GB5Evg6zHVx\n", + "Publication date: 2020-04-06\n", + "Title: Artificial Intelligence: A Modern Approach\n", + "Summary: Comprehensive introduction to the theory and practice of artificial intelligence\n", + "Publisher: pearson\n", + "Reviews: 39\n", + "Authors: ['stuart russell', 'peter norvig']\n", + "Score: None\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", + " response = client.search(index=\"book_index\", body={\n" + ] + } + ] } ], "metadata": { From acc20ac5299339f658906901708f387fbc65a329 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 10 Jul 2023 08:09:02 -0400 Subject: [PATCH 50/54] updates --- .../search/00-quick-start.ipynb | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index 54a44777..e5079be2 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -105,10 +105,10 @@ "id": "f38e0397", "metadata": { "id": "f38e0397", - "outputId": "ad6df489-d242-4229-a42a-39c5ca19d124", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "outputId": "ad6df489-d242-4229-a42a-39c5ca19d124" }, "outputs": [ { @@ -160,10 +160,10 @@ "id": "25c618eb", "metadata": { "id": "25c618eb", - "outputId": "30a6ba5b-5109-4457-ddfe-5633a077ca9b", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "outputId": "30a6ba5b-5109-4457-ddfe-5633a077ca9b" }, "outputs": [ { @@ -556,11 +556,11 @@ "pretty_response(response)" ], "metadata": { - "id": "fcDfiJC9g6AX", - "outputId": "0909515a-5b94-4863-94c6-e67015baeadf", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "fcDfiJC9g6AX", + "outputId": "0909515a-5b94-4863-94c6-e67015baeadf" }, "id": "fcDfiJC9g6AX", "execution_count": 43, @@ -621,7 +621,10 @@ "cell_type": "markdown", "source": [ "## Hybrid Search\n", - "\n" + "\n", + "In this example, we are investigating the combination of two search algorithms: BM25 for text search and HNSW for nearest neighbor search. By combining multiple ranking methods, such as BM25 and an ML model that generates dense vector embeddings, we can achieve the best ranking results. 
This approach allows us to leverage the strengths of each algorithm and improve the overall search performance.\n", + "\n", + "TODO: Explain why we use RRF here\n" ], "metadata": { "id": "IUMOK8h-iYrq" @@ -632,7 +635,6 @@ "cell_type": "code", "source": [ "response = client.search(index=\"book_index\", body={\n", - " \"size\": 5,\n", " \"query\": {\n", " \"match\": {\n", " \"summary\": \"python\"\n", @@ -647,7 +649,7 @@ " },\n", " \"rank\": {\n", " \"rrf\": {\n", - " \"window_size\": 5,\n", + " \"window_size\": 100,\n", " \"rank_constant\": 20\n", " }\n", " }\n", @@ -656,14 +658,14 @@ "pretty_response(response)" ], "metadata": { - "id": "1BwZ-yjli7xA", - "outputId": "dd2cc266-acd3-4cef-f7e7-3866a486aa04", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "1BwZ-yjli7xA", + "outputId": "26eea86c-5cda-42d0-ba1e-2904e2b7865a" }, "id": "1BwZ-yjli7xA", - "execution_count": 48, + "execution_count": 51, "outputs": [ { "output_type": "stream", @@ -720,7 +722,7 @@ "output_type": "stream", "name": "stderr", "text": [ - ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", " response = client.search(index=\"book_index\", body={\n" ] } From 0779515c905ed2a04d0fc282449074d490311c00 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 10 Jul 2023 08:12:26 -0400 Subject: [PATCH 51/54] fix link --- .../search/00-quick-start.ipynb | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index e5079be2..764f5fbb 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -9,7 +9,7 @@ "source": [ "# Elasticsearch Quick Start\n", "\n", - "\"Open\n", + "\"Open\n", "\n", "This interactive notebook will introduce you to the very basics of getting started with simple Elasticsearch queries, using the official [Elasticsearch Python client](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html).\n", "We'll run through getting the client up and running, indexing a small data set into Elasticsearch, and performing basic searches against your data." 
@@ -727,6 +727,33 @@ ] } ] + }, + { + "cell_type": "code", + "source": [ + "client.indices.delete(index=\"book_index\")" + ], + "metadata": { + "id": "_OAahfg-tqrf", + "outputId": "d8f81ba4-cdc9-4e30-edf7-6d5bb16920eb", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "_OAahfg-tqrf", + "execution_count": 52, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True})" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] } ], "metadata": { From da7c5dc32c5ee5db507ca7e5ee01a310e970b65d Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 10 Jul 2023 08:20:20 -0400 Subject: [PATCH 52/54] updates --- .../search/00-quick-start.ipynb | 209 +++++++++--------- 1 file changed, 106 insertions(+), 103 deletions(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index 764f5fbb..1e7a4c2a 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "id": "ffc5fa6f", "metadata": { "id": "ffc5fa6f", @@ -101,14 +101,14 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 55, "id": "f38e0397", "metadata": { "id": "f38e0397", "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "ad6df489-d242-4229-a42a-39c5ca19d124" + "outputId": "2c98adc6-92f1-4ddd-a471-83617c9d6562" }, "outputs": [ { @@ -156,24 +156,12 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "25c618eb", "metadata": { - "id": "25c618eb", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "30a6ba5b-5109-4457-ddfe-5633a077ca9b" + "id": "25c618eb" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'name': 'instance-0000000000', 'cluster_name': 
'1a56ad21587c44d3930932eb9fa1d8e8', 'cluster_uuid': 'gX4zlwtlR4qhZpp1SPm4Yg', 'version': {'number': '8.8.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '98e1271edf932a480e4262a471281f1ee295ce6b', 'build_date': '2023-06-26T05:16:16.196344851Z', 'build_snapshot': False, 'lucene_version': '9.6.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n" - ] - } - ], + "outputs": [], "source": [ "print(client.info())" ] @@ -272,42 +260,6 @@ "client.bulk(index=\"book_index\", operations=actions)\n" ] }, - { - "cell_type": "markdown", - "id": "cd8b03e0", - "metadata": { - "id": "cd8b03e0" - }, - "source": [ - "## Aside: Pretty printing Elasticsearch responses\n", - "\n", - "Your API calls will return hard-to-read nested JSON.\n", - "We'll create a little function called `pretty_response` to return nice, human-readable outputs from our examples." - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "f12ce2c9", - "metadata": { - "id": "f12ce2c9" - }, - "outputs": [], - "source": [ - "def pretty_response(response):\n", - " for hit in response['hits']['hits']:\n", - " id = hit['_id']\n", - " publication_date = hit['_source']['publish_date']\n", - " score = hit['_score']\n", - " title = hit['_source']['title']\n", - " summary = hit['_source']['summary']\n", - " publisher = hit[\"_source\"][\"publisher\"]\n", - " num_reviews = hit[\"_source\"][\"num_reviews\"]\n", - " authors = hit[\"_source\"][\"authors\"]\n", - " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nPublisher: {publisher}\\nReviews: {num_reviews}\\nAuthors: {authors}\\nScore: {score}\")\n", - " print(pretty_output)" - ] - }, { "cell_type": "markdown", "id": "39bdefe0", @@ -322,14 +274,14 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 60, "id": "Df7hwcIjYwMT", "metadata": { "colab": { "base_uri": 
"https://localhost:8080/" }, "id": "Df7hwcIjYwMT", - "outputId": "e63884d7-d4a5-4f5d-ea43-fc2f0793f040" + "outputId": "a82a791f-362c-4ae4-abbd-b03306b9d711" }, "outputs": [ { @@ -337,7 +289,7 @@ "name": "stdout", "text": [ "\n", - "ID: OOlWP4kB-GB5Evg6zHVx\n", + "ID: S-m5P4kB-GB5Evg6PXVG\n", "Publication date: 2008-05-15\n", "Title: JavaScript: The Good Parts\n", "Summary: A deep dive into the parts of JavaScript that are essential to writing maintainable code\n", @@ -346,7 +298,7 @@ "Authors: ['douglas crockford']\n", "Score: 0.8075247\n", "\n", - "ID: NOlWP4kB-GB5Evg6zHVx\n", + "ID: R-m5P4kB-GB5Evg6PXVG\n", "Publication date: 2015-03-27\n", "Title: You Don't Know JS: Up & Going\n", "Summary: Introduction to JavaScript and programming as a whole\n", @@ -355,7 +307,7 @@ "Authors: ['kyle simpson']\n", "Score: 0.6946182\n", "\n", - "ID: NelWP4kB-GB5Evg6zHVx\n", + "ID: SOm5P4kB-GB5Evg6PXVG\n", "Publication date: 2018-12-04\n", "Title: Eloquent JavaScript\n", "Summary: A modern introduction to programming\n", @@ -364,7 +316,7 @@ "Authors: ['marijn haverbeke']\n", "Score: 0.66179085\n", "\n", - "ID: MOlWP4kB-GB5Evg6zHVx\n", + "ID: Q-m5P4kB-GB5Evg6PXVG\n", "Publication date: 2019-10-29\n", "Title: The Pragmatic Programmer: Your Journey to Mastery\n", "Summary: A guide to pragmatic programming for software engineers and developers\n", @@ -373,7 +325,7 @@ "Authors: ['andrew hunt', 'david thomas']\n", "Score: 0.61159486\n", "\n", - "ID: OelWP4kB-GB5Evg6zHVx\n", + "ID: TOm5P4kB-GB5Evg6PXVG\n", "Publication date: 2012-06-27\n", "Title: Introduction to the Theory of Computation\n", "Summary: Introduction to the theory of computation and complexity theory\n", @@ -382,7 +334,7 @@ "Authors: ['michael sipser']\n", "Score: 0.58697784\n", "\n", - "ID: N-lWP4kB-GB5Evg6zHVx\n", + "ID: Sum5P4kB-GB5Evg6PXVG\n", "Publication date: 2011-05-13\n", "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", "Summary: A guide to professional conduct in the field of software 
engineering\n", @@ -391,7 +343,7 @@ "Authors: ['robert c. martin']\n", "Score: 0.57042736\n", "\n", - "ID: NulWP4kB-GB5Evg6zHVx\n", + "ID: Sem5P4kB-GB5Evg6PXVG\n", "Publication date: 1994-10-31\n", "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", "Summary: Guide to design patterns that can be used in any object-oriented language\n", @@ -400,7 +352,7 @@ "Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n", "Score: 0.56175697\n", "\n", - "ID: M-lWP4kB-GB5Evg6zHVx\n", + "ID: Rum5P4kB-GB5Evg6PXVG\n", "Publication date: 2008-08-11\n", "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", "Summary: A guide to writing code that is easy to read, understand and maintain\n", @@ -409,7 +361,7 @@ "Authors: ['robert c. martin']\n", "Score: 0.55407417\n", "\n", - "ID: MulWP4kB-GB5Evg6zHVx\n", + "ID: Rem5P4kB-GB5Evg6PXVG\n", "Publication date: 2020-04-06\n", "Title: Artificial Intelligence: A Modern Approach\n", "Summary: Comprehensive introduction to the theory and practice of artificial intelligence\n", @@ -418,7 +370,7 @@ "Authors: ['stuart russell', 'peter norvig']\n", "Score: 0.5461982\n", "\n", - "ID: MelWP4kB-GB5Evg6zHVx\n", + "ID: ROm5P4kB-GB5Evg6PXVG\n", "Publication date: 2019-05-03\n", "Title: Python Crash Course\n", "Summary: A fast-paced, no-nonsense guide to programming in Python\n", @@ -432,7 +384,7 @@ "output_type": "stream", "name": "stderr", "text": [ - ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", " response = client.search(index=\"book_index\", body={\n" ] } @@ -447,6 +399,20 @@ " }\n", "})\n", "\n", + "# a helper function to return nice human-readable outputs from our following examples\n", + "def pretty_response(response):\n", + " for hit in response['hits']['hits']:\n", + " id = hit['_id']\n", + " publication_date = hit['_source']['publish_date']\n", + " score = hit['_score']\n", + " title = hit['_source']['title']\n", + " summary = hit['_source']['summary']\n", + " publisher = hit[\"_source\"][\"publisher\"]\n", + " num_reviews = hit[\"_source\"][\"num_reviews\"]\n", + " authors = hit[\"_source\"][\"authors\"]\n", + " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nPublisher: {publisher}\\nReviews: {num_reviews}\\nAuthors: {authors}\\nScore: {score}\")\n", + " print(pretty_output)\n", + "\n", "pretty_response(response)" ] }, @@ -504,11 +470,48 @@ "pretty_response(response)" ], "metadata": { - "id": "WoE0yTchfj3A" + "id": "WoE0yTchfj3A", + "outputId": "4c1d1b6a-a24c-48c5-cc7e-9d16c6cdecda", + "colab": { + "base_uri": "https://localhost:8080/" + } }, "id": "WoE0yTchfj3A", - "execution_count": null, - "outputs": [] + "execution_count": 61, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "ID: Q-m5P4kB-GB5Evg6PXVG\n", + "Publication date: 2019-10-29\n", + "Title: The Pragmatic Programmer: Your Journey to Mastery\n", + "Summary: A guide to pragmatic programming for software engineers and developers\n", + "Publisher: addison-wesley\n", + "Reviews: 30\n", + "Authors: ['andrew hunt', 'david thomas']\n", + "Score: 0.61159486\n", + "\n", + "ID: Sem5P4kB-GB5Evg6PXVG\n", + "Publication date: 1994-10-31\n", + "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", + "Summary: Guide to design patterns that can be used in any object-oriented language\n", + "Publisher: addison-wesley\n", + "Reviews: 45\n", + 
"Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n", + "Score: 0.56175697\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + " response = client.search(index=\"book_index\", body={\n" + ] + } + ] }, { "cell_type": "markdown", @@ -560,17 +563,17 @@ "base_uri": "https://localhost:8080/" }, "id": "fcDfiJC9g6AX", - "outputId": "0909515a-5b94-4863-94c6-e67015baeadf" + "outputId": "ea4120a5-9ef3-4387-9fa4-09c3e2e25440" }, "id": "fcDfiJC9g6AX", - "execution_count": 43, + "execution_count": 62, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", - "ID: MOlWP4kB-GB5Evg6zHVx\n", + "ID: Q-m5P4kB-GB5Evg6PXVG\n", "Publication date: 2019-10-29\n", "Title: The Pragmatic Programmer: Your Journey to Mastery\n", "Summary: A guide to pragmatic programming for software engineers and developers\n", @@ -579,7 +582,7 @@ "Authors: ['andrew hunt', 'david thomas']\n", "Score: 0.61159486\n", "\n", - "ID: N-lWP4kB-GB5Evg6zHVx\n", + "ID: Sum5P4kB-GB5Evg6PXVG\n", "Publication date: 2011-05-13\n", "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", "Summary: A guide to professional conduct in the field of software engineering\n", @@ -588,7 +591,7 @@ "Authors: ['robert c. 
martin']\n", "Score: 0.57042736\n", "\n", - "ID: NulWP4kB-GB5Evg6zHVx\n", + "ID: Sem5P4kB-GB5Evg6PXVG\n", "Publication date: 1994-10-31\n", "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", "Summary: Guide to design patterns that can be used in any object-oriented language\n", @@ -597,7 +600,7 @@ "Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n", "Score: 0.56175697\n", "\n", - "ID: M-lWP4kB-GB5Evg6zHVx\n", + "ID: Rum5P4kB-GB5Evg6PXVG\n", "Publication date: 2008-08-11\n", "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", "Summary: A guide to writing code that is easy to read, understand and maintain\n", @@ -611,7 +614,7 @@ "output_type": "stream", "name": "stderr", "text": [ - ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", " response = client.search(index=\"book_index\", body={\n" ] } @@ -662,17 +665,17 @@ "base_uri": "https://localhost:8080/" }, "id": "1BwZ-yjli7xA", - "outputId": "26eea86c-5cda-42d0-ba1e-2904e2b7865a" + "outputId": "74c69581-00a1-4b7b-fa84-9a3f96ae7a9b" }, "id": "1BwZ-yjli7xA", - "execution_count": 51, + "execution_count": 63, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", - "ID: MelWP4kB-GB5Evg6zHVx\n", + "ID: ROm5P4kB-GB5Evg6PXVG\n", "Publication date: 2019-05-03\n", "Title: Python Crash Course\n", "Summary: A fast-paced, no-nonsense guide to programming in Python\n", @@ -681,7 +684,7 @@ "Authors: ['eric matthes']\n", "Score: None\n", "\n", - "ID: MOlWP4kB-GB5Evg6zHVx\n", + "ID: Q-m5P4kB-GB5Evg6PXVG\n", "Publication date: 2019-10-29\n", "Title: The Pragmatic Programmer: Your Journey to Mastery\n", "Summary: A guide to pragmatic programming for software engineers and developers\n", @@ -690,7 +693,7 @@ "Authors: 
['andrew hunt', 'david thomas']\n", "Score: None\n", "\n", - "ID: OelWP4kB-GB5Evg6zHVx\n", + "ID: TOm5P4kB-GB5Evg6PXVG\n", "Publication date: 2012-06-27\n", "Title: Introduction to the Theory of Computation\n", "Summary: Introduction to the theory of computation and complexity theory\n", @@ -699,7 +702,7 @@ "Authors: ['michael sipser']\n", "Score: None\n", "\n", - "ID: N-lWP4kB-GB5Evg6zHVx\n", + "ID: Sum5P4kB-GB5Evg6PXVG\n", "Publication date: 2011-05-13\n", "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", "Summary: A guide to professional conduct in the field of software engineering\n", @@ -708,7 +711,7 @@ "Authors: ['robert c. martin']\n", "Score: None\n", "\n", - "ID: MulWP4kB-GB5Evg6zHVx\n", + "ID: Rem5P4kB-GB5Evg6PXVG\n", "Publication date: 2020-04-06\n", "Title: Artificial Intelligence: A Modern Approach\n", "Summary: Comprehensive introduction to the theory and practice of artificial intelligence\n", @@ -722,38 +725,38 @@ "output_type": "stream", "name": "stderr", "text": [ - ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", " response = client.search(index=\"book_index\", body={\n" ] } ] }, + { + "cell_type": "markdown", + "source": [ + "## Next Steps\n", + "\n", + "Now that you have learned the basics of Elasticsearch's query features, you can further explore additional features such as:\n", + "\n", + "- [ELSER](fff)\n", + "- [Faceting with aggregations](fff)\n" + ], + "metadata": { + "id": "xGSMSICZu67I" + }, + "id": "xGSMSICZu67I" + }, { "cell_type": "code", "source": [ "client.indices.delete(index=\"book_index\")" ], "metadata": { - "id": "_OAahfg-tqrf", - "outputId": "d8f81ba4-cdc9-4e30-edf7-6d5bb16920eb", - "colab": { - "base_uri": "https://localhost:8080/" - } + "id": "_OAahfg-tqrf" }, "id": "_OAahfg-tqrf", - "execution_count": 52, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "ObjectApiResponse({'acknowledged': True})" - ] - }, - "metadata": {}, - "execution_count": 52 - } - ] + "execution_count": null, + "outputs": [] } ], "metadata": { From 0d96e741f43ba201d339587116dd1fdfc8626a05 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Mon, 10 Jul 2023 08:23:52 -0400 Subject: [PATCH 53/54] updates --- .../search/00-quick-start.ipynb | 211 +++++++++--------- 1 file changed, 104 insertions(+), 107 deletions(-) diff --git a/colab-notebooks-examples/search/00-quick-start.ipynb b/colab-notebooks-examples/search/00-quick-start.ipynb index 1e7a4c2a..daccb2fe 100644 --- a/colab-notebooks-examples/search/00-quick-start.ipynb +++ b/colab-notebooks-examples/search/00-quick-start.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": null, "id": "ffc5fa6f", "metadata": { "id": "ffc5fa6f", @@ -101,14 +101,14 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 30, "id": "f38e0397", "metadata": { "id": "f38e0397", "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "2c98adc6-92f1-4ddd-a471-83617c9d6562" + "outputId": 
"ad6df489-d242-4229-a42a-39c5ca19d124" }, "outputs": [ { @@ -156,12 +156,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "25c618eb", "metadata": { - "id": "25c618eb" + "id": "25c618eb", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "30a6ba5b-5109-4457-ddfe-5633a077ca9b" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'name': 'instance-0000000000', 'cluster_name': '1a56ad21587c44d3930932eb9fa1d8e8', 'cluster_uuid': 'gX4zlwtlR4qhZpp1SPm4Yg', 'version': {'number': '8.8.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '98e1271edf932a480e4262a471281f1ee295ce6b', 'build_date': '2023-06-26T05:16:16.196344851Z', 'build_snapshot': False, 'lucene_version': '9.6.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n" + ] + } + ], "source": [ "print(client.info())" ] @@ -260,6 +272,42 @@ "client.bulk(index=\"book_index\", operations=actions)\n" ] }, + { + "cell_type": "markdown", + "id": "cd8b03e0", + "metadata": { + "id": "cd8b03e0" + }, + "source": [ + "## Aside: Pretty printing Elasticsearch responses\n", + "\n", + "Your API calls will return hard-to-read nested JSON.\n", + "We'll create a little function called `pretty_response` to return nice, human-readable outputs from our examples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "f12ce2c9", + "metadata": { + "id": "f12ce2c9" + }, + "outputs": [], + "source": [ + "def pretty_response(response):\n", + " for hit in response['hits']['hits']:\n", + " id = hit['_id']\n", + " publication_date = hit['_source']['publish_date']\n", + " score = hit['_score']\n", + " title = hit['_source']['title']\n", + " summary = hit['_source']['summary']\n", + " publisher = hit[\"_source\"][\"publisher\"]\n", + " num_reviews = hit[\"_source\"][\"num_reviews\"]\n", + " authors = hit[\"_source\"][\"authors\"]\n", + " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nPublisher: {publisher}\\nReviews: {num_reviews}\\nAuthors: {authors}\\nScore: {score}\")\n", + " print(pretty_output)" + ] + }, { "cell_type": "markdown", "id": "39bdefe0", @@ -274,14 +322,14 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 41, "id": "Df7hwcIjYwMT", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Df7hwcIjYwMT", - "outputId": "a82a791f-362c-4ae4-abbd-b03306b9d711" + "outputId": "e63884d7-d4a5-4f5d-ea43-fc2f0793f040" }, "outputs": [ { @@ -289,7 +337,7 @@ "name": "stdout", "text": [ "\n", - "ID: S-m5P4kB-GB5Evg6PXVG\n", + "ID: OOlWP4kB-GB5Evg6zHVx\n", "Publication date: 2008-05-15\n", "Title: JavaScript: The Good Parts\n", "Summary: A deep dive into the parts of JavaScript that are essential to writing maintainable code\n", @@ -298,7 +346,7 @@ "Authors: ['douglas crockford']\n", "Score: 0.8075247\n", "\n", - "ID: R-m5P4kB-GB5Evg6PXVG\n", + "ID: NOlWP4kB-GB5Evg6zHVx\n", "Publication date: 2015-03-27\n", "Title: You Don't Know JS: Up & Going\n", "Summary: Introduction to JavaScript and programming as a whole\n", @@ -307,7 +355,7 @@ "Authors: ['kyle simpson']\n", "Score: 0.6946182\n", "\n", - "ID: SOm5P4kB-GB5Evg6PXVG\n", + "ID: NelWP4kB-GB5Evg6zHVx\n", "Publication date: 2018-12-04\n", "Title: Eloquent 
JavaScript\n", "Summary: A modern introduction to programming\n", @@ -316,7 +364,7 @@ "Authors: ['marijn haverbeke']\n", "Score: 0.66179085\n", "\n", - "ID: Q-m5P4kB-GB5Evg6PXVG\n", + "ID: MOlWP4kB-GB5Evg6zHVx\n", "Publication date: 2019-10-29\n", "Title: The Pragmatic Programmer: Your Journey to Mastery\n", "Summary: A guide to pragmatic programming for software engineers and developers\n", @@ -325,7 +373,7 @@ "Authors: ['andrew hunt', 'david thomas']\n", "Score: 0.61159486\n", "\n", - "ID: TOm5P4kB-GB5Evg6PXVG\n", + "ID: OelWP4kB-GB5Evg6zHVx\n", "Publication date: 2012-06-27\n", "Title: Introduction to the Theory of Computation\n", "Summary: Introduction to the theory of computation and complexity theory\n", @@ -334,7 +382,7 @@ "Authors: ['michael sipser']\n", "Score: 0.58697784\n", "\n", - "ID: Sum5P4kB-GB5Evg6PXVG\n", + "ID: N-lWP4kB-GB5Evg6zHVx\n", "Publication date: 2011-05-13\n", "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", "Summary: A guide to professional conduct in the field of software engineering\n", @@ -343,7 +391,7 @@ "Authors: ['robert c. martin']\n", "Score: 0.57042736\n", "\n", - "ID: Sem5P4kB-GB5Evg6PXVG\n", + "ID: NulWP4kB-GB5Evg6zHVx\n", "Publication date: 1994-10-31\n", "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", "Summary: Guide to design patterns that can be used in any object-oriented language\n", @@ -352,7 +400,7 @@ "Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n", "Score: 0.56175697\n", "\n", - "ID: Rum5P4kB-GB5Evg6PXVG\n", + "ID: M-lWP4kB-GB5Evg6zHVx\n", "Publication date: 2008-08-11\n", "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", "Summary: A guide to writing code that is easy to read, understand and maintain\n", @@ -361,7 +409,7 @@ "Authors: ['robert c. 
martin']\n", "Score: 0.55407417\n", "\n", - "ID: Rem5P4kB-GB5Evg6PXVG\n", + "ID: MulWP4kB-GB5Evg6zHVx\n", "Publication date: 2020-04-06\n", "Title: Artificial Intelligence: A Modern Approach\n", "Summary: Comprehensive introduction to the theory and practice of artificial intelligence\n", @@ -370,7 +418,7 @@ "Authors: ['stuart russell', 'peter norvig']\n", "Score: 0.5461982\n", "\n", - "ID: ROm5P4kB-GB5Evg6PXVG\n", + "ID: MelWP4kB-GB5Evg6zHVx\n", "Publication date: 2019-05-03\n", "Title: Python Crash Course\n", "Summary: A fast-paced, no-nonsense guide to programming in Python\n", @@ -384,7 +432,7 @@ "output_type": "stream", "name": "stderr", "text": [ - ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", " response = client.search(index=\"book_index\", body={\n" ] } @@ -399,20 +447,6 @@ " }\n", "})\n", "\n", - "# a helper function to return nice human-readable outputs from our following examples\n", - "def pretty_response(response):\n", - " for hit in response['hits']['hits']:\n", - " id = hit['_id']\n", - " publication_date = hit['_source']['publish_date']\n", - " score = hit['_score']\n", - " title = hit['_source']['title']\n", - " summary = hit['_source']['summary']\n", - " publisher = hit[\"_source\"][\"publisher\"]\n", - " num_reviews = hit[\"_source\"][\"num_reviews\"]\n", - " authors = hit[\"_source\"][\"authors\"]\n", - " pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nPublisher: {publisher}\\nReviews: {num_reviews}\\nAuthors: {authors}\\nScore: {score}\")\n", - " print(pretty_output)\n", - "\n", "pretty_response(response)" ] }, @@ -470,53 +504,16 @@ "pretty_response(response)" ], "metadata": { - "id": "WoE0yTchfj3A", - "outputId": 
"4c1d1b6a-a24c-48c5-cc7e-9d16c6cdecda", - "colab": { - "base_uri": "https://localhost:8080/" - } + "id": "WoE0yTchfj3A" }, "id": "WoE0yTchfj3A", - "execution_count": 61, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "ID: Q-m5P4kB-GB5Evg6PXVG\n", - "Publication date: 2019-10-29\n", - "Title: The Pragmatic Programmer: Your Journey to Mastery\n", - "Summary: A guide to pragmatic programming for software engineers and developers\n", - "Publisher: addison-wesley\n", - "Reviews: 30\n", - "Authors: ['andrew hunt', 'david thomas']\n", - "Score: 0.61159486\n", - "\n", - "ID: Sem5P4kB-GB5Evg6PXVG\n", - "Publication date: 1994-10-31\n", - "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", - "Summary: Guide to design patterns that can be used in any object-oriented language\n", - "Publisher: addison-wesley\n", - "Reviews: 45\n", - "Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n", - "Score: 0.56175697\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", - " response = client.search(index=\"book_index\", body={\n" - ] - } - ] + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "source": [ - "### Example: Optional Filtering\n", + "### Example: Advanced Filtering\n", "\n", "TODO: Help the developer understand more about the different types of filtering they can do.\n", "\n", @@ -563,17 +560,17 @@ "base_uri": "https://localhost:8080/" }, "id": "fcDfiJC9g6AX", - "outputId": "ea4120a5-9ef3-4387-9fa4-09c3e2e25440" + "outputId": "0909515a-5b94-4863-94c6-e67015baeadf" }, "id": "fcDfiJC9g6AX", - "execution_count": 62, + "execution_count": 43, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", - "ID: Q-m5P4kB-GB5Evg6PXVG\n", + "ID: MOlWP4kB-GB5Evg6zHVx\n", "Publication date: 2019-10-29\n", "Title: The Pragmatic Programmer: Your Journey to Mastery\n", "Summary: A guide to pragmatic programming for software engineers and developers\n", @@ -582,7 +579,7 @@ "Authors: ['andrew hunt', 'david thomas']\n", "Score: 0.61159486\n", "\n", - "ID: Sum5P4kB-GB5Evg6PXVG\n", + "ID: N-lWP4kB-GB5Evg6zHVx\n", "Publication date: 2011-05-13\n", "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", "Summary: A guide to professional conduct in the field of software engineering\n", @@ -591,7 +588,7 @@ "Authors: ['robert c. 
martin']\n", "Score: 0.57042736\n", "\n", - "ID: Sem5P4kB-GB5Evg6PXVG\n", + "ID: NulWP4kB-GB5Evg6zHVx\n", "Publication date: 1994-10-31\n", "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n", "Summary: Guide to design patterns that can be used in any object-oriented language\n", @@ -600,7 +597,7 @@ "Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n", "Score: 0.56175697\n", "\n", - "ID: Rum5P4kB-GB5Evg6PXVG\n", + "ID: M-lWP4kB-GB5Evg6zHVx\n", "Publication date: 2008-08-11\n", "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n", "Summary: A guide to writing code that is easy to read, understand and maintain\n", @@ -614,7 +611,7 @@ "output_type": "stream", "name": "stderr", "text": [ - ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", " response = client.search(index=\"book_index\", body={\n" ] } @@ -665,17 +662,17 @@ "base_uri": "https://localhost:8080/" }, "id": "1BwZ-yjli7xA", - "outputId": "74c69581-00a1-4b7b-fa84-9a3f96ae7a9b" + "outputId": "26eea86c-5cda-42d0-ba1e-2904e2b7865a" }, "id": "1BwZ-yjli7xA", - "execution_count": 63, + "execution_count": 51, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", - "ID: ROm5P4kB-GB5Evg6PXVG\n", + "ID: MelWP4kB-GB5Evg6zHVx\n", "Publication date: 2019-05-03\n", "Title: Python Crash Course\n", "Summary: A fast-paced, no-nonsense guide to programming in Python\n", @@ -684,7 +681,7 @@ "Authors: ['eric matthes']\n", "Score: None\n", "\n", - "ID: Q-m5P4kB-GB5Evg6PXVG\n", + "ID: MOlWP4kB-GB5Evg6zHVx\n", "Publication date: 2019-10-29\n", "Title: The Pragmatic Programmer: Your Journey to Mastery\n", "Summary: A guide to pragmatic programming for software engineers and developers\n", @@ -693,7 +690,7 @@ "Authors: 
['andrew hunt', 'david thomas']\n", "Score: None\n", "\n", - "ID: TOm5P4kB-GB5Evg6PXVG\n", + "ID: OelWP4kB-GB5Evg6zHVx\n", "Publication date: 2012-06-27\n", "Title: Introduction to the Theory of Computation\n", "Summary: Introduction to the theory of computation and complexity theory\n", @@ -702,7 +699,7 @@ "Authors: ['michael sipser']\n", "Score: None\n", "\n", - "ID: Sum5P4kB-GB5Evg6PXVG\n", + "ID: N-lWP4kB-GB5Evg6zHVx\n", "Publication date: 2011-05-13\n", "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", "Summary: A guide to professional conduct in the field of software engineering\n", @@ -711,7 +708,7 @@ "Authors: ['robert c. martin']\n", "Score: None\n", "\n", - "ID: Rem5P4kB-GB5Evg6PXVG\n", + "ID: MulWP4kB-GB5Evg6zHVx\n", "Publication date: 2020-04-06\n", "Title: Artificial Intelligence: A Modern Approach\n", "Summary: Comprehensive introduction to the theory and practice of artificial intelligence\n", @@ -725,38 +722,38 @@ "output_type": "stream", "name": "stderr", "text": [ - ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n", + ":1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. 
Instead use individual parameters.\n", " response = client.search(index=\"book_index\", body={\n" ] } ] }, - { - "cell_type": "markdown", - "source": [ - "## Next Steps\n", - "\n", - "Now that you have learned the basics of Elasticsearch's query features, you can further explore additional features such as:\n", - "\n", - "- [ELSER](fff)\n", - "- [Faceting with aggregations](fff)\n" - ], - "metadata": { - "id": "xGSMSICZu67I" - }, - "id": "xGSMSICZu67I" - }, { "cell_type": "code", "source": [ "client.indices.delete(index=\"book_index\")" ], "metadata": { - "id": "_OAahfg-tqrf" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_OAahfg-tqrf", + "outputId": "d8f81ba4-cdc9-4e30-edf7-6d5bb16920eb" }, "id": "_OAahfg-tqrf", - "execution_count": null, - "outputs": [] + "execution_count": 52, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True})" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] } ], "metadata": { From b16b7e4e43232bc529a912018f961949ddaa7fd8 Mon Sep 17 00:00:00 2001 From: Joseph McElroy Date: Thu, 20 Jul 2023 13:33:56 +0100 Subject: [PATCH 54/54] remove cohere notebook --- colab-notebooks-examples/integrations/cohere/cohere-intro.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 colab-notebooks-examples/integrations/cohere/cohere-intro.ipynb diff --git a/colab-notebooks-examples/integrations/cohere/cohere-intro.ipynb b/colab-notebooks-examples/integrations/cohere/cohere-intro.ipynb deleted file mode 100644 index e69de29b..00000000