From 680b27c3aa03329cea545a92d8c87722bf9aed88 Mon Sep 17 00:00:00 2001
From: Quynh Nguyen <quynh.nguyen@elastic.co>
Date: Sun, 28 Sep 2025 19:30:52 -0500
Subject: [PATCH 1/5] Add new python notebook

---
 .../multilingual_embedding.ipynb              | 225 ++++++++++++++++++
 1 file changed, 225 insertions(+)
 create mode 100644 supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb

diff --git a/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb b/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
new file mode 100644
index 00000000..57bc36aa
--- /dev/null
+++ b/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
@@ -0,0 +1,225 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Data successfully downloaded and saved to multilingual_coco_sample.json\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "import json\n",
+    "import os\n",
+    "\n",
+    "### Download multilingual coco dataset\n",
+    "### Here we are retrieving first 100 rows for this example\n",
+    "### Alternatively, you can use dataset library from Hugging Face\n",
+    "url = \"https://datasets-server.huggingface.co/rows?dataset=romrawinjp%2Fmultilingual-coco&config=default&split=restval&offset=0&length=100\"\n",
+    "# Make the GET request\n",
+    "response = requests.get(url)\n",
+    "\n",
+    "# Check if the request was successful\n",
+    "if response.status_code == 200:\n",
+    "    # Parse the JSON response\n",
+    "    data = response.json()\n",
+    "\n",
+    "    # Define the output file path\n",
+    "    output_file = \"multilingual_coco_sample.json\"\n",
+    "\n",
+    "    # Save the JSON data to a file\n",
+    "    with open(output_file, \"w\", encoding=\"utf-8\") as f:\n",
+    "        json.dump(data, f, indent=4, ensure_ascii=False)\n",
+    "\n",
+    "    print(f\"Data successfully downloaded and saved to {output_file}\")\n",
+    "else:\n",
+    "    print(f\"Failed to download data: {response.status_code}\")\n",
+    "    print(response.text)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from getpass import getpass\n",
+    "\n",
+    "# Get credentials securely for localhost Elasticsearch\n",
+    "print(\"Enter your Elasticsearch credentials:\")\n",
+    "cloud_id = input(\"Enter your cloud_id: \")\n",
+    "api_key = getpass(\"Enter your api_key: \")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Successfully connected to Elasticsearch\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/elasticsearch/_sync/client/__init__.py:311: SecurityWarning: Connecting to 'https://localhost:9200' using TLS with verify_certs=False is insecure\n",
+      "  _transport = transport_class(\n",
+      "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "from elasticsearch import Elasticsearch\n",
+    "try:\n",
+    "    es = Elasticsearch(\n",
+    "        hosts=[{\"host\": \"localhost\", \"port\": 9200, \"scheme\": \"https\"}],\n",
+    "        basic_auth=(\"elastic\", \"qaf_admin\"),\n",
+    "        verify_certs=False,  # Set to True if you have valid SSL certificates\n",
+    "        # Alternatively, you can use Elastic cloud_id and api_key\n",
+    "        #api_key=getpass(\"API Key: \")\n",
+    "        #cloud_id=getpass(\"Cloud ID: \"),\n",
+    "    )\n",
+    "\n",
+    "    # Test the connection\n",
+    "    if not es.ping():\n",
+    "        raise Exception(\"Failed to connect to Elasticsearch\")\n",
+    "\n",
+    "    print(\"Successfully connected to Elasticsearch\")\n",
+    "\n",
+    "except Exception as e:\n",
+    "    print(f\"Error connecting to Elasticsearch: {e}\")\n",
+    "    print(\"Please check your credentials\")\n",
+    "    raise\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Successfully bulk indexed 4840 documents\n",
+      "Indexing complete!\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Define the index mapping\n",
+    "index_name = \"coco\"\n",
+    "mapping = {\n",
+    "    \"mappings\": {\n",
+    "        \"properties\": {\n",
+    "            \"language\": {\"type\": \"keyword\"},\n",
+    "            \"description\": {\"type\": \"text\"},\n",
+    "            \"en\": {\"type\": \"text\"},\n",
+    "            \"image_url\": {\"type\": \"keyword\"},\n",
+    "        }\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "# Create the index if it doesn't exist\n",
+    "if not es.indices.exists(index=index_name):\n",
+    "    es.indices.create(index=index_name, body=mapping)\n",
+    "\n",
+    "# Load the JSON data\n",
+    "with open('./multilingual_coco_sample.json', 'r') as f:\n",
+    "    data = json.load(f)\n",
+    "\n",
+    "rows = data[\"rows\"]\n",
+    "# List of languages to process\n",
+    "languages = [\"en\", \"es\", \"de\", \"it\", \"vi\", \"th\"]\n",
+    "\n",
+    "bulk_data = []\n",
+    "for obj in rows:\n",
+    "    row = obj[\"row\"]\n",
+    "    image_url = row.get(\"image\")\n",
+    "    image_url = image_url[\"src\"]\n",
+    "\n",
+    "    # Process each language\n",
+    "    for lang in languages:\n",
+    "        # Skip if language not present in this row\n",
+    "        if lang not in row:\n",
+    "            continue\n",
+    "\n",
+    "        # Get all descriptions for this language\n",
+    "        descriptions = row[lang]\n",
+    "        first_eng_caption = row[\"en\"][0]\n",
+    "\n",
+    "        # Prepare bulk indexing data\n",
+    "        for description in descriptions:\n",
+    "            if description == \"\":\n",
+    "                continue\n",
+    "            # Add index operation\n",
+    "            bulk_data.append(\n",
+    "                {\"index\": {\"_index\": index_name}}\n",
+    "            )\n",
+    "            # Add document\n",
+    "            bulk_data.append({\n",
+    "                \"language\": lang,\n",
+    "                \"description\": description,\n",
+    "                \"en\": first_eng_caption,\n",
+    "                \"image_url\": image_url,\n",
+    "            })\n",
+    "\n",
+    "# Perform bulk indexing\n",
+    "if bulk_data:\n",
+    "    try:\n",
+    "        response = es.bulk(operations=bulk_data)\n",
+    "        if response[\"errors\"]:\n",
+    "            print(\"Some documents failed to index\")\n",
+    "        else:\n",
+    "            print(f\"Successfully bulk indexed {len(bulk_data) // 2} documents\")\n",
+    "    except Exception as e:\n",
+    "        print(f\"Error during bulk indexing: {str(e)}\")\n",
+    "\n",
+    "print(\"Indexing complete!\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 3702553fcb2e91d7abd9e2cd9e6a7bf2c72f9235 Mon Sep 17 00:00:00 2001
From: Carly Richmond <carly.richmond@elastic.co>
Date: Wed, 8 Oct 2025 13:12:31 +0200
Subject: [PATCH 2/5] Adding title to make the build pass

---
 .../multilingual_embedding.ipynb              | 38 ++++++++++++-------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb b/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
index 57bc36aa..d8793bf5 100644
--- a/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
+++ b/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
@@ -1,5 +1,14 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Lost In Translation? Multilingual Embedding Models Are All You Need*\n",
+    "\n",
+    "This notebook by Quynh Nguyen shows how cross-lingual vector search overcomes language barriers, enabling you to query and retrieve information in any language from both single and multilingual datasets. It accompanies the piece *Lost In Translation? Multilingual Embedding Models Are All You Need* from [Elasticsearch Labs](https://www.elastic.co/search-labs)."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -40,7 +49,7 @@
     "    print(f\"Data successfully downloaded and saved to {output_file}\")\n",
     "else:\n",
     "    print(f\"Failed to download data: {response.status_code}\")\n",
-    "    print(response.text)\n"
+    "    print(response.text)"
    ]
   },
   {
@@ -54,7 +63,7 @@
     "# Get credentials securely for localhost Elasticsearch\n",
     "print(\"Enter your Elasticsearch credentials:\")\n",
     "cloud_id = input(\"Enter your cloud_id: \")\n",
-    "api_key = getpass(\"Enter your api_key: \")\n"
+    "api_key = getpass(\"Enter your api_key: \")"
    ]
   },
   {
@@ -82,14 +91,15 @@
    ],
    "source": [
     "from elasticsearch import Elasticsearch\n",
+    "\n",
     "try:\n",
     "    es = Elasticsearch(\n",
     "        hosts=[{\"host\": \"localhost\", \"port\": 9200, \"scheme\": \"https\"}],\n",
     "        basic_auth=(\"elastic\", \"qaf_admin\"),\n",
     "        verify_certs=False,  # Set to True if you have valid SSL certificates\n",
     "        # Alternatively, you can use Elastic cloud_id and api_key\n",
-    "        #api_key=getpass(\"API Key: \")\n",
-    "        #cloud_id=getpass(\"Cloud ID: \"),\n",
+    "        # api_key=getpass(\"API Key: \")\n",
+    "        # cloud_id=getpass(\"Cloud ID: \"),\n",
     "    )\n",
     "\n",
     "    # Test the connection\n",
@@ -101,7 +111,7 @@
     "except Exception as e:\n",
     "    print(f\"Error connecting to Elasticsearch: {e}\")\n",
     "    print(\"Please check your credentials\")\n",
-    "    raise\n"
+    "    raise"
    ]
   },
   {
@@ -147,7 +157,7 @@
     "    es.indices.create(index=index_name, body=mapping)\n",
     "\n",
     "# Load the JSON data\n",
-    "with open('./multilingual_coco_sample.json', 'r') as f:\n",
+    "with open(\"./multilingual_coco_sample.json\", \"r\", encoding=\"utf-8\") as f:\n",
     "    data = json.load(f)\n",
     "\n",
     "rows = data[\"rows\"]\n",
@@ -175,16 +185,16 @@
     "            if description == \"\":\n",
     "                continue\n",
     "            # Add index operation\n",
+    "            bulk_data.append({\"index\": {\"_index\": index_name}})\n",
+    "            # Add document\n",
     "            bulk_data.append(\n",
-    "                {\"index\": {\"_index\": index_name}}\n",
+    "                {\n",
+    "                    \"language\": lang,\n",
+    "                    \"description\": description,\n",
+    "                    \"en\": first_eng_caption,\n",
+    "                    \"image_url\": image_url,\n",
+    "                }\n",
     "            )\n",
-    "            # Add document\n",
-    "            bulk_data.append({\n",
-    "                \"language\": lang,\n",
-    "                \"description\": description,\n",
-    "                \"en\": first_eng_caption,\n",
-    "                \"image_url\": image_url,\n",
-    "            })\n",
     "\n",
     "# Perform bulk indexing\n",
     "if bulk_data:\n",

From fd84a1ac9b10199cc9e7d7ca4c236c66e7a7402c Mon Sep 17 00:00:00 2001
From: Carly Richmond <carly.richmond@elastic.co>
Date: Wed, 8 Oct 2025 13:18:03 +0200
Subject: [PATCH 3/5] Changing to use endpoint instead of cloud id, and
 removing unused os reference

---
 .../multilingual_embedding.ipynb              | 51 +++++++------------
 1 file changed, 17 insertions(+), 34 deletions(-)

diff --git a/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb b/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
index d8793bf5..80d995f7 100644
--- a/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
+++ b/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
@@ -25,7 +25,6 @@
    "source": [
     "import requests\n",
     "import json\n",
-    "import os\n",
     "\n",
     "### Download multilingual coco dataset\n",
     "### Here we are retrieving first 100 rows for this example\n",
@@ -54,21 +53,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Enter your Elasticsearch credentials:\n"
+     ]
+    }
+   ],
    "source": [
     "from getpass import getpass\n",
     "\n",
     "# Get credentials securely for localhost Elasticsearch\n",
     "print(\"Enter your Elasticsearch credentials:\")\n",
-    "cloud_id = input(\"Enter your cloud_id: \")\n",
-    "api_key = getpass(\"Enter your api_key: \")"
+    "elastic_endpoint = input(\"Enter your Elastic endpoint: \")\n",
+    "api_key = getpass(\"Enter your API key: \")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -77,16 +84,6 @@
      "text": [
       "Successfully connected to Elasticsearch\n"
      ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/elasticsearch/_sync/client/__init__.py:311: SecurityWarning: Connecting to 'https://localhost:9200' using TLS with verify_certs=False is insecure\n",
-      "  _transport = transport_class(\n",
-      "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
-      "  warnings.warn(\n"
-     ]
     }
    ],
    "source": [
@@ -94,12 +91,8 @@
     "\n",
     "try:\n",
     "    es = Elasticsearch(\n",
-    "        hosts=[{\"host\": \"localhost\", \"port\": 9200, \"scheme\": \"https\"}],\n",
-    "        basic_auth=(\"elastic\", \"qaf_admin\"),\n",
-    "        verify_certs=False,  # Set to True if you have valid SSL certificates\n",
-    "        # Alternatively, you can use Elastic cloud_id and api_key\n",
-    "        # api_key=getpass(\"API Key: \")\n",
-    "        # cloud_id=getpass(\"Cloud ID: \"),\n",
+    "        hosts=[elastic_endpoint],\n",
+    "        api_key=api_key\n",
     "    )\n",
     "\n",
     "    # Test the connection\n",
@@ -116,7 +109,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -126,16 +119,6 @@
       "Successfully bulk indexed 4840 documents\n",
       "Indexing complete!\n"
      ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
-      "  warnings.warn(\n",
-      "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
-      "  warnings.warn(\n"
-     ]
     }
    ],
    "source": [
@@ -227,7 +210,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.6"
+   "version": "3.12.10"
   }
  },
  "nbformat": 4,

From 3b8e5d21ba71c8bf612cecd178baa85edfdf32e8 Mon Sep 17 00:00:00 2001
From: Carly Richmond <carly.richmond@elastic.co>
Date: Wed, 8 Oct 2025 13:21:56 +0200
Subject: [PATCH 4/5] Changing code formatting to fix issue

---
 .../multilingual-embedding/multilingual_embedding.ipynb  | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb b/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
index 80d995f7..308fabb5 100644
--- a/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
+++ b/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
@@ -11,7 +11,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -75,7 +75,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -90,10 +90,7 @@
     "from elasticsearch import Elasticsearch\n",
     "\n",
     "try:\n",
-    "    es = Elasticsearch(\n",
-    "        hosts=[elastic_endpoint],\n",
-    "        api_key=api_key\n",
-    "    )\n",
+    "    es = Elasticsearch(hosts=[elastic_endpoint], api_key=api_key)\n",
     "\n",
     "    # Test the connection\n",
     "    if not es.ping():\n",

From 4ef9d83c3964920a484f515fce79a4203a664be5 Mon Sep 17 00:00:00 2001
From: Quynh Nguyen <quynh.nguyen@elastic.co>
Date: Wed, 8 Oct 2025 10:46:52 -0500
Subject: [PATCH 5/5] Update with new ES queries for completeness

---
 .../multilingual_embedding.ipynb              | 227 ++++++++++++++++++
 1 file changed, 227 insertions(+)

diff --git a/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb b/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
index 308fabb5..1414f79c 100644
--- a/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
+++ b/supporting-blog-content/multilingual-embedding/multilingual_embedding.ipynb
@@ -189,6 +189,233 @@
     "\n",
     "print(\"Indexing complete!\")"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we are going to create a pipeline that vectorizes each description by passing it as `text_field` to our inference text embedding model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pipeline_body = {\n",
+    "    \"description\": \"Pipeline to run the descriptions text_field through our inference text embedding model\",\n",
+    "    \"processors\": [\n",
+    "        {\n",
+    "            \"set\": {\n",
+    "                \"field\": \"temp_desc\",\n",
+    "                \"value\": \"passage: {{description}}\"\n",
+    "            }\n",
+    "        },\n",
+    "        {\n",
+    "            \"inference\": {\n",
+    "                \"field_map\": {\n",
+    "                    \"temp_desc\": \"text_field\"\n",
+    "                },\n",
+    "                \"model_id\": \".multilingual-e5-small_linux-x86_64_search\",\n",
+    "                \"target_field\": \"vector_description\"\n",
+    "            }\n",
+    "        },\n",
+    "        {\n",
+    "            \"remove\": {\n",
+    "                \"field\": \"temp_desc\"\n",
+    "            }\n",
+    "        }\n",
+    "    ]\n",
+    "}\n",
+    "\n",
+    "try:\n",
+    "    es.ingest.put_pipeline(id=\"vectorize_descriptions\", body=pipeline_body)\n",
+    "    print(\"Pipeline 'vectorize_descriptions' created successfully.\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Error creating pipeline: {str(e)}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We also need to create a new Elasticsearch index with the specified vector mapping."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "index_body = {\n",
+    "    \"mappings\": {\n",
+    "        \"properties\": {\n",
+    "            \"description\": {\n",
+    "                \"type\": \"text\"\n",
+    "            },\n",
+    "            \"en\": {\n",
+    "                \"type\": \"text\"\n",
+    "            },\n",
+    "            \"image_url\": {\n",
+    "                \"type\": \"keyword\"\n",
+    "            },\n",
+    "            \"language\": {\n",
+    "                \"type\": \"keyword\"\n",
+    "            },\n",
+    "            \"vector_description.predicted_value\": {\n",
+    "                \"type\": \"dense_vector\",\n",
+    "                \"dims\": 384,\n",
+    "                \"index\": True,\n",
+    "                \"similarity\": \"cosine\",\n",
+    "                \"index_options\": {\n",
+    "                    \"type\": \"bbq_hnsw\"\n",
+    "                }\n",
+    "            }\n",
+    "        }\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "try:\n",
+    "    es.indices.create(index=\"coco_multi\", body=index_body)\n",
+    "    print(\"Index 'coco_multi' created successfully.\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Error creating index: {str(e)}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, we just need to reindex the data through the pipeline so that it is vectorized and stored in the new Elasticsearch index."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Reuse the authenticated `es` client created in the connection cell above;\n",
+    "# a bare Elasticsearch() carries no endpoint or API key.\n",
+    "# The destination must be `coco_multi`, the index created above with the\n",
+    "# dense_vector mapping, because the kNN searches below query that index.\n",
+    "reindex_body = {\n",
+    "    \"source\": {\n",
+    "        \"index\": \"coco\"\n",
+    "    },\n",
+    "    \"dest\": {\n",
+    "        \"index\": \"coco_multi\",\n",
+    "        \"pipeline\": \"vectorize_descriptions\"\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "response = es.reindex(\n",
+    "    body=reindex_body,\n",
+    "    # Not waiting for completion here because this process might take a while\n",
+    "    wait_for_completion=False\n",
+    ")\n",
+    "\n",
+    "print(\"Reindex task started. Task info:\")\n",
+    "print(response)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Voilà, now let's try some queries and have some fun!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_body = {\n",
+    "    \"size\": 10,\n",
+    "    \"_source\": [\n",
+    "        \"description\", \"language\", \"en\"\n",
+    "    ],\n",
+    "    \"knn\": {\n",
+    "        \"field\": \"vector_description.predicted_value\",\n",
+    "        \"k\": 10,\n",
+    "        \"num_candidates\": 100,\n",
+    "        \"query_vector_builder\": {\n",
+    "            \"text_embedding\": {\n",
+    "                \"model_id\": \".multilingual-e5-small_linux-x86_64_search\",\n",
+    "                \"model_text\": \"query: kitty\"\n",
+    "            }\n",
+    "        }\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "response = es.search(index=\"coco_multi\", body=query_body)\n",
+    "print(response)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_body = {\n",
+    "    \"size\": 100,\n",
+    "    \"_source\": [\n",
+    "        \"description\", \"language\", \"en\"\n",
+    "    ],\n",
+    "    \"knn\": {\n",
+    "        \"field\": \"vector_description.predicted_value\",\n",
+    "        \"k\": 50,\n",
+    "        \"num_candidates\": 1000,\n",
+    "        \"query_vector_builder\": {\n",
+    "            \"text_embedding\": {\n",
+    "                \"model_id\": \".multilingual-e5-small_linux-x86_64_search\",\n",
+    "                \"model_text\": \"query: kitty lying on something\"\n",
+    "            }\n",
+    "        }\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "response = es.search(index=\"coco_multi\", body=query_body)\n",
+    "print(response)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_body = {\n",
+    "    \"size\": 100,\n",
+    "    \"_source\": [\n",
+    "        \"description\", \"language\", \"en\"\n",
+    "    ],\n",
+    "    \"knn\": {\n",
+    "        \"field\": \"vector_description.predicted_value\",\n",
+    "        \"k\": 50,\n",
+    "        \"num_candidates\": 1000,\n",
+    "        \"query_vector_builder\": {\n",
+    "            \"text_embedding\": {\n",
+    "                \"model_id\": \".multilingual-e5-small_linux-x86_64_search\",\n",
+    "                \"model_text\": \"query: 고양이\"\n",
+    "            }\n",
+    "        }\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "response = es.search(index=\"coco_multi\", body=query_body)\n",
+    "print(response)\n"
+   ]
   }
  ],
  "metadata": {