diff --git a/docs/reference/search/search-your-data/cohere-es.asciidoc b/docs/reference/search/search-your-data/cohere-es.asciidoc
new file mode 100644
index 0000000000000..f12f23ad2c5dc
--- /dev/null
+++ b/docs/reference/search/search-your-data/cohere-es.asciidoc
@@ -0,0 +1,372 @@
+[[cohere-es]]
+=== Tutorial: Using Cohere with {es}
+++++
+Using Cohere with {es}
+++++
+
+The instructions in this tutorial show you how to compute embeddings with
+Cohere using the {infer} API and store them for efficient vector or hybrid
+search in {es}. The tutorial uses the Python {es} client to perform the
+operations.
+
+You'll learn how to:
+
+* create an {infer} endpoint for text embedding using the Cohere service,
+* create the necessary index mapping for the {es} index,
+* build an {infer} pipeline to ingest documents into the index together with the
+embeddings,
+* perform hybrid search on the data,
+* rerank search results by using Cohere's rerank model,
+* design a RAG system with Cohere's Chat API.
+
+The tutorial uses the https://huggingface.co/datasets/mteb/scifact[SciFact] data
+set.
+
+Refer to https://docs.cohere.com/docs/elasticsearch-and-cohere[Cohere's tutorial]
+for an example using a different data set.
+
+
+[discrete]
+[[cohere-es-req]]
+==== Requirements
+
+* A https://cohere.com/[Cohere account],
+* an https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html[Elastic Cloud]
+account,
+* Python 3.7 or higher.
+
+
+[discrete]
+[[cohere-es-packages]]
+==== Install required packages
+
+Install the {es} and Cohere Python clients:
+
+[source,py]
+------------------------------------------------------------
+!pip install elasticsearch
+!pip install cohere
+------------------------------------------------------------
+
+Import the required packages:
+
+[source,py]
+------------------------------------------------------------
+from elasticsearch import Elasticsearch, helpers
+import cohere
+import json
+import requests
+------------------------------------------------------------
+
+[discrete]
+[[cohere-es-client]]
+==== Create the {es} client
+
+To create your {es} client, you need:
+
+* https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id[your Cloud ID],
+* https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key[an encoded API key].
+
+[source,py]
+------------------------------------------------------------
+ELASTICSEARCH_ENDPOINT = "elastic_endpoint"
+ELASTIC_API_KEY = "elastic_api_key"
+
+client = Elasticsearch(
+ cloud_id=ELASTICSEARCH_ENDPOINT,
+ api_key=ELASTIC_API_KEY
+)
+
+# Confirm the client has connected
+print(client.info())
+------------------------------------------------------------
+
+
+[discrete]
+[[cohere-es-infer-endpoint]]
+==== Create the {infer} endpoint
+
+Create the <<put-inference-api,{infer} endpoint>> first. In this example, the
+{infer} endpoint uses Cohere's `embed-english-v3.0` model and the
+`embedding_type` is set to `byte`.
+
+[source,py]
+------------------------------------------------------------
+COHERE_API_KEY = "cohere_api_key"
+
+client.inference.put_model(
+ task_type="text_embedding",
+ inference_id="cohere_embeddings",
+ body={
+ "service": "cohere",
+ "service_settings": {
+ "api_key": COHERE_API_KEY,
+ "model_id": "embed-english-v3.0",
+ "embedding_type": "byte"
+ }
+ },
+)
+------------------------------------------------------------
+
+You can find your API keys in your Cohere dashboard under the
+https://dashboard.cohere.com/api-keys[API keys section].
+
+
+[discrete]
+[[cohere-es-index-mapping]]
+==== Create the index mapping
+
+Create the index mapping for the index that will contain the embeddings.
+
+[source,py]
+------------------------------------------------------------
+client.indices.create(
+ index="cohere-embeddings",
+ settings={"index": {"default_pipeline": "cohere_embeddings"}},
+ mappings={
+ "properties": {
+ "text_embedding": {
+ "type": "dense_vector",
+ "dims": 1024,
+ "element_type": "byte",
+ },
+ "text": {"type": "text"},
+ "id": {"type": "integer"},
+ "title": {"type": "text"}
+ }
+ },
+)
+------------------------------------------------------------
+
+
+[discrete]
+[[cohere-es-infer-pipeline]]
+==== Create the {infer} pipeline
+
+Now you have an {infer} endpoint and an index ready to store embeddings. The
+next step is to create an <<ingest,ingest pipeline>> with an
+<<inference-processor,{infer} processor>> that creates the embeddings using
+the {infer} endpoint and stores them in the index.
+
+[source,py]
+--------------------------------------------------
+client.ingest.put_pipeline(
+ id="cohere_embeddings",
+ description="Ingest pipeline for Cohere inference.",
+ processors=[
+ {
+ "inference": {
+ "model_id": "cohere_embeddings",
+ "input_output": {
+ "input_field": "text",
+ "output_field": "text_embedding",
+ },
+ }
+ }
+ ],
+)
+--------------------------------------------------
+
+
+[discrete]
+[[cohere-es-insert-documents]]
+==== Prepare data and insert documents
+
+This example uses the https://huggingface.co/datasets/mteb/scifact[SciFact] data
+set that you can find on Hugging Face.
+
+[source,py]
+--------------------------------------------------
+url = 'https://huggingface.co/datasets/mteb/scifact/raw/main/corpus.jsonl'
+
+# Fetch the JSONL data from the URL
+response = requests.get(url)
+response.raise_for_status()  # Raise an exception for bad responses
+
+# Split the content by new lines and parse each line as JSON
+data = [json.loads(line) for line in response.text.strip().split('\n') if line]
+# Now data is a list of dictionaries
+
+# Rename the `_id` key to `id` as `_id` is a reserved metadata field in Elasticsearch.
+for item in data:
+ if '_id' in item:
+ item['id'] = item.pop('_id')
+
+# Prepare the documents to be indexed
+documents = []
+for item in data:
+    documents.append({
+        "_index": "cohere-embeddings",
+        "_source": item,
+    })
+
+# Use the bulk endpoint to index
+helpers.bulk(client, documents)
+
+print("Data ingestion completed, text embeddings generated!")
+--------------------------------------------------
+
+Your index is populated with the SciFact data and text embeddings for the text
+field.
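+
+To verify, you can fetch one of the indexed documents and check that the
+pipeline populated the `text_embedding` field. This is a minimal sketch using
+the field names from the mapping created earlier:
+
+[source,py]
+--------------------------------------------------
+# Fetch a single document and confirm the embedding was generated.
+# Note: newly indexed documents become searchable after an index refresh.
+result = client.search(index="cohere-embeddings", size=1)
+doc = result["hits"]["hits"][0]["_source"]
+print(f'Title: {doc["title"]}')
+print(f'Embedding length: {len(doc["text_embedding"])}')  # expected: 1024
+--------------------------------------------------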
+
+
+[discrete]
+[[cohere-es-hybrid-search]]
+==== Hybrid search
+
+Let's start querying the index!
+
+The code below performs a hybrid search. The `kNN` query computes the
+relevance of search results based on vector similarity using the
+`text_embedding` field, while the lexical search query uses BM25 retrieval to
+compute keyword similarity on the `title` and `text` fields.
+
+[source,py]
+--------------------------------------------------
+query = "What is biosimilarity?"
+
+response = client.search(
+ index="cohere-embeddings",
+ size=100,
+ knn={
+ "field": "text_embedding",
+ "query_vector_builder": {
+ "text_embedding": {
+ "model_id": "cohere_embeddings",
+ "model_text": query,
+ }
+ },
+ "k": 10,
+ "num_candidates": 50,
+ },
+ query={
+ "multi_match": {
+ "query": query,
+ "fields": ["text", "title"]
+ }
+ }
+)
+
+raw_documents = response["hits"]["hits"]
+
+# Display the first 10 results
+for document in raw_documents[0:10]:
+ print(f'Title: {document["_source"]["title"]}\nText: {document["_source"]["text"]}\n')
+
+# Format the documents for ranking
+documents = []
+for hit in response["hits"]["hits"]:
+ documents.append(hit["_source"]["text"])
+--------------------------------------------------
+
+
+[discrete]
+[[cohere-es-rerank-results]]
+===== Rerank search results
+
+To combine the results more effectively, use
+https://docs.cohere.com/docs/rerank-2[Cohere's Rerank v3] model through the
+{infer} API to provide a more precise semantic reranking of the results.
+
+Create an {infer} endpoint with your Cohere API key and the name of the model
+to use as the `model_id` (`rerank-english-v3.0` in this example).
+
+[source,py]
+--------------------------------------------------
+client.inference.put_model(
+ task_type="rerank",
+ inference_id="cohere_rerank",
+ body={
+ "service": "cohere",
+ "service_settings":{
+ "api_key": COHERE_API_KEY,
+ "model_id": "rerank-english-v3.0"
+ },
+ "task_settings": {
+ "top_n": 10,
+ },
+ }
+)
+--------------------------------------------------
+
+Rerank the results using the new {infer} endpoint.
+
+[source,py]
+--------------------------------------------------
+# Pass the query and the search results to the service
+response = client.inference.inference(
+ inference_id="cohere_rerank",
+ body={
+ "query": query,
+ "input": documents,
+ "task_settings": {
+ "return_documents": False
+ }
+ }
+)
+
+# Reconstruct the input documents based on the index provided in the rerank response
+ranked_documents = []
+for document in response.body["rerank"]:
+ ranked_documents.append({
+ "title": raw_documents[int(document["index"])]["_source"]["title"],
+ "text": raw_documents[int(document["index"])]["_source"]["text"]
+ })
+
+# Print the top 10 results
+for document in ranked_documents[0:10]:
+ print(f"Title: {document['title']}\nText: {document['text']}\n")
+--------------------------------------------------
+
+The response is a list of documents in descending order of relevance. Each
+document has a corresponding index that reflects the order of the documents when
+they were sent to the {infer} endpoint.
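+
+For example, to see how the reranker reordered the hits, you can print each
+entry's original position together with its score. This sketch assumes each
+entry in the response exposes an `index` and a `relevance_score` field:
+
+[source,py]
+--------------------------------------------------
+# Show the original position and relevance score of each reranked entry
+# (`relevance_score` is assumed to be present in the rerank response).
+for entry in response.body["rerank"]:
+    print(f'Original index: {entry["index"]}, score: {entry["relevance_score"]}')
+--------------------------------------------------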
+
+
+[discrete]
+[[cohere-es-rag]]
+==== Retrieval Augmented Generation (RAG) with Cohere and {es}
+
+RAG is a method for generating text using additional information fetched from
+an external data source. With the ranked results, you can build a RAG system
+on top of what you previously created by using
+https://docs.cohere.com/docs/chat-api[Cohere's Chat API].
+
+Pass in the query and the retrieved documents to receive a grounded response
+using Cohere's newest generative model,
+https://docs.cohere.com/docs/command-r-plus[Command R+], and print out the
+response.
+
+[source,py]
+--------------------------------------------------
+# Create the Cohere client to call the Chat API
+co = cohere.Client(COHERE_API_KEY)
+
+response = co.chat(message=query, documents=ranked_documents, model='command-r-plus')
+
+source_documents = []
+for citation in response.citations:
+ for document_id in citation.document_ids:
+ if document_id not in source_documents:
+ source_documents.append(document_id)
+
+print(f"Query: {query}")
+print(f"Response: {response.text}")
+print("Sources:")
+for document in response.documents:
+ if document['id'] in source_documents:
+ print(f"{document['title']}: {document['text']}")
+
+--------------------------------------------------
+
+The response will look similar to this:
+
+[source,console-result]
+--------------------------------------------------
+Query: What is biosimilarity?
+Response: Biosimilarity is based on the comparability concept, which has been used successfully for several decades to ensure close similarity of a biological product before and after a manufacturing change. Over the last 10 years, experience with biosimilars has shown that even complex biotechnology-derived proteins can be copied successfully.
+Sources:
+Interchangeability of Biosimilars: A European Perspective: (...)
+--------------------------------------------------
+// NOTCONSOLE
diff --git a/docs/reference/search/search-your-data/semantic-search.asciidoc b/docs/reference/search/search-your-data/semantic-search.asciidoc
index a4d892c98645b..a1197e7bbbd3a 100644
--- a/docs/reference/search/search-your-data/semantic-search.asciidoc
+++ b/docs/reference/search/search-your-data/semantic-search.asciidoc
@@ -136,3 +136,4 @@ include::{es-ref-dir}/tab-widgets/semantic-search/hybrid-search-widget.asciidoc[
include::semantic-search-elser.asciidoc[]
include::semantic-search-inference.asciidoc[]
+include::cohere-es.asciidoc[]