From fa2f81353e3ec44b3945bfbf3792db2d9c5a6030 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Thu, 9 May 2024 09:59:56 +0200 Subject: [PATCH] [DOCS] Adds complete Cohere tutorial (#108415) --- .../search-your-data/cohere-es.asciidoc | 371 ++++++++++++++++++ .../search-your-data/semantic-search.asciidoc | 1 + 2 files changed, 372 insertions(+) create mode 100644 docs/reference/search/search-your-data/cohere-es.asciidoc diff --git a/docs/reference/search/search-your-data/cohere-es.asciidoc b/docs/reference/search/search-your-data/cohere-es.asciidoc new file mode 100644 index 0000000000000..751cfebca8c78 --- /dev/null +++ b/docs/reference/search/search-your-data/cohere-es.asciidoc @@ -0,0 +1,371 @@ +[[cohere-es]] +=== Tutorial: Using Cohere with {es} +++++ +Using Cohere with {es} +++++ + +The instructions in this tutorial show you how to compute embeddings with +Cohere using the {infer} API and store them for efficient vector or hybrid +search in {es}. This tutorial will use the Python {es} client to perform the +operations. + +You'll learn how to: + +* create an {infer} endpoint for text embedding using the Cohere service, +* create the necessary index mapping for the {es} index, +* build an {infer} pipeline to ingest documents into the index together with the +embeddings, +* perform hybrid search on the data, +* rerank search results by using Cohere's rerank model, +* design a RAG system with Cohere's Chat API. + +The tutorial uses the https://huggingface.co/datasets/mteb/scifact[SciFact] data +set. + +Refer to https://docs.cohere.com/docs/elasticsearch-and-cohere[Cohere's tutorial] +for an example using a different data set. + + +[discrete] +[[cohere-es-req]] +==== Requirements + +* A https://cohere.com/[Cohere account], +* an https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html[Elastic Cloud] +account, +* Python 3.7 or higher.
+ + +[discrete] +[[cohere-es-packages]] +==== Install required packages + +Install {es} and Cohere: + +[source,py] +------------------------------------------------------------ +!pip install elasticsearch +!pip install cohere +------------------------------------------------------------ + +Import the required packages: + +[source,py] +------------------------------------------------------------ +from elasticsearch import Elasticsearch, helpers +import cohere +import json +import requests +------------------------------------------------------------ + +[discrete] +[[cohere-es-client]] +==== Create the {es} client + +To create your {es} client, you need: +* https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id[your Cloud ID], +* https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key[an encoded API key]. + +[source,py] +------------------------------------------------------------ +ELASTICSEARCH_ENDPOINT = "elastic_endpoint" +ELASTIC_API_KEY = "elastic_api_key" + +client = Elasticsearch( + cloud_id=ELASTICSEARCH_ENDPOINT, + api_key=ELASTIC_API_KEY +) + +# Confirm the client has connected +print(client.info()) +------------------------------------------------------------ + + +[discrete] +[[cohere-es-infer-endpoint]] +==== Create the {infer} endpoint + +<<put-inference-api,Create the {infer} endpoint>> first. In this example, the +{infer} endpoint uses Cohere's `embed-english-v3.0` model and the +`embedding_type` is set to `byte`.
+ +[source,py] +------------------------------------------------------------ +COHERE_API_KEY = "cohere_api_key" + +client.inference.put_model( + task_type="text_embedding", + inference_id="cohere_embeddings", + body={ + "service": "cohere", + "service_settings": { + "api_key": COHERE_API_KEY, + "model_id": "embed-english-v3.0", + "embedding_type": "byte" + } + }, +) +------------------------------------------------------------ + +You can find your API keys in your Cohere dashboard under the +https://dashboard.cohere.com/api-keys[API keys section]. + + +[discrete] +[[cohere-es-index-mapping]] +==== Create the index mapping + +Create the index mapping for the index that will contain the embeddings. + +[source,py] +------------------------------------------------------------ +client.indices.create( + index="cohere-embeddings", + settings={"index": {"default_pipeline": "cohere_embeddings"}}, + mappings={ + "properties": { + "text_embedding": { + "type": "dense_vector", + "dims": 1024, + "element_type": "byte", + }, + "text": {"type": "text"}, + "id": {"type": "integer"}, + "title": {"type": "text"} + } + }, +) +------------------------------------------------------------ + + +[discrete] +[[cohere-es-infer-pipeline]] +==== Create the {infer} pipeline + +Now you have an {infer} endpoint and an index ready to store embeddings. The +next step is to create an <<ingest,ingest pipeline>> with an +<<inference-processor,{infer} processor>> that will create the embeddings using +the {infer} endpoint and store them in the index.
+ +[source,py] +-------------------------------------------------- +client.ingest.put_pipeline( + id="cohere_embeddings", + description="Ingest pipeline for Cohere inference.", + processors=[ + { + "inference": { + "model_id": "cohere_embeddings", + "input_output": { + "input_field": "text", + "output_field": "text_embedding", + }, + } + } + ], +) +-------------------------------------------------- + + +[discrete] +[[cohere-es-insert-documents]] +==== Prepare data and insert documents + +This example uses the https://huggingface.co/datasets/mteb/scifact[SciFact] data +set that you can find on HuggingFace. + +[source,py] +-------------------------------------------------- +url = 'https://huggingface.co/datasets/mteb/scifact/raw/main/corpus.jsonl' + +# Fetch the JSONL data from the URL +response = requests.get(url) +response.raise_for_status()  # Raise an exception for HTTP error responses + +# Split the content by new lines and parse each line as JSON +data = [json.loads(line) for line in response.text.strip().split('\n') if line] +# Now data is a list of dictionaries + +# Change `_id` key to `id` as `_id` is a reserved key in Elasticsearch. +for item in data: + if '_id' in item: + item['id'] = item.pop('_id') + +# Prepare the documents to be indexed +documents = [] +for line in data: + data_dict = line + documents.append({ + "_index": "cohere-embeddings", + "_source": data_dict, + } + ) + +# Use the bulk endpoint to index +helpers.bulk(client, documents) + +print("Data ingestion completed, text embeddings generated!") +-------------------------------------------------- + +Your index is populated with the SciFact data and text embeddings for the text +field. + + +[discrete] +[[cohere-es-hybrid-search]] +==== Hybrid search + +Let's start querying the index! + +The code below performs a hybrid search.
The `kNN` query computes the relevance +of search results based on vector similarity using the `text_embedding` field, +while the lexical search query uses BM25 retrieval to compute keyword similarity on +the `title` and `text` fields. + +[source,py] +-------------------------------------------------- +query = "What is biosimilarity?" + +response = client.search( + index="cohere-embeddings", + size=100, + knn={ + "field": "text_embedding", + "query_vector_builder": { + "text_embedding": { + "model_id": "cohere_embeddings", + "model_text": query, + } + }, + "k": 10, + "num_candidates": 50, + }, + query={ + "multi_match": { + "query": query, + "fields": ["text", "title"] + } + } +) + +raw_documents = response["hits"]["hits"] + +# Display the first 10 results +for document in raw_documents[0:10]: + print(f'Title: {document["_source"]["title"]}\nText: {document["_source"]["text"]}\n') + +# Format the documents for ranking +documents = [] +for hit in response["hits"]["hits"]: + documents.append(hit["_source"]["text"]) +-------------------------------------------------- + + +[discrete] +[[cohere-es-rerank-results]] +===== Rerank search results + +To combine the results more effectively, use +https://docs.cohere.com/docs/rerank-2[Cohere's Rerank v3] model through the +{infer} API to provide a more precise semantic reranking of the results. + +Create an {infer} endpoint with your Cohere API key and the used model name as +the `model_id` (`rerank-english-v3.0` in this example). + +[source,py] +-------------------------------------------------- +client.inference.put_model( + task_type="rerank", + inference_id="cohere_rerank", + body={ + "service": "cohere", + "service_settings":{ + "api_key": COHERE_API_KEY, + "model_id": "rerank-english-v3.0" + }, + "task_settings": { + "top_n": 10, + }, + } +) +-------------------------------------------------- + +Rerank the results using the new {infer} endpoint.
+ +[source,py] +-------------------------------------------------- +# Pass the query and the search results to the service +response = client.inference.inference( + inference_id="cohere_rerank", + body={ + "query": query, + "input": documents, + "task_settings": { + "return_documents": False + } + } +) + +# Reconstruct the input documents based on the index provided in the rerank response +ranked_documents = [] +for document in response.body["rerank"]: + ranked_documents.append({ + "title": raw_documents[int(document["index"])]["_source"]["title"], + "text": raw_documents[int(document["index"])]["_source"]["text"] + }) + +# Print the top 10 results +for document in ranked_documents[0:10]: + print(f"Title: {document['title']}\nText: {document['text']}\n") +-------------------------------------------------- + +The response is a list of documents in descending order of relevance. Each +document has a corresponding index that reflects the order of the documents when +they were sent to the {infer} endpoint. + + +[discrete] +[[cohere-es-rag]] +==== Retrieval Augmented Generation (RAG) with Cohere and {es} + +RAG is a method for generating text using additional information fetched from an +external data source. With the ranked results, you can build a RAG system on +top of what you previously created by using +https://docs.cohere.com/docs/chat-api[Cohere's Chat API]. + +Pass in the retrieved documents and the query to receive a grounded response +using Cohere's newest generative model +https://docs.cohere.com/docs/command-r-plus[Command R+]. + +Then pass in the query and the documents to the Chat API, and print out the +response.
+ +[source,py] +-------------------------------------------------- +response = co.chat(message=query, documents=ranked_documents, model='command-r-plus') + +source_documents = [] +for citation in response.citations: + for document_id in citation.document_ids: + if document_id not in source_documents: + source_documents.append(document_id) + +print(f"Query: {query}") +print(f"Response: {response.text}") +print("Sources:") +for document in response.documents: + if document['id'] in source_documents: + print(f"{document['title']}: {document['text']}") + +-------------------------------------------------- + +The response will look similar to this: + +[source,console-result] +-------------------------------------------------- +Query: What is biosimilarity? +Response: Biosimilarity is based on the comparability concept, which has been used successfully for several decades to ensure close similarity of a biological product before and after a manufacturing change. Over the last 10 years, experience with biosimilars has shown that even complex biotechnology-derived proteins can be copied successfully. +Sources: +Interchangeability of Biosimilars: A European Perspective: (...) +-------------------------------------------------- +// NOTCONSOLE diff --git a/docs/reference/search/search-your-data/semantic-search.asciidoc b/docs/reference/search/search-your-data/semantic-search.asciidoc index a4d892c98645b..a1197e7bbbd3a 100644 --- a/docs/reference/search/search-your-data/semantic-search.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search.asciidoc @@ -136,3 +136,4 @@ include::{es-ref-dir}/tab-widgets/semantic-search/hybrid-search-widget.asciidoc[ include::semantic-search-elser.asciidoc[] include::semantic-search-inference.asciidoc[] +include::cohere-es.asciidoc[]