diff --git a/docs/docs/integrations/text_embedding/bookend.ipynb b/docs/docs/integrations/text_embedding/bookend.ipynb new file mode 100644 index 00000000000000..3277d49a6c885a --- /dev/null +++ b/docs/docs/integrations/text_embedding/bookend.ipynb @@ -0,0 +1,89 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2c591a6a42ac7f0", + "metadata": {}, + "source": [ + "# Bookend AI\n", + "\n", + "Let's load the Bookend AI Embeddings class." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d94c62b4", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings import BookendEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "523a09e3", + "metadata": {}, + "outputs": [], + "source": [ + "embeddings = BookendEmbeddings(\n", + " domain=\"your_domain\",\n", + " api_token=\"your_api_token\",\n", + " model_id=\"your_embeddings_model_id\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b212bd5a", + "metadata": {}, + "outputs": [], + "source": [ + "text = \"This is a test document.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57db66bd", + "metadata": {}, + "outputs": [], + "source": [ + "query_result = embeddings.embed_query(text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b790fd09", + "metadata": {}, + "outputs": [], + "source": [ + "doc_result = embeddings.embed_documents([text])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain/langchain/embeddings/__init__.py 
"""Wrapper around Bookend AI embedding models."""

import json
from typing import Any, List

import requests

from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema.embeddings import Embeddings

API_URL = "https://api.bookend.ai/"
DEFAULT_TASK = "embeddings"
PATH = "/models/predict"


class BookendEmbeddings(BaseModel, Embeddings):
    """Bookend AI sentence_transformers embedding models.

    Example:
        .. code-block:: python

            from langchain.embeddings import BookendEmbeddings

            bookend = BookendEmbeddings(
                domain={domain},
                api_token={api_token},
                model_id={model_id},
            )
            bookend.embed_documents([
                "Please put on these earmuffs because I can't hear you.",
                "Baby wipes are made of chocolate stardust.",
            ])
            bookend.embed_query(
                "She only paints with bold colors; she does not like pastels."
            )
    """

    domain: str
    """Request a domain at https://bookend.ai/ to use this embeddings module."""
    api_token: str
    """Request an API token at https://bookend.ai/ to use this embeddings module."""
    model_id: str
    """Embeddings model ID to use."""
    auth_header: dict = Field(default_factory=dict)
    """Authorization header sent with every request.

    Always rebuilt from ``api_token`` in ``__init__``; a value supplied by the
    caller is overwritten.
    """

    def __init__(self, **kwargs: Any):
        """Validate the fields and derive ``auth_header`` from ``api_token``."""
        super().__init__(**kwargs)
        self.auth_header = {"Authorization": f"Basic {self.api_token}"}

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents using a Bookend deployed embeddings model.

        One POST request is issued per text, in order.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.

        Raises:
            requests.HTTPError: If the API answers with an error status code.
        """
        # Build a fresh dict: writing into ``self.auth_header`` directly would
        # leak the Content-Type entry into the instance state after the first call.
        headers = {
            **self.auth_header,
            "Content-Type": "application/json; charset=utf-8",
        }
        params = {
            "model_id": self.model_id,
            "task": DEFAULT_TASK,
        }
        # The endpoint does not depend on the text, so compute it once.
        url = API_URL + self.domain + PATH

        result = []
        for text in texts:
            payload = json.dumps(
                {"text": text, "question": None, "context": None, "instruction": None}
            )
            # NOTE(review): no timeout is set, so a stalled connection blocks
            # indefinitely -- consider exposing one as an optional field.
            response = requests.request(
                "POST",
                url,
                headers=headers,
                params=params,
                data=payload,
            )
            # Fail with a clear HTTP error instead of an opaque KeyError/TypeError
            # when the service rejects the request.
            response.raise_for_status()
            # The embedding is read from the "data" field of the first element
            # of the JSON response.
            result.append(response.json()[0]["data"])

        return result

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using a Bookend deployed embeddings model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self.embed_documents([text])[0]