Skip to content

Commit

Permalink
Add input_type override (langchain-ai#14068)
Browse files Browse the repository at this point in the history
Add an option to override input_type for Cohere's v3 embedding models

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
  • Loading branch information
2 people authored and aymeric-roucher committed Dec 11, 2023
1 parent 25eacbb commit 27ddfa7
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 44 deletions.
40 changes: 29 additions & 11 deletions docs/docs/integrations/text_embedding/cohere.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "6b82f59f",
"metadata": {},
"outputs": [],
Expand All @@ -22,17 +22,17 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "26895c60",
"metadata": {},
"outputs": [],
"source": [
"embeddings = CohereEmbeddings(cohere_api_key=cohere_api_key)"
"embeddings = CohereEmbeddings(model=\"embed-english-light-v3.0\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "eea52814",
"metadata": {},
"outputs": [],
Expand All @@ -42,22 +42,40 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "fbe167bf",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.072631836, 0.06921387, -0.02658081, 0.022705078, 0.027328491, 0.046905518, -0.01838684, -0.029525757, 0.0041046143, -0.028198242, 0.0496521, 0.026901245, 0.03274536, 0.01574707, -0.081726074, -0.022369385, 0.049591064, 0.06549072, -0.015083313, -0.053863525, 0.098083496, 0.034698486, -0.08557129, -0.0024662018, -0.07519531, 0.03265381, 0.006046295, -0.0060691833, 0.032196045, 0.07537842, 9.024143e-05, -0.00869751, 0.022735596, 0.06329346, 0.068481445, -0.006778717, -0.07885742, 0.049560547, -0.008811951, 0.025253296, 0.050750732, -0.05343628, 0.051361084, -0.02319336, 0.026382446, 0.088378906, 0.03567505, -0.0736084, 0.039215088, -0.020584106, -0.03112793, -0.071777344, 0.018218994, -0.01876831, 0.040863037, 0.080078125, 0.046020508, -0.030792236, -0.011779785, -0.024871826, -0.06652832, 0.04748535, -0.038116455, 0.08453369, 0.08746338, 0.059509277, -0.037628174, -0.045410156, -0.054626465, -0.0036334991, -0.035949707, -0.011070251, 0.054534912, 0.0803833, 0.052734375, 0.06689453, 0.0074310303, 0.018249512, -0.023773193, 0.03845215, -0.113220215, 0.014251709, 0.028289795, -0.03942871, 0.029525757, 0.03036499, 0.035095215, 0.031829834, -0.0015306473, 0.027252197, 0.005088806, -0.035858154, -0.113220215, 0.021606445, 0.012046814, -0.06137085, 0.0057640076, -0.06994629, 0.02532959, 0.016952515, -0.010398865, -0.0066184998, -0.020904541, -0.12030029, 0.0036029816, -0.061553955, 0.023956299, -0.07330322, 0.013053894, -0.009613037, -0.062683105, 0.00013184547, 0.12030029, 0.028167725, 0.048614502, -0.09301758, -0.020324707, 0.022369385, -0.14025879, -0.052764893, 0.07220459, 0.028198242, 0.01499939, -0.029449463, 0.004711151, -0.05947876, 0.1640625, -0.09240723, 0.019500732, -0.0031089783, 0.0032081604, -0.0049934387, -0.01676941, 0.002691269, 0.02848816, 0.013504028, -0.057800293, 0.049041748, -0.022384644, 0.05517578, -0.031982422, 0.055389404, 0.0859375, 0.019866943, -0.052978516, 0.030929565, -0.15979004, 0.068481445, -0.020080566, -0.033477783, 0.07922363, 
-0.020736694, -0.025680542, 0.054016113, -0.028839111, -0.016189575, 0.03564453, 0.0001078248, 0.06304932, -0.022781372, 0.06555176, 0.010093689, 0.03286743, 0.14111328, -0.008468628, -0.04849243, 0.04525757, 0.065979004, -0.012138367, -0.017044067, 0.059509277, 0.035339355, -0.017807007, -0.027267456, -0.0034656525, -0.02078247, -0.033477783, 0.05041504, -0.043518066, -0.064208984, 0.034942627, -0.009300232, -0.08148193, 0.007774353, -0.03540039, -0.008255005, -0.1060791, -0.0703125, 0.091308594, 0.10095215, -0.081970215, 0.02355957, -0.026382446, -0.0070610046, -0.051208496, -0.014961243, 0.07269287, -0.033721924, 0.017669678, -0.08972168, 0.035339355, 0.03579712, -0.07299805, -0.014144897, -0.008850098, 0.023742676, -0.05847168, -0.07873535, -0.015388489, -0.039642334, -0.028930664, 0.008926392, -0.040283203, -0.02897644, -0.013557434, -0.006088257, 0.024169922, -0.10217285, 0.014526367, 0.007381439, -0.0005607605, -0.058410645, -0.008399963, -0.08001709, 0.05065918, 0.01727295, 0.012191772, -0.016571045, 0.03717041, -0.02607727, 0.060760498, 0.057678223, -0.06585693, 0.059173584, 0.023117065, -0.034118652, -0.03189087, 0.010429382, 0.010368347, -0.011230469, -0.020980835, -0.04019165, 0.048187256, -0.019638062, -0.024414062, -0.0019989014, 0.04336548, 0.117248535, 0.00033903122, -0.0014419556, 0.013946533, -0.11541748, 0.030059814, -0.06500244, 0.05441284, 0.021759033, 0.030380249, 0.080566406, 0.02331543, -0.04586792, 0.037322998, 0.011390686, -0.01374054, 0.1459961, -0.050964355, 0.081970215, -0.061645508, 0.07067871, -0.036956787, 0.060455322, 0.051361084, -0.05831909, 0.05328369, -0.008628845, 0.054534912, -0.047332764, 0.030578613, -0.048828125, -0.018112183, 0.022979736, -0.07318115, -0.0423584, -0.094177246, -0.04071045, 0.054260254, 0.0423584, 0.075805664, -0.06365967, 0.009269714, -0.054779053, -0.007637024, -0.01876831, 0.08453369, 0.058898926, -0.07727051, 0.04360962, 0.010574341, -0.027694702, 0.024917603, -0.0463562, 0.040222168, -0.05496216, 
-0.048461914, 0.013710022, -0.1038208, 0.027954102, 0.031951904, -0.05618286, 0.0025730133, -0.06549072, -0.049957275, 0.01499939, -0.11090088, -0.009017944, 0.021835327, 0.03503418, 0.058746338, -0.12756348, -0.0345459, -0.04699707, -0.029830933, -0.06726074, 0.010612488, -0.024108887, 0.016464233, 0.013076782, -0.06298828, -0.0657959, -0.0025234222, -0.0625, 0.013420105, 0.05810547, -0.006362915, -0.028625488, 0.06085205, 0.12310791, 0.04751587, -0.027740479, -0.02029419, -0.02293396, 0.048858643, -0.006793976, -0.0061073303, 0.029067993, -0.0076942444, -0.00088596344, -0.007446289, 0.12756348, 0.082092285, -0.0037841797, 0.03866577, 0.040374756, 0.019104004, -0.0345459, 0.019042969, -0.038116455, 0.045410156, 0.062683105, -0.024963379, 0.085632324, 0.005897522, 0.008285522, 0.008811951, 0.026504517, 0.025558472, -0.005554199, -0.017822266, -0.112854004, -0.03768921, -0.00097227097, -0.061401367, 0.050567627, -0.010734558, 0.07220459, 0.03643799, 0.0007662773, -0.020980835, -0.04711914, -0.03488159, -0.09655762, 0.0048561096, 0.028030396, 0.04586792, -0.014915466]\n"
]
}
],
"source": [
"query_result = embeddings.embed_query(text)"
"query_result = embeddings.embed_query(text)\n",
"print(query_result)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "38ad3b20",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[-0.072631836, 0.06921387, -0.02658081, 0.022705078, 0.027328491, 0.046905518, -0.01838684, -0.029525757, 0.0041046143, -0.028198242, 0.0496521, 0.026901245, 0.03274536, 0.01574707, -0.081726074, -0.022369385, 0.049591064, 0.06549072, -0.015083313, -0.053863525, 0.098083496, 0.034698486, -0.08557129, -0.0024662018, -0.07519531, 0.03265381, 0.006046295, -0.0060691833, 0.032196045, 0.07537842, 9.024143e-05, -0.00869751, 0.022735596, 0.06329346, 0.068481445, -0.006778717, -0.07885742, 0.049560547, -0.008811951, 0.025253296, 0.050750732, -0.05343628, 0.051361084, -0.02319336, 0.026382446, 0.088378906, 0.03567505, -0.0736084, 0.039215088, -0.020584106, -0.03112793, -0.071777344, 0.018218994, -0.01876831, 0.040863037, 0.080078125, 0.046020508, -0.030792236, -0.011779785, -0.024871826, -0.06652832, 0.04748535, -0.038116455, 0.08453369, 0.08746338, 0.059509277, -0.037628174, -0.045410156, -0.054626465, -0.0036334991, -0.035949707, -0.011070251, 0.054534912, 0.0803833, 0.052734375, 0.06689453, 0.0074310303, 0.018249512, -0.023773193, 0.03845215, -0.113220215, 0.014251709, 0.028289795, -0.03942871, 0.029525757, 0.03036499, 0.035095215, 0.031829834, -0.0015306473, 0.027252197, 0.005088806, -0.035858154, -0.113220215, 0.021606445, 0.012046814, -0.06137085, 0.0057640076, -0.06994629, 0.02532959, 0.016952515, -0.010398865, -0.0066184998, -0.020904541, -0.12030029, 0.0036029816, -0.061553955, 0.023956299, -0.07330322, 0.013053894, -0.009613037, -0.062683105, 0.00013184547, 0.12030029, 0.028167725, 0.048614502, -0.09301758, -0.020324707, 0.022369385, -0.14025879, -0.052764893, 0.07220459, 0.028198242, 0.01499939, -0.029449463, 0.004711151, -0.05947876, 0.1640625, -0.09240723, 0.019500732, -0.0031089783, 0.0032081604, -0.0049934387, -0.01676941, 0.002691269, 0.02848816, 0.013504028, -0.057800293, 0.049041748, -0.022384644, 0.05517578, -0.031982422, 0.055389404, 0.0859375, 0.019866943, -0.052978516, 0.030929565, -0.15979004, 0.068481445, -0.020080566, -0.033477783, 0.07922363, 
-0.020736694, -0.025680542, 0.054016113, -0.028839111, -0.016189575, 0.03564453, 0.0001078248, 0.06304932, -0.022781372, 0.06555176, 0.010093689, 0.03286743, 0.14111328, -0.008468628, -0.04849243, 0.04525757, 0.065979004, -0.012138367, -0.017044067, 0.059509277, 0.035339355, -0.017807007, -0.027267456, -0.0034656525, -0.02078247, -0.033477783, 0.05041504, -0.043518066, -0.064208984, 0.034942627, -0.009300232, -0.08148193, 0.007774353, -0.03540039, -0.008255005, -0.1060791, -0.0703125, 0.091308594, 0.10095215, -0.081970215, 0.02355957, -0.026382446, -0.0070610046, -0.051208496, -0.014961243, 0.07269287, -0.033721924, 0.017669678, -0.08972168, 0.035339355, 0.03579712, -0.07299805, -0.014144897, -0.008850098, 0.023742676, -0.05847168, -0.07873535, -0.015388489, -0.039642334, -0.028930664, 0.008926392, -0.040283203, -0.02897644, -0.013557434, -0.006088257, 0.024169922, -0.10217285, 0.014526367, 0.007381439, -0.0005607605, -0.058410645, -0.008399963, -0.08001709, 0.05065918, 0.01727295, 0.012191772, -0.016571045, 0.03717041, -0.02607727, 0.060760498, 0.057678223, -0.06585693, 0.059173584, 0.023117065, -0.034118652, -0.03189087, 0.010429382, 0.010368347, -0.011230469, -0.020980835, -0.04019165, 0.048187256, -0.019638062, -0.024414062, -0.0019989014, 0.04336548, 0.117248535, 0.00033903122, -0.0014419556, 0.013946533, -0.11541748, 0.030059814, -0.06500244, 0.05441284, 0.021759033, 0.030380249, 0.080566406, 0.02331543, -0.04586792, 0.037322998, 0.011390686, -0.01374054, 0.1459961, -0.050964355, 0.081970215, -0.061645508, 0.07067871, -0.036956787, 0.060455322, 0.051361084, -0.05831909, 0.05328369, -0.008628845, 0.054534912, -0.047332764, 0.030578613, -0.048828125, -0.018112183, 0.022979736, -0.07318115, -0.0423584, -0.094177246, -0.04071045, 0.054260254, 0.0423584, 0.075805664, -0.06365967, 0.009269714, -0.054779053, -0.007637024, -0.01876831, 0.08453369, 0.058898926, -0.07727051, 0.04360962, 0.010574341, -0.027694702, 0.024917603, -0.0463562, 0.040222168, -0.05496216, 
-0.048461914, 0.013710022, -0.1038208, 0.027954102, 0.031951904, -0.05618286, 0.0025730133, -0.06549072, -0.049957275, 0.01499939, -0.11090088, -0.009017944, 0.021835327, 0.03503418, 0.058746338, -0.12756348, -0.0345459, -0.04699707, -0.029830933, -0.06726074, 0.010612488, -0.024108887, 0.016464233, 0.013076782, -0.06298828, -0.0657959, -0.0025234222, -0.0625, 0.013420105, 0.05810547, -0.006362915, -0.028625488, 0.06085205, 0.12310791, 0.04751587, -0.027740479, -0.02029419, -0.02293396, 0.048858643, -0.006793976, -0.0061073303, 0.029067993, -0.0076942444, -0.00088596344, -0.007446289, 0.12756348, 0.082092285, -0.0037841797, 0.03866577, 0.040374756, 0.019104004, -0.0345459, 0.019042969, -0.038116455, 0.045410156, 0.062683105, -0.024963379, 0.085632324, 0.005897522, 0.008285522, 0.008811951, 0.026504517, 0.025558472, -0.005554199, -0.017822266, -0.112854004, -0.03768921, -0.00097227097, -0.061401367, 0.050567627, -0.010734558, 0.07220459, 0.03643799, 0.0007662773, -0.020980835, -0.04711914, -0.03488159, -0.09655762, 0.0048561096, 0.028030396, 0.04586792, -0.014915466]]\n"
]
}
],
"source": [
"doc_result = embeddings.embed_documents([text])"
"doc_result = embeddings.embed_documents([text])\n",
"print(doc_result)"
]
},
{
Expand Down Expand Up @@ -85,7 +103,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.11.5"
},
"vscode": {
"interpreter": {
Expand Down
64 changes: 31 additions & 33 deletions libs/langchain/langchain/embeddings/cohere.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from typing import Any, Dict, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator

from langchain.pydantic_v1 import BaseModel, Extra, root_validator
from langchain.schema.embeddings import Embeddings
from langchain.utils import get_from_dict_or_env


Expand All @@ -18,7 +17,8 @@ class CohereEmbeddings(BaseModel, Embeddings):
from langchain.embeddings import CohereEmbeddings
cohere = CohereEmbeddings(
model="embed-english-light-v3.0", cohere_api_key="my-api-key"
model="embed-english-light-v3.0",
cohere_api_key="my-api-key"
)
"""

Expand Down Expand Up @@ -78,22 +78,38 @@ def validate_environment(cls, values: Dict) -> Dict:
)
return values

def embed(
    self, texts: List[str], *, input_type: Optional[str] = None
) -> List[List[float]]:
    """Embed a batch of texts through the synchronous Cohere client.

    Args:
        texts: The list of texts to embed.
        input_type: Forwarded unchanged as the ``input_type`` argument of
            the client's ``embed`` call; ``None`` when not given.

    Returns:
        One list of floats per embedding in the client's response.
    """
    response = self.client.embed(
        texts=texts,
        model=self.model,
        truncate=self.truncate,
        input_type=input_type,
    )
    # Coerce every component to a built-in float so callers get plain lists.
    return [[float(value) for value in vector] for vector in response.embeddings]

async def aembed(
    self, texts: List[str], *, input_type: Optional[str] = None
) -> List[List[float]]:
    """Embed a batch of texts through the asynchronous Cohere client.

    Args:
        texts: The list of texts to embed.
        input_type: Forwarded unchanged as the ``input_type`` argument of
            the client's ``embed`` call; ``None`` when not given.

    Returns:
        One list of floats per embedding in the client's response.
    """
    # Await the call first and only then read ``.embeddings``. Attribute
    # access binds tighter than ``await``, so ``await f(...).embeddings``
    # would look the attribute up on the un-awaited coroutine and raise
    # AttributeError.
    response = await self.async_client.embed(
        model=self.model,
        texts=texts,
        input_type=input_type,
        truncate=self.truncate,
    )
    return [list(map(float, e)) for e in response.embeddings]

def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Call out to Cohere's embedding endpoint.
"""Embed a list of document texts.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
embeddings = self.client.embed(
model=self.model,
texts=texts,
input_type="search_document",
truncate=self.truncate,
).embeddings
return [list(map(float, e)) for e in embeddings]
return self.embed(texts, input_type="search_document")

async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
"""Async call out to Cohere's embedding endpoint.
Expand All @@ -104,13 +120,7 @@ async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
Returns:
List of embeddings, one for each text.
"""
embeddings = await self.async_client.embed(
model=self.model,
texts=texts,
input_type="search_document",
truncate=self.truncate,
)
return [list(map(float, e)) for e in embeddings.embeddings]
return await self.aembed(texts, input_type="search_document")

def embed_query(self, text: str) -> List[float]:
"""Call out to Cohere's embedding endpoint.
Expand All @@ -121,13 +131,7 @@ def embed_query(self, text: str) -> List[float]:
Returns:
Embeddings for the text.
"""
embeddings = self.client.embed(
model=self.model,
texts=[text],
input_type="search_query",
truncate=self.truncate,
).embeddings
return [list(map(float, e)) for e in embeddings][0]
return self.embed([text], input_type="search_query")[0]

async def aembed_query(self, text: str) -> List[float]:
"""Async call out to Cohere's embedding endpoint.
Expand All @@ -138,10 +142,4 @@ async def aembed_query(self, text: str) -> List[float]:
Returns:
Embeddings for the text.
"""
embeddings = await self.async_client.embed(
model=self.model,
texts=[text],
input_type="search_query",
truncate=self.truncate,
)
return [list(map(float, e)) for e in embeddings.embeddings][0]
return (await self.aembed([text], input_type="search_query"))[0]

0 comments on commit 27ddfa7

Please sign in to comment.