Skip to content

Commit

Permalink
apply the changes (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
mhaligowski committed Nov 22, 2023
1 parent 914dd89 commit 64c373d
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 78 deletions.
Expand Up @@ -17,8 +17,13 @@ class GradientDocumentEmbedder:
The embedding of each Document is stored in the `embedding` field of the Document.
```python
embedder = GradientDocumentEmbedder(
access_token=gradient_access_token,
workspace_id=gradient_workspace_id,
model_name="bge_large")
p = Pipeline()
p.add_component(instance=GradientDocumentEmbedder(), name="document_embedder")
p.add_component(embedder, name="document_embedder")
p.add_component(instance=GradientDocumentEmbedder(
p.add_component(instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer")
p.connect("document_embedder", "document_writer")
p.run({"document_embedder": {"documents": documents}})
Expand All @@ -43,6 +48,7 @@ def __init__(
variable GRADIENT_WORKSPACE_ID.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
"""
gradientai_import.check()
self._host = host
self._model_name = model_name

Expand Down Expand Up @@ -88,7 +94,7 @@ def run(self, documents: List[Document]):
:param documents: A list of Documents to embed.
"""
if not isinstance(documents, list) or documents and not isinstance(documents[0], Document):
if not isinstance(documents, list) or documents and any(not isinstance(doc, Document) for doc in documents):
raise TypeError(
"GradientDocumentEmbedder expects a list of Documents as input."
"In case you want to embed a list of strings, please use the GradientTextEmbedder."
Expand Down
26 changes: 17 additions & 9 deletions haystack/preview/components/embedders/gradient_text_embedder.py
Expand Up @@ -11,6 +11,18 @@
class GradientTextEmbedder:
"""
A component for embedding strings using models hosted on Gradient AI (https://gradient.ai).
```python
embedder = GradientTextEmbedder(
access_token=gradient_access_token,
workspace_id=gradient_workspace_id,
model_name="bge_large")
p = Pipeline()
p.add_component(instance=embedder, name="text_embedder")
p.add_component(instance=InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()), name="retriever")
p.connect("text_embedder", "retriever")
p.run("embed me!!!")
```
"""

def __init__(
Expand All @@ -24,22 +36,14 @@ def __init__(
"""
Create a GradientTextEmbedder component.
```python
p = Pipeline()
embedder = GradientTextEmbedder(access_token=gradient_access_token)
p.add_component(instance=embedder, name="text_embedder")
p.add_component(instance=InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()), name="retriever")
p.connect("text_embedder", "retriever")
p.run("embed me!!!")
```
:param model_name: The name of the model to use.
:param access_token: The Gradient access token. If not provided it's read from the environment
variable GRADIENT_ACCESS_TOKEN.
:param workspace_id: The Gradient workspace ID. If not provided it's read from the environment
variable GRADIENT_WORKSPACE_ID.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
"""
gradientai_import.check()
self._host = host
self._model_name = model_name

Expand Down Expand Up @@ -77,4 +81,8 @@ def run(self, text: str):
raise RuntimeError("The embedding model has not been loaded. Please call warm_up() before running.")

result = self._embedding_model.generate_embeddings(inputs=[{"input": text}])

if (not result) or (result.embeddings is None) or (len(result.embeddings) == 0):
raise RuntimeError("The embedding model did not return any embeddings.")

return {"embedding": result.embeddings[0].embedding}
72 changes: 34 additions & 38 deletions haystack/preview/components/generators/gradient/base.py
Expand Up @@ -18,37 +18,17 @@ class GradientGenerator:
Queries the LLM using Gradient AI's SDK ('gradientai' package).
See [Gradient AI API](https://docs.gradient.ai/docs/sdk-quickstart) for more details.
"""

@overload
def __init__(
self,
*,
access_token: Optional[str] = None,
base_model_slug: str,
host: Optional[str] = None,
max_generated_token_count: Optional[int] = None,
temperature: Optional[float] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
workspace_id: Optional[str] = None,
) -> None:
...
@overload
def __init__(
self,
*,
access_token: Optional[str] = None,
host: Optional[str] = None,
max_generated_token_count: Optional[int] = None,
model_adapter_id: str,
temperature: Optional[float] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
workspace_id: Optional[str] = None,
) -> None:
...
```python
llm = GradientGenerator(
access_token=gradient_access_token,
workspace_id=gradient_workspace_id,
base_model_slug="llama2-7b-chat")
llm.warm_up()
print(llm.run(prompt="What is the meaning of life?"))
# Output: {'replies': ['42']}
```
"""

def __init__(
self,
Expand Down Expand Up @@ -78,6 +58,8 @@ def __init__(
:param workspace_id: The Gradient workspace ID. If not provided it's read from the environment
variable GRADIENT_WORKSPACE_ID.
"""
gradientai_import.check()

self._access_token = access_token
self._base_model_slug = base_model_slug
self._host = host
Expand All @@ -88,16 +70,20 @@ def __init__(
self._top_p = top_p
self._workspace_id = workspace_id

if (base_model_slug is None and model_adapter_id is None) or (
isinstance(base_model_slug, str) and isinstance(model_adapter_id, str)
):
raise ValueError("expected be provided exactly one of base_model_slug or model_adapter_id")
has_base_model_slug = base_model_slug is not None and base_model_slug != ""
has_model_adapter_id = model_adapter_id is not None and model_adapter_id != ""

if not has_base_model_slug and not has_model_adapter_id:
raise ValueError("Either base_model_slug or model_adapter_id must be provided.")
if has_base_model_slug and has_model_adapter_id:
raise ValueError("Only one of base_model_slug or model_adapter_id must be provided.")

if has_base_model_slug:
self._base_model_slug = base_model_slug
if has_model_adapter_id:
self._model_adapter_id = model_adapter_id

self._gradient = Gradient(access_token=access_token, host=host, workspace_id=workspace_id)
if isinstance(base_model_slug, str):
self._model = self._gradient.get_base_model(base_model_slug=base_model_slug)
if isinstance(model_adapter_id, str):
self._model = self._gradient.get_model_adapter(model_adapter_id=model_adapter_id)

def to_dict(self) -> Dict[str, Any]:
"""
Expand All @@ -115,6 +101,16 @@ def to_dict(self) -> Dict[str, Any]:
workspace_id=self._workspace_id,
)

def warm_up(self):
    """
    Initializes the LLM model instance if it doesn't exist.

    Looks the model up through the Gradient client stored in `self._gradient`, using whichever
    identifier is set on the instance: `self._base_model_slug` selects a base model,
    `self._model_adapter_id` selects a model adapter. If `self._model` already exists the
    method returns without touching it.
    """
    if hasattr(self, "_model"):
        return

    # Use the underscore-prefixed attributes: those are the names the visible constructor
    # assigns. No public `base_model_slug` / `model_adapter_id` attribute is visible in this
    # part of the file, so reading them would raise AttributeError.
    if isinstance(self._base_model_slug, str):
        self._model = self._gradient.get_base_model(base_model_slug=self._base_model_slug)
    if isinstance(self._model_adapter_id, str):
        self._model = self._gradient.get_model_adapter(model_adapter_id=self._model_adapter_id)

@component.output_types(replies=List[str])
def run(self, prompt: str):
"""
Expand Down
3 changes: 0 additions & 3 deletions pyproject.toml
Expand Up @@ -227,9 +227,6 @@ dev = [
# https://opendev.org/openstack/reno/src/branch/master/requirements.txt#L7
"dulwich>=0.21.0,<1.0.0",
]
gradient = [
"gradientai==1.1.0"
]

formatting = [
# Version specified following Black stability policy:
Expand Down
@@ -1,4 +1,5 @@
import pytest
from gradientai.openapi.client.models.generate_embedding_success import GenerateEmbeddingSuccess
from haystack.preview.components.embedders.gradient_document_embedder import GradientDocumentEmbedder
from unittest.mock import MagicMock, NonCallableMagicMock
import numpy as np
Expand All @@ -11,16 +12,6 @@
model = "bge-large"


def has_gradient():
    """Return True when the `gradientai` package can be imported, False otherwise."""
    try:
        import gradientai  # noqa: F401  (imported only to probe availability)
    except ModuleNotFoundError:
        return False
    else:
        return True


@pytest.mark.skipif(not has_gradient(), reason="Gradient is not installed")
class TestGradientDocumentEmbedder:
@pytest.mark.unit
def test_init_from_env(self, monkeypatch):
Expand Down Expand Up @@ -96,8 +87,6 @@ def test_run_fail_if_not_warmed_up(self):

@pytest.mark.unit
def test_run(self):
from gradientai.openapi.client.models.generate_embedding_success import GenerateEmbeddingSuccess

embedder = GradientDocumentEmbedder(access_token=access_token, workspace_id=workspace_id)
embedder._embedding_model = NonCallableMagicMock()
embedder._embedding_model.generate_embeddings.return_value = GenerateEmbeddingSuccess(
Expand All @@ -108,6 +97,7 @@ def test_run(self):

result = embedder.run(documents=documents)

assert embedder._embedding_model.generate_embeddings.call_count == 1
assert isinstance(result["documents"], list)
assert len(result["documents"]) == len(documents)
for doc in result["documents"]:
Expand Down Expand Up @@ -140,8 +130,6 @@ def test_run_batch(self):

@pytest.mark.unit
def test_run_empty(self):
from gradientai.openapi.client.models.generate_embedding_success import GenerateEmbeddingSuccess

embedder = GradientDocumentEmbedder(access_token=access_token, workspace_id=workspace_id)
embedder._embedding_model = NonCallableMagicMock()

Expand Down
39 changes: 27 additions & 12 deletions test/preview/components/embedders/test_gradient_text_embedder.py
@@ -1,4 +1,5 @@
import pytest
from gradientai.openapi.client.models.generate_embedding_success import GenerateEmbeddingSuccess
from haystack.preview.components.embedders.gradient_text_embedder import GradientTextEmbedder
from unittest.mock import MagicMock, NonCallableMagicMock
import numpy as np
Expand All @@ -9,16 +10,6 @@
model = "bge-large"


def has_gradient():
    """Tell whether the `gradientai` package is importable in this environment."""
    try:
        import gradientai  # noqa: F401  (availability probe, the module itself is unused)
    except ModuleNotFoundError:
        available = False
    else:
        available = True
    return available


@pytest.mark.skipif(not has_gradient(), reason="Gradient is not installed")
class TestGradientTextEmbedder:
@pytest.mark.unit
def test_init_from_env(self, monkeypatch):
Expand Down Expand Up @@ -93,9 +84,33 @@ def test_run_fail_if_not_warmed_up(self):
embedder.run(text="The food was delicious")

@pytest.mark.unit
def test_run(self):
from gradientai.openapi.client.models.generate_embedding_success import GenerateEmbeddingSuccess
def test_run_fail_when_no_embeddings_returned(self):
    """`run` must raise RuntimeError when the model answers with an empty embeddings list."""
    embedder = GradientTextEmbedder(access_token=access_token, workspace_id=workspace_id)
    embedder._embedding_model = NonCallableMagicMock()
    embedder._embedding_model.generate_embeddings.return_value = GenerateEmbeddingSuccess(embeddings=[])

    with pytest.raises(RuntimeError):
        embedder.run(text="The food was delicious")

    # Checked after the `with` block: a statement placed after the raising call inside the
    # block would never execute, so the call verification would be silently skipped.
    embedder._embedding_model.generate_embeddings.assert_called_once_with(
        inputs=[{"input": "The food was delicious"}]
    )

@pytest.mark.unit
def test_run_empty_string(self):
    """An empty input string is still forwarded to the model and its embedding returned."""
    fake_vector = np.random.rand(1024).tolist()
    embedder = GradientTextEmbedder(access_token=access_token, workspace_id=workspace_id)
    model_stub = NonCallableMagicMock()
    model_stub.generate_embeddings.return_value = GenerateEmbeddingSuccess(
        embeddings=[{"embedding": fake_vector, "index": 0}]
    )
    embedder._embedding_model = model_stub

    result = embedder.run(text="")

    model_stub.generate_embeddings.assert_called_once_with(inputs=[{"input": ""}])
    embedding = result["embedding"]
    # 1024 is the bge-large embedding size
    assert len(embedding) == 1024
    assert all(isinstance(value, float) for value in embedding)

@pytest.mark.unit
def test_run(self):
embedder = GradientTextEmbedder(access_token=access_token, workspace_id=workspace_id)
embedder._embedding_model = NonCallableMagicMock()
embedder._embedding_model.generate_embeddings.return_value = GenerateEmbeddingSuccess(
Expand Down

0 comments on commit 64c373d

Please sign in to comment.