From ee836cbe2829d6d98deb0cb5e92b2fb650d95343 Mon Sep 17 00:00:00 2001 From: Tuana Celik Date: Wed, 27 Dec 2023 14:50:31 +0000 Subject: [PATCH 1/3] adding Haystack --- docs/integrations/haystack.md | 81 +++++++++++++++++++++++++++++++++++ docs/integrations/index.md | 1 + 2 files changed, 82 insertions(+) create mode 100644 docs/integrations/haystack.md diff --git a/docs/integrations/haystack.md b/docs/integrations/haystack.md new file mode 100644 index 00000000..225e0cda --- /dev/null +++ b/docs/integrations/haystack.md @@ -0,0 +1,81 @@ +--- +slug: /integrations/haystack +title: 💙 Haystack +--- + +[Haystack](https://github.com/deepset-ai/haystack) is an open-source LLM framework in Python. It provides tooling for all steps of the NLP application building life-cycle. This includes tooling to prepare and embed documents into vector databases, as well as building fully customizable LLM pipelines such as retrieval-augmented generative (RAG) pipelines. + + + +|[Docs](https://docs.haystack.deepset.ai/v2.0/docs) | [GitHub](https://github.com/deepset-ai/haystack) | [Haystack Integrations](https://haystack.deepset.ai/integrations) | [Tutorials](https://haystack.deepset.ai/tutorials) | + +You can use Chroma together with Haystack by installing the integration and using the `ChromaDocumentStore`. + +### Installation + +```bash +pip install chroma-haystack +``` + +### Usage + +- The [Chroma Integration page](https://haystack.deepset.ai/integrations/chroma-documentstore) +- [Chroma + Haystack Example](https://colab.research.google.com/drive/1YpDetI8BRbObPDEVdfqUcwhEX9UUXP-m?usp=sharing) + +#### Write documents into a ChromaDocumentStore + +```python +import os +from pathlib import Path + +from haystack import Pipeline +from haystack.components.converters import TextFileToDocument +from haystack.components.writers import DocumentWriter +from chroma_haystack import ChromaDocumentStore + +file_paths = ["data" / Path(name) for name in os.listdir("data")] + +document_store = 
ChromaDocumentStore() + +indexing = Pipeline() +indexing.add_component("converter", TextFileToDocument()) +indexing.add_component("writer", DocumentWriter(document_store)) + +indexing.connect("converter", "writer") +indexing.run({"converter": {"sources": file_paths}}) +``` + +#### Build RAG on top of Chroma + +```python +from chroma_haystack.retriever import ChromaQueryRetriever +from haystack.components.generators import HuggingFaceTGIGenerator +from haystack.components.builders import PromptBuilder + +prompt = """ +Answer the query based on the provided context. +If the context does not contain the answer, say 'Answer not found'. +Context: +{% for doc in documents %} + {{ doc.content }} +{% endfor %} +query: {{query}} +Answer: +""" +prompt_builder = PromptBuilder(template=prompt) + +llm = HuggingFaceTGIGenerator(model="mistralai/Mixtral-8x7B-Instruct-v0.1", token='YOUR_HF_TOKEN') +llm.warm_up() +retriever = ChromaQueryRetriever(document_store) + +querying = Pipeline() +querying.add_component("retriever", retriever) +querying.add_component("prompt_builder", prompt_builder) +querying.add_component("llm", llm) + +querying.connect("retriever.documents", "prompt_builder.documents") +querying.connect("prompt_builder", "llm") + +results = querying.run({"retriever": {"queries": [query], "top_k": 3}, + "prompt_builder": {"query": query}}) +``` \ No newline at end of file diff --git a/docs/integrations/index.md b/docs/integrations/index.md index 77e45dc7..797d0a4a 100644 --- a/docs/integrations/index.md +++ b/docs/integrations/index.md @@ -19,6 +19,7 @@ We welcome pull requests to add new Integrations to the community. | [Braintrust](/integrations/braintrust) | ✅ | ✅ | | [🔭 OpenLLMetry](/integrations/openllmetry) | ✅ | :soon: | | [🎈 Streamlit](/integrations/streamlit) | ✅ | ➖ | +| [💙 Haystack](/integrations/haystack) | ✅ | ➖ | *Coming soon* - integrations with LangSmith, JinaAI, and more. 
From 17589ab19682d4f2275f1ccee7b13eb689c390db Mon Sep 17 00:00:00 2001 From: Tuana Celik Date: Wed, 27 Dec 2023 15:03:45 +0000 Subject: [PATCH 2/3] add haystack to sidebar --- sidebars.js | 1 + 1 file changed, 1 insertion(+) diff --git a/sidebars.js b/sidebars.js index 3dd25e51..62c6fd88 100644 --- a/sidebars.js +++ b/sidebars.js @@ -68,6 +68,7 @@ const sidebars = { 'integrations/braintrust', 'integrations/openllmetry', 'integrations/streamlit', + 'integrations/haystack', ], }, ], From a2f04fb194e92175b4d416cf116d773303d8f435 Mon Sep 17 00:00:00 2001 From: Jeff Huber Date: Thu, 4 Jan 2024 22:44:02 -0800 Subject: [PATCH 3/3] Update docs/integrations/haystack.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tuana Çelik --- docs/integrations/haystack.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/haystack.md b/docs/integrations/haystack.md index 225e0cda..ebcebca5 100644 --- a/docs/integrations/haystack.md +++ b/docs/integrations/haystack.md @@ -3,7 +3,7 @@ slug: /integrations/haystack title: 💙 Haystack --- -[Haystack](https://github.com/deepset-ai/haystack) is an open-source LLM framework in Python. It provides tooling for all steps of the NLP application building life-cycle. This includes tooling to prepare and embed documents into vector databases, as well as building fully customizable LLM pipelines such as retrieval-augmented generative (RAG) pipelines. +[Haystack](https://github.com/deepset-ai/haystack) is an open-source LLM framework in Python. It provides [embedders](https://docs.haystack.deepset.ai/v2.0/docs/embedders), [generators](https://docs.haystack.deepset.ai/v2.0/docs/generators) and [rankers](https://docs.haystack.deepset.ai/v2.0/docs/rankers) via a number of LLM providers, tooling for [preprocessing](https://docs.haystack.deepset.ai/v2.0/docs/preprocessors) and data preparation, connectors to a number of vector databases (including Chroma), and more. 
Haystack allows you to build custom LLM applications using both the components readily available in Haystack and your own [custom components](https://docs.haystack.deepset.ai/v2.0/docs/custom-components). Some of the most common applications you can build with Haystack are retrieval-augmented generation (RAG) pipelines, question answering, and semantic search.