<a href="https://colab.research.google.com/github/nickprock/haystack-cookbook/blob/main/notebooks/fastembed_haystack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RAG pipeline using FastEmbed embeddings, QdrantDocumentStore and Zephyr-7B with Haystack

## Install dependencies

In [None]:
!pip install fastembed-haystack qdrant-haystack wikipedia


In [None]:
import warnings

warnings.filterwarnings("ignore")

### Use your Hugging Face Token

In [None]:
import os
from getpass import getpass

# Prompt for the token only when it is not already present (or is empty) in
# the environment, so re-running the cell or executing the notebook
# non-interactively does not block on input. The token is read with getpass
# so it is never echoed into the notebook output.
if not os.environ.get("HF_API_TOKEN"):
    os.environ["HF_API_TOKEN"] = getpass("Enter your Hugging Face Token: https://huggingface.co/settings/tokens ")

## Download contents and create docs

In [None]:
# Wikipedia page titles of the bands whose articles form the knowledge base.
favourite_bands = [
    "Audioslave",
    "Green Day",
    "Muse (band)",
    "Foo Fighters (band)",
    "Nirvana (band)",
]

In [None]:
import wikipedia
from haystack.dataclasses import Document

# Build one Document per band: the page is looked up by its exact title
# (auto_suggest is disabled), its text becomes the document content and the
# page title and URL are stored as metadata.
raw_docs = [
    Document(
        content=wiki_page.content,
        meta={"title": wiki_page.title, "url": wiki_page.url},
    )
    for wiki_page in (
        wikipedia.page(title=band_title, auto_suggest=False)
        for band_title in favourite_bands
    )
]

## Clean, split and index documents in Qdrant

In [None]:
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack_integrations.components.embedders.fastembed import FastembedDocumentEmbedder
from haystack.components.writers import DocumentWriter
from haystack.document_stores.types import DuplicatePolicy

In [None]:
# In-memory Qdrant instance that holds the embedded document chunks.
# NOTE(review): embedding_dim has to match the output size of the embedding
# model used further down (BAAI/bge-base-en-v1.5, presumably 768-dimensional)
# - confirm if the model is changed.
document_store = QdrantDocumentStore(
    ":memory:",
    embedding_dim=768,
    recreate_index=True,
    return_embedding=True,
    wait_result_from_api=True,
)

In [None]:
# Clean the downloaded articles first, then split them into chunks of two
# sentences each.
cleaner = DocumentCleaner()
splitter = DocumentSplitter(split_by="sentence", split_length=2)

cleaned_docs = cleaner.run(raw_docs)["documents"]
splitted_docs = splitter.run(cleaned_docs)

In [None]:
# Embed every chunk with the BGE base model; the "title" meta field is passed
# via meta_fields_to_embed so it is embedded along with the chunk.
document_embedder = FastembedDocumentEmbedder(
    model="BAAI/bge-base-en-v1.5",
    parallel=0,
    meta_fields_to_embed=["title"],
)
document_embedder.warm_up()

chunks_to_embed = splitted_docs["documents"]
documents_with_embeddings = document_embedder.run(chunks_to_embed)

In [None]:
# Store the embedded chunks in Qdrant using the OVERWRITE duplicate policy.
embedded_chunks = documents_with_embeddings.get("documents")
document_store.write_documents(embedded_chunks, policy=DuplicatePolicy.OVERWRITE)

## RAG Pipeline using Zephyr-7B

In [None]:
from haystack import Pipeline
from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
from haystack_integrations.components.embedders.fastembed import FastembedTextEmbedder
from haystack.components.generators import HuggingFaceTGIGenerator
from haystack.components.builders.prompt_builder import PromptBuilder

from pprint import pprint

In [None]:
# Zephyr-7B served through the Hugging Face TGI generator component.
# NOTE(review): HuggingFaceTGIGenerator appears to be deprecated in later
# Haystack 2.x releases in favour of HuggingFaceAPIGenerator - confirm against
# the installed Haystack version.
generation_options = {"max_new_tokens": 500}
generator = HuggingFaceTGIGenerator(
    "HuggingFaceH4/zephyr-7b-beta",
    generation_kwargs=generation_options,
)
generator.warm_up()

In [None]:
# Jinja template for the generator prompt: it lists the content of every
# retrieved document, then the user question, and asks for a short answer
# grounded only in those documents.
prompt_template = """
Using only the information contained in these documents return a brief answer (max 50 words).
If the answer cannot be inferred from the documents, respond "I don't know".
Documents:
{% for doc in documents %}
    {{ doc.content }}
{% endfor %}
Question: {{question}}
Answer:
"""

In [None]:
# Register the four pipeline stages in order: query embedder, Qdrant
# retriever, prompt builder and the LLM generator.
# NOTE(review): "query:" looks like an E5-style prefix; confirm it is the
# intended query instruction for the BGE model used here.
query_pipeline = Pipeline()
pipeline_components = (
    ("text_embedder", FastembedTextEmbedder(model="BAAI/bge-base-en-v1.5", prefix="query:")),
    ("retriever", QdrantEmbeddingRetriever(document_store=document_store)),
    ("prompt_builder", PromptBuilder(template=prompt_template)),
    ("generator", generator),
)
for component_name, component in pipeline_components:
    query_pipeline.add_component(component_name, component)



In [None]:
# Wire the components together as (sender, receiver) pairs: the query
# embedding goes to the retriever, the retrieved documents go to the prompt
# builder, and the prompt builder feeds the generator.
embedder_to_retriever = ("text_embedder.embedding", "retriever.query_embedding")
retriever_to_prompt = ("retriever.documents", "prompt_builder.documents")
prompt_to_generator = ("prompt_builder", "generator")

query_pipeline.connect(*embedder_to_retriever)
query_pipeline.connect(*retriever_to_prompt)
query_pipeline.connect(*prompt_to_generator)

## Try the pipeline

In [None]:
question = "Who is Dave Grohl?"

# The same question is passed to two components: the text embedder (to build
# the query embedding) and the prompt builder (to fill the prompt template).
pipeline_inputs = {
    "text_embedder": {"text": question},
    "prompt_builder": {"question": question},
}
results = query_pipeline.run(pipeline_inputs)



In [None]:
# Pretty-print every reply produced by the generator component.
replies = results["generator"]["replies"]
for reply in replies:
    pprint(reply)