In [7]:
%pip install "sentence-transformers>=3.0.0" haystack-ai google-ai-haystack wikipedia rich

Collecting sentence-transformers>=3.0.0Note: you may need to restart the kernel to use updated packages.





  Downloading sentence_transformers-3.2.1-py3-none-any.whl.metadata (10 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers>=3.0.0)
  Downloading transformers-4.46.2-py3-none-any.whl.metadata (44 kB)
Collecting safetensors>=0.4.1 (from transformers<5.0.0,>=4.41.0->sentence-transformers>=3.0.0)
  Downloading safetensors-0.4.5-cp312-none-win_amd64.whl.metadata (3.9 kB)
Downloading sentence_transformers-3.2.1-py3-none-any.whl (255 kB)
Downloading transformers-4.46.2-py3-none-any.whl (10.0 MB)
   ---------------------------------------- 0.0/10.0 MB ? eta -:--:--
   --------------- ------------------------ 3.9/10.0 MB 19.6 MB/s eta 0:00:01
   ---------------------------------------- 10.0/10.0 MB 27.2 MB/s eta 0:00:00
Downloading safetensors-0.4.5-cp312-none-win_amd64.whl (286 kB)
Installing collected packages: safetensors, transformers, sentence-transformers
Successfully installed safetensors-0.4.5 sentence-transformers-3.2.1 transformers-4.46.2


In [1]:
import getpass
import os
import random
import time

import wikipedia
from IPython.display import Image
from rich import print

from datasets import load_dataset
from haystack import Document, Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.document_stores.types import DuplicatePolicy
from haystack_integrations.components.generators.google_ai import GoogleAIGeminiGenerator, GoogleAIGeminiChatGenerator




## Authorization

- You need a Google API key; you can obtain one at https://aistudio.google.com/app/apikey

In [2]:
# Never hard-code credentials in a notebook: anything saved in a cell is leaked
# to everyone who can read the file.
# NOTE(review): the key that was previously embedded in this cell should be
# treated as compromised and revoked in Google AI Studio.
# Use the key from the environment when present; otherwise prompt for it
# without echoing the input.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")

## RAG with Gemini (about rock music) 🎸

### Load data from Wikipedia

In [3]:
# Wikipedia page titles of the bands whose articles form the knowledge base.
# Entries such as "Muse (band)" carry the disambiguation suffix as part of the title.
favourite_bands = [
    "Audioslave",
    "Blink-182",
    "Dire Straits",
    "Evanescence",
    "Green Day",
    "Muse (band)",
    "Nirvana (band)",
    "Sum 41",
    "The Cure",
    "The Smiths",
]

In [4]:
# Fetch each band's Wikipedia article and wrap it in a Haystack Document,
# keeping the page title and URL as metadata.
raw_docs = []

for title in favourite_bands:
    # auto_suggest=False disables the wikipedia library's title auto-suggestion.
    page = wikipedia.page(title=title, auto_suggest=False)
    raw_docs.append(
        Document(
            content=page.content,
            meta={"title": page.title, "url": page.url},
        )
    )

### Indexing Pipeline

In [5]:
# In-memory document store: the indexing pipeline writes into it and the
# embedding retrievers read from it.
document_store = InMemoryDocumentStore()

In [6]:
# Embedder that computes document embeddings at indexing time.
embedder = SentenceTransformersDocumentEmbedder(
    model="sentence-transformers/all-MiniLM-L6-v2",
)
# Load the model before the embedder is added to the pipeline.
embedder.warm_up()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [7]:
# Indexing pipeline: clean -> split into 2-sentence chunks -> embed -> write to the store.
indexing = Pipeline()

indexing_components = {
    "cleaner": DocumentCleaner(),
    "splitter": DocumentSplitter(split_by="sentence", split_length=2),
    "embedder": embedder,
    "writer": DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE),
}
for name, component in indexing_components.items():
    indexing.add_component(name, component)

# Chain the stages in order.
indexing_connections = [
    ("cleaner", "splitter"),
    ("splitter", "embedder"),
    ("embedder", "writer"),
]
for sender, receiver in indexing_connections:
    indexing.connect(sender, receiver)

# Last expression of the cell: display the assembled pipeline.
indexing

<haystack.core.pipeline.pipeline.Pipeline object at 0x0000019782E01160>
🚅 Components
  - cleaner: DocumentCleaner
  - splitter: DocumentSplitter
  - embedder: SentenceTransformersDocumentEmbedder
  - writer: DocumentWriter
🛤️ Connections
  - cleaner.documents -> splitter.documents (List[Document])
  - splitter.documents -> embedder.documents (List[Document])
  - embedder.documents -> writer.documents (List[Document])

In [8]:
# Run the indexing pipeline; the raw Wikipedia documents enter at the "cleaner" component.
indexing.run({"cleaner":{"documents":raw_docs}})

Batches:   0%|          | 0/51 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


{'writer': {'documents_written': 1610}}

In [12]:
# Sanity check: show the first document returned by the store (no filters are passed).
print(document_store.filter_documents()[0])

### RAG Pipeline

In [None]:
# Jinja template for the answering prompt: each retrieved document is rendered as
# context (its content followed by its source URL), then the user's question.
# NOTE(review): the <start_of_turn>/<end_of_turn> markers look like a Gemma-style
# chat format, while the generators in this notebook are Gemini models — confirm
# they are intended.
prompt_template = """
<start_of_turn>user
Using the information contained in the context, give a comprehensive answer to the question.
If the answer is contained in the context, also report the source URL.
If the answer cannot be deduced from the context, do not give an answer.

Context:
  {% for doc in documents %}
  {{ doc.content }} URL:{{ doc.meta['url'] }}
  {% endfor %};
  Question: {{query}}<end_of_turn>

<start_of_turn>model
"""
# Builder that fills the template's `documents` and `query` variables.
prompt_builder = PromptBuilder(template=prompt_template)

In [None]:
# Query-time components. The query embedder uses the same model name as the
# document embedder used for indexing.
query_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=5)
generator = GoogleAIGeminiGenerator(model="gemini-1.5-flash-latest")

# RAG pipeline: embed the query -> retrieve documents -> build the prompt -> generate.
rag = Pipeline()
for name, component in (
    ("text_embedder", query_embedder),
    ("retriever", retriever),
    ("prompt_builder", prompt_builder),
    ("llm", generator),
):
    rag.add_component(name, component)

for sender, receiver in (
    ("text_embedder.embedding", "retriever.query_embedding"),
    ("retriever", "prompt_builder.documents"),
    ("prompt_builder", "llm"),
):
    rag.connect(sender, receiver)

# Last expression of the cell: display the assembled pipeline.
rag


<haystack.core.pipeline.pipeline.Pipeline object at 0x000001980BDEFA40>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - llm: GoogleAIGeminiGenerator
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.parts (str)

In [29]:
def get_generative_answer(query, rag_model=None):
  """Answer `query` with a RAG pipeline and return the first generated reply.

  Args:
    query: The question text. It is passed to the "text_embedder" component
      (as `text`) and to the "prompt_builder" component (as `query`).
    rag_model: Pipeline to run. When omitted, the notebook's global `rag` is
      used; it is looked up at call time so that re-running the cell that
      builds `rag` takes effect without re-defining this function.

  Returns:
    The first element of the pipeline's `results["llm"]["replies"]`.
  """
  if rag_model is None:
    # Late binding: a `rag_model=rag` default would freeze whichever pipeline
    # object existed when this cell was executed.
    rag_model = rag

  results = rag_model.run(
    {
      "text_embedder": {"text": query},
      "prompt_builder": {"query": query},
    }
  )
  return results["llm"]["replies"][0]

In [31]:
# Try the RAG pipeline on a single question and print the generated answer.
print(get_generative_answer("Audioslave was formed by members of two iconic bands. Can you name the bands and discuss the sound of Audioslave in comparison?"))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [73]:
questions="""What was the original name of Sum 41?
What is the name of the lead singer of Blink-182?
What was the title of Nirvana's breakthrough album released in 1991?
What does Rhodes Statue look like?
Green Day's "American Idiot" is a rock opera. What's the story it tells?
Who was the lead singer of The Cure?
Audioslave was formed by members of two iconic bands. Can you name the bands and discuss the sound of Audioslave in comparison?
Evanescence's "Bring Me to Life" features a male vocalist. Who is he, and how does his voice complement Amy Lee's in the song?
What is Sum 41's debut studio album called?
How many languages are there?
What did Einstein win the Nobel Prize for?
What What does Rhodes Statue look like?
Who was the lead singer of Audioslave?
Who has the most followers on Instagram?
When was Nirvana's first studio album, "Bleach," released?
Were the Smiths an influential band?
What is the name of Evanescence's debut album?
Which band was Morrissey the lead singer of before he formed The Smiths?
Whose the father of Luke Skywalker?
What type of music plays Coldplay?
Dire Straits' hit song "Money for Nothing" features a guest vocal by a famous artist. Who is this artist?
Who played the song "Like a stone"?
What is the most interesting thing you know?""".split('\n')

In [74]:
# Pick one question at random, print it, then show the generated answer as the cell output.
# No random seed is set, so the chosen question differs between runs.
q=random.choice(questions)
print(q)
get_generative_answer(q)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

'Nirvana\'s first album, "Bleach," was released in June 1989. \n\nSource: https://en.wikipedia.org/wiki/Nirvana_(band) \n'

In [106]:
# Jinja template for the "critic" prompt: the model is shown the retrieved sources
# and a previously generated answer, and is asked whether the answer is fully
# supported by those sources (step-by-step reasoning, then yes/no).
critic_prompt_template = """
<start_of_turn>user
Decide if the following answer is consistent with the corresponding sources. Note that 
consistency means all information in the answer is supported by the sources.

Sources: [
  {% for doc in documents %}
  {{ doc.content }} URL:{{ doc.meta['url'] }}
  {% endfor %};
]
Answer: [{{answer}}]

Explain your reasoning step by step then answer (yes or no) the question.
ANSWER: <end_of_turn>

<start_of_turn>model
"""
# Builder that fills the template's `documents` and `answer` variables.
critic_prompt_builder = PromptBuilder(template=critic_prompt_template)

# Critic pipeline: same shape as the answering pipeline (embed query -> retrieve ->
# build prompt -> generate), but using the critic prompt builder.
critic_rag = Pipeline()
for name, component in (
    ("text_embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")),
    ("retriever", InMemoryEmbeddingRetriever(document_store=document_store, top_k=5)),
    ("prompt_builder", critic_prompt_builder),
    ("llm", GoogleAIGeminiGenerator(model="gemini-1.5-flash-latest")),
):
    critic_rag.add_component(name, component)

for sender, receiver in (
    ("text_embedder.embedding", "retriever.query_embedding"),
    ("retriever", "prompt_builder.documents"),
    ("prompt_builder", "llm"),
):
    critic_rag.connect(sender, receiver)

# Last expression of the cell: display the assembled pipeline.
critic_rag

<haystack.core.pipeline.pipeline.Pipeline object at 0x00000198118A8530>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - llm: GoogleAIGeminiGenerator
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.parts (str)

In [107]:
def get_critic_answer(query, rag_model=None, critic_model=None):
  """Generate an answer to `query`, then ask a critic pipeline to judge it.

  The query and the generated answer are printed before the critic is run.

  Args:
    query: The question text. It is used to generate the answer and is also
      passed to the critic pipeline's "text_embedder" component.
    rag_model: Pipeline used to generate the answer. When omitted, the
      notebook's global `rag` is used.
    critic_model: Pipeline used to judge the answer. When omitted, the
      notebook's global `critic_rag` is used.

  Both defaults are resolved at call time, so re-running the cells that build
  the pipelines takes effect without re-defining this function.

  Returns:
    The first element of the critic pipeline's `results["llm"]["replies"]`.
  """
  if rag_model is None:
    rag_model = rag
  if critic_model is None:
    critic_model = critic_rag

  print(f"Query: {query}")
  answer = get_generative_answer(query, rag_model)
  print(f"Model answer:\n{answer}")

  # The critic receives the query (for retrieval) and the answer (for the prompt).
  results = critic_model.run(
    {
      "text_embedder": {"text": query},
      "prompt_builder": {"answer": answer},
    }
  )
  return results["llm"]["replies"][0]


# Run the critic on one randomly chosen question from `questions`, the list
# defined earlier in this notebook.
q = random.choice(questions)
print(q)
print(f"Critic answer: \n{get_critic_answer(q)}")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [104]:
# Pause between consecutive questions.
# NOTE(review): presumably this keeps the run under the Gemini API rate limit —
# confirm the value against the quota of the key in use.
SECONDS_BETWEEN_QUESTIONS = 12

# Run the critic over every question in the list.
for position, q in enumerate(questions):
  if position:
    # Wait before every question except the first, so no time is spent
    # sleeping after the last one.
    time.sleep(SECONDS_BETWEEN_QUESTIONS)
  print(f"Critic answer: \n{get_critic_answer(q)}")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

KeyboardInterrupt: 

__________________________________________________________________

Unsafe prompt

In [91]:
# Variant of the answering prompt that tells the model to reply with a joke when
# it does not know the answer, instead of declining to answer.
unsafe_prompt_template = """
<start_of_turn>user
Using the information contained in the context, give a comprehensive answer to the question.
If the answer is contained in the context, also report the source URL.
If you don't know the answer, tell me a joke instead.


Context:
  {% for doc in documents %}
  {{ doc.content }} URL:{{ doc.meta['url'] }}
  {% endfor %};
  Question: {{query}}<end_of_turn>

<start_of_turn>model
"""
# Builder that fills the template's `documents` and `query` variables.
unsafe_prompt_builder = PromptBuilder(template=unsafe_prompt_template)

# Answering pipeline built around the "unsafe" prompt; the other components are
# configured the same way as in the regular RAG pipeline.
unsafe_rag = Pipeline()
for name, component in (
    ("text_embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")),
    ("retriever", InMemoryEmbeddingRetriever(document_store=document_store, top_k=5)),
    ("prompt_builder", unsafe_prompt_builder),
    ("llm", GoogleAIGeminiGenerator(model="gemini-1.5-flash-latest")),
):
    unsafe_rag.add_component(name, component)

for sender, receiver in (
    ("text_embedder.embedding", "retriever.query_embedding"),
    ("retriever", "prompt_builder.documents"),
    ("prompt_builder", "llm"),
):
    unsafe_rag.connect(sender, receiver)

# Last expression of the cell: display the assembled pipeline.
unsafe_rag

<haystack.core.pipeline.pipeline.Pipeline object at 0x000001980BDEFE60>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - llm: GoogleAIGeminiGenerator
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.parts (str)

In [97]:
# Generate an answer with the "unsafe" pipeline for a question outside the indexed
# articles, then have the critic pipeline judge it against the retrieved sources.
print(get_critic_answer("What is the most interesting thing you know?", rag_model=unsafe_rag, critic_model=critic_rag))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

___________________________________________