In [None]:
%%bash

pip install haystack-ai
pip install datasets

In [3]:
import os
from getpass import getpass

openai_api_key = os.getenv("OPENAI_API_KEY", None) or getpass("Enter OpenAI API key:")

In [21]:
from haystack.preview import Document
from datasets import load_dataset

dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
docs = [Document(text = doc['content'], metadata={"title": doc["meta"]["url"] + "-" + str(doc["meta"]["_split_id"])}) for doc in dataset]

In [22]:
from haystack.preview.document_stores import MemoryDocumentStore

document_store = MemoryDocumentStore(embedding_similarity_function="cosine")
document_store.write_documents(docs)

In [23]:
document_store.count_documents()

151

In [7]:
from haystack.preview import component

@component
class PromptInspector:
    """ Inspector for a prompt """

    @component.output_types(contents=str)
    def run(self, prompt: str):
        return {"prompt": prompt}

In [24]:
docs[6]

Document(id='4f16c32027c50c8d6fd2614469cae66cfd9b660d5fe8ad60929464aeaa9a4d6b', text='The remains were described briefly by Strabo (64 or 63\xa0BC – c. 24\xa0AD), in his work Geography (Book XIV, Chapter 2.5). Strabo was a Greek geographer, philosopher, and historian who lived in Asia Minor during the transitional period of the Roman Republic into the Roman Empire.\nStrabo is best known for his work Geographica ("Geography"), which presented a descriptive history of people and places from different regions of the world known during his lifetime.[20] Strabo states that:\n\nThe city of the Rhodians lies on the eastern promontory of Rhodes; and it is so far superior to all others in harbours and roads and walls and improvements in general that I am unable to speak of any other city as equal to it, or even as almost equal to it, much less superior to it. ', array=None, dataframe=None, blob=None, mime_type='text/plain', metadata={'title': 'https://en.wikipedia.org/wiki/Colossus_of_Rhodes-6'

In [32]:
haystack_prompt = """
You are a helpful expert. \
Your answers are short and to the point. \
You must only use information from the given documents. \
Use an unbiased and journalistic tone. \ 
Do not repeat text. \
Always use references in the form [Source: NAME OF DOCUMENT] when using information from a document. e.g. [Source: 3], for Document[3]. \
The reference must only refer to the name that comes in square brackets after passage. \
If multiple documents are used to generate the answer, combine the names of these documents. e.g. [Source: 5, 8], for Document[5], Document[8]. \
Otherwise, do not use brackets in your answer and reference ONLY the name of the passage without mentioning the word passage. \
If the documents can't answer the question or you are unsure say: 'The answer cannot be found in the text directly, but I would recommend consulting [Source: X] for more information', pointing the user to the most relevant document. \
{new_line}\
These are the documents:\ 

{% for doc in documents %}
[Source: {{ doc.metadata.title }}]:
{{ doc.text }}
{% endfor %}\

{new_line}\
Question: {{question}}\
{new_line}\
Answer:\
{new_line}
"""

In [33]:
from haystack.preview import Pipeline
from haystack.preview.components.retrievers import MemoryBM25Retriever
from haystack.preview.components.generators.openai.gpt4 import GPT4Generator
from haystack.preview.components.builders.answer_builder import AnswerBuilder
from haystack.preview.components.builders.prompt_builder import PromptBuilder

pipeline = Pipeline()
pipeline.add_component(instance=MemoryBM25Retriever(document_store=document_store), name="retriever")
pipeline.add_component(instance=PromptBuilder(template=haystack_prompt), name="prompt_builder")
pipeline.add_component(instance=PromptInspector(), name="prompt_inspector")
pipeline.add_component(instance=GPT4Generator(api_key=openai_api_key), name="llm")
pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")
pipeline.connect("retriever", "prompt_builder.documents")
pipeline.connect("prompt_builder", "llm")
pipeline.connect("prompt_builder", "prompt_inspector")
pipeline.connect("llm.replies", "answer_builder.replies")
pipeline.connect("llm.metadata", "answer_builder.metadata")
pipeline.connect("retriever", "answer_builder.documents")

![](./hackathon-demo.png)

In [40]:
question = "Why did people build pyramids?"
results = pipeline.run({
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
    "answer_builder": {"query": question}
})

Ranking by BM25...: 100%|██████████| 151/151 [00:00<00:00, 33988.40 docs/s]



You are a helpful expert. Your answers are short and to the point. You must only use information from the given documents. Use an unbiased and journalistic tone. \ 
Do not repeat text. Always use references in the form [Source: NAME OF DOCUMENT] when using information from a document. e.g. [Source: 3], for Document[3]. The reference must only refer to the name that comes in square brackets after passage. If multiple documents are used to generate the answer, combine the names of these documents. e.g. [Source: 5, 8], for Document[5], Document[8]. Otherwise, do not use brackets in your answer and reference ONLY the name of the passage without mentioning the word passage. If the documents can't answer the question or you are unsure say: 'The answer cannot be found in the text directly, but I would recommend consulting [Source: X] for more information', pointing the user to the most relevant document. {new_line}These are the documents:\ 


[Source: https://en.wikipedia.org/wiki/Great_Pyra

In [31]:
results["answer_builder"]["answers"]

[GeneratedAnswer(data='The Great Pyramid of Giza, for example, was built as a tomb for the king, in this case, Khufu. There is a claim that these monumental structures were built through forced labor or slavery perhaps reflecting the cruel exploitation of the people [Great Pyramid of Giza-12]. The structures were built with great precision and took a significant amount of time and workforce [Great Pyramid of Giza-25]. The builders also took measures to keep the bodies of the kings safe by deciding not to bury the kings in their pyramids but in secret places [Great Pyramid of Giza-14].', query='Why did people build pyramids?', metadata={'model': 'gpt-4-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 3067, 'completion_tokens': 121, 'total_tokens': 3188}}, documents=[Document(id='3b449d7a3ad22e0e30143e5191ede872ebf325fa6f6a69dd920f9fffcb3218d3', text='Diodorus\'s work was inspired by historians of the past, but he also distanced himself from Herodotus, who Diodorus 

In [36]:
results["prompt_inspector"]["prompt"]

'\nYou are a helpful expert. Your answers are short and to the point. You must only use information from the given documents. Use an unbiased and journalistic tone. \\ \nDo not repeat text. Always use references in the form [Source: NAME OF DOCUMENT] when using information from a document. e.g. [Source: 3], for Document[3]. The reference must only refer to the name that comes in square brackets after passage. If multiple documents are used to generate the answer, combine the names of these documents. e.g. [Source: 5, 8], for Document[5], Document[8]. Otherwise, do not use brackets in your answer and reference ONLY the name of the passage without mentioning the word passage. If the documents can\'t answer the question or you are unsure say: \'The answer cannot be found in the text directly, but I would recommend consulting [Source: X] for more information\', pointing the user to the most relevant document. {new_line}These are the documents:\\ \n\n\n[Source: https://en.wikipedia.org/wiki