In [1]:
from haystack.telemetry import tutorial_running
tutorial_running(27)

In [2]:
from haystack.document_stores.in_memory import InMemoryDocumentStore
document_store = InMemoryDocumentStore()

In [3]:
from datasets import load_dataset
from haystack import Document

dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]

In [4]:
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
doc_embedder.warm_up()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
docs_with_embeddings = doc_embedder.run(docs)
document_store.write_documents(docs_with_embeddings["documents"])

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

151

In [6]:
from haystack.components.embedders import SentenceTransformersTextEmbedder
text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")

In [7]:
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
retriever = InMemoryEmbeddingRetriever(document_store)

In [8]:
from typing import List
from haystack import Pipeline, component
from transformers import pipeline

# Inicializando o modelo de question-answering
model_name = 'pierreguillou/bert-large-cased-squad-v1.1-portuguese'
qa_pipeline = pipeline("question-answering", model=model_name)

@component
class QAPipelineComponent:
    """
    A component for question answering using a Hugging Face pipeline
    """
    def __init__(self, qa_pipeline):
        self.qa_pipeline = qa_pipeline

    @component.output_types(answers=List[dict])
    def run(self, query: str, documents: List[Document]):
        context = ' '.join([doc.content for doc in documents])
        print(f"Context: {context}")
        result = self.qa_pipeline(question=query, context=context)
        print(f"Answer: {result['answer']}")
        return {"answers": [{"answer": result["answer"], "score": result["score"]}]}


In [9]:
# Criando o pipeline de texto
basic_rag_pipeline = Pipeline()
basic_rag_pipeline.add_component("text_embedder", text_embedder)
basic_rag_pipeline.add_component("retriever", retriever)
basic_rag_pipeline.add_component(name="qa_component", instance=QAPipelineComponent(qa_pipeline))

In [10]:
# Conectando os componentes entre si
basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
basic_rag_pipeline.connect("retriever", "qa_component.documents")

<haystack.core.pipeline.pipeline.Pipeline object at 0x7de435de6fd0>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - qa_component: QAPipelineComponent
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> qa_component.documents (List[Document])

In [11]:
question = "What does Rhodes Statue look like?"
response = basic_rag_pipeline.run({
    "text_embedder": {"text": question},
    "qa_component": {"query": question}
})

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Context: Within it, too, are to be seen large masses of rock, by the weight of which the artist steadied it while erecting it.[22][23]
Destruction of the remains[edit]
The ultimate fate of the remains of the statue is uncertain. Rhodes has two serious earthquakes per century, owing to its location on the seismically unstable Hellenic Arc. Pausanias tells us, writing ca. 174, how the city was so devastated by an earthquake that the Sibyl oracle foretelling its destruction was considered fulfilled.[24] This means the statue could not have survived for long if it was ever repaired. By the 4th century Rhodes was Christianized, meaning any further maintenance or rebuilding, if there ever was any before, on an ancient pagan statue is unlikely. The metal would have likely been used for coins and maybe also tools by the time of the Arab wars, especially during earlier conflicts such as the Sassanian wars.[9]
The onset of Islamic naval incursions against the Byzantine empire gave rise to a dram

In [12]:
# Acessando e exibindo a primeira resposta e sua pontuação
first_answer = response["qa_component"]["answers"][0]
answer = first_answer["answer"]
score = first_answer["score"]
print(f"Answer: {answer}")
print(f"Score: {score}")

Answer: Christianized
Score: 0.9340901970863342
