In [None]:
pip install haystack-ai accelerate "sentence-transformers>=3.0.0" "datasets>=2.6.1"



Knowing you're using this tutorial helps us decide where to invest our efforts to build a better product, but you can always opt out by commenting out the following line. See [Telemetry](https://docs.haystack.deepset.ai/docs/enabling-telemetry) for more details.

## Load data into the `DocumentStore`

Before you can use this data in the extractive pipeline, you'll use an indexing pipeline to fetch it, process it, and load it into the document store.


The data has already been cleaned and preprocessed, so turning it into Haystack `Documents` is fairly straightforward.

Using an `InMemoryDocumentStore` here keeps things simple. However, this general approach would work with [any document store that Haystack 2.0 supports](https://docs.haystack.deepset.ai/docs/document-store).

The `SentenceTransformersDocumentEmbedder` transforms each `Document` into a vector. Here we've used [`sentence-transformers/multi-qa-mpnet-base-dot-v1`](https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1). You can substitute any embedding model you like, as long as you use the same one in your extractive pipeline.

Lastly, the `DocumentWriter` writes the vectorized documents to the `DocumentStore`.


In [None]:
import pandas as pd

In [None]:
#from datasets import load_dataset
from haystack import Document
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.readers import ExtractiveReader
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter

In [None]:
# Mount Google Drive under /content/drive (Colab only); the input CSV is read from there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Path of the input CSV on the mounted Google Drive.
file_path = '/content/drive/My Drive/df_for_LLM_v2.csv'

In [None]:
# The CSV was written together with its index, which would otherwise come back
# as a meaningless "Unnamed: 0" column; load that first column as the index.
df_for_LLM = pd.read_csv(file_path, index_col=0)

In [None]:
# Preview the loaded table (one row per establishment).
df_for_LLM

Unnamed: 0,name,property_tag_old_2,property_email,property_hours_of_operation,property_location,property_description,property_phone_number,combined_text
0,San Carlos,Hardware Store,mmora@depositosancarlos.com,"Monday - Friday 7:00 AM- 5:00 PM, Saturday 7:0...",Playa del Coco,Deposito San Carlos is a largest building mate...,+506 2670-1211,"San Carlos, Hardware Store, mmora@depositosanc..."
1,Angel's salon,Beauty Bar,myrnaangel@hotmail.com,"Monday 9:00 AM - 6:00 PM, Tuesday - Saturday 9...",Playa del Coco,Angel's Salon is a professional hair and beaut...,no information available,"Angel's salon, Beauty Bar, myrnaangel@hotmail...."
2,La Casa del Plastico,Department Store,info@lacasadelplastico.com,"Monday - Saturday 8:00 AM - 6:00 PM, Sunday C...",Playa del Coco,La Casa del Plastico specializes in the sale o...,+506 2670-0700,"La Casa del Plastico, Department Store, info@l..."
3,EPA Solarium,"Department Store, Hardware Store",contacto@xr.epa.biz,"Monday - Friday 9:00 AM - 5:00 PM, Saturday 9:...",Liberia,EPA Solarium - Hardware and Home Goods Store o...,no information available,"EPA Solarium, Department Store, Hardware Store..."
4,Policia Upala,"Police Department, Information Services",no information available,"Monday - Friday 8:00 AM - 4:00 PM, Saturday - ...",Canalete,Policia Upala - Police Department and Informat...,+506 2470-0235,"Policia Upala, Police Department, Information ..."
...,...,...,...,...,...,...,...,...
143,Coopeguanacaste,Utilities,servicioalcliente@coopeguanacaste.com,"Monday - Saturday 7:00 AM - 5:00 PM,\nSunday ...",Playa del Coco,Coopeguanacaste R.L. is a cooperative in Guana...,+506 2681-4700,"Coopeguanacaste, Utilities, servicioalcliente@..."
144,Liberia International Airport,Airport,\ninfo@guanacasteairport.com,Monday - Sunday 6:00 AM - 12:00 AM,Liberia,Daniel Oduber Quir√≥s International Airport (LI...,+506 2666-9600,"Liberia International Airport, Airport, \ninfo..."
145,Playa del Coco,Beaches,no information available,no information available,Playa del Coco,"Playas del Coco is a vibrant hub of activity, ...",no information available,"Playa del Coco, Beaches, no information availa..."
146,Mega Super,Supermarket,no information available,Monday - Saturday 7:00 AM - 10:00 PM Sunday 7:...,Playa del Coco,"Mega Super offers a wide range of products, fr...",+506 2670-1239,"Mega Super, Supermarket, no information availa..."


In [None]:
# # Example 1: the content holds only the establishment description
# documents = [
#     Document(
#         content=f"{row['property_description']}",
#         meta={
#             "name": row['name'],
#             "tags": row['property_tag_old_2'],
#             "email": row['property_email'],
#             "location": row['property_location'],
#             "phone_number": row['property_phone_number'],
#             "hours_of_operation": row['property_hours_of_operation']
#         }
#     )
#     for _, row in df_for_LLM.iterrows()
# ]

In [None]:
# # Example 2: the content also includes the opening hours
# documents = [
#     Document(
#         content=f"{row['property_description']}\nHours: {row['property_hours_of_operation']}",
#         meta={
#             "name": row['name'],
#             "tags": row['property_tag_old_2'],
#             "email": row['property_email'],
#             "location": row['property_location'],
#             "phone_number": row['property_phone_number']
#         }
#     )
#     for _, row in df_for_LLM.iterrows()
# ]

In [None]:
# Example 3: the content is combined_text, i.e. the contents of all columns
# One Document per table row; its content is the stringified combined_text cell.
documents = [Document(content=f"{text}") for text in df_for_LLM["combined_text"]]

In [None]:
# # Example 4: the content is combined_text, and meta is attached as well
# documents = [
#     Document(
#         content=f"{row['combined_text']}",
#         meta={
#             "name": row['name'],
#             "tags": row['property_tag_old_2'],
#             "email": row['property_email'],
#             "location": row['property_location'],
#             "phone_number": row['property_phone_number'],
#             "hours_of_operation": row['property_hours_of_operation']
#         }
#     )
#     for _, row in df_for_LLM.iterrows()
# ]

In [None]:
# Show only the first few documents instead of the repr of the whole corpus.
documents[:5]

[Document(id=15d5fa5cb856487f11d2bf5c60d218b65b97056a8485c246e7313d05bd7afe8e, content: 'San Carlos, Hardware Store, mmora@depositosancarlos.com, Monday - Friday 7:00 AM- 5:00 PM, Saturday ...'),
 Document(id=f2a36e72ab889a4a0d9fc4d9e4f513226cbba05cf774689c596ae541f9488c11, content: 'Angel's salon, Beauty Bar, myrnaangel@hotmail.com, Monday 9:00 AM - 6:00 PM, Tuesday - Saturday 9:00...'),
 Document(id=df5708ebb664a1d8728a25452a763a5736931ac8776a5eaba13d68e2c434b604, content: 'La Casa del Plastico, Department Store, info@lacasadelplastico.com, Monday - Saturday 8:00 AM - 6:00...'),
 Document(id=652bb2e573623ca2933923f5322e29e26601f617f0ceb673e0374bc1adaaa266, content: 'EPA Solarium, Department Store, Hardware Store, contacto@xr.epa.biz, Monday - Friday 9:00 AM - 5:00 ...'),
 Document(id=f28c9df328974b5c719ed0b76c1146c1a33913d0470d3ce70dc6e08960592fc4, content: 'Policia Upala, Police Department, Information Services, no information available, Monday - Friday 8:...'),
 Document(id=5f0e74a

In [None]:
# Embedding model used for indexing; the query embedder of the QA pipeline is
# built from this same name.
model = "sentence-transformers/multi-qa-mpnet-base-dot-v1"

# Other checkpoints that were tried (uncomment exactly one to switch):
#
# model = "deepset/roberta-base-squad2"
#
# Models made for table-QA tasks. The plan was to test them with the Example 1
# documents (content = establishment description only). Not suitable: they only
# accept a pandas DataFrame as input.
# model = "google/tapas-base-finetuned-wtq"
# model = "microsoft/tapex-base"   # apparently they all work this way; also needs a DataFrame
#
# model = "deepset/roberta-large-squad2"
# model = "deepset/deberta-v3-large-squad2"
# model = "deepset/deberta-v3-base-squad2"
# model = "deepset/bert-base-cased-squad2"   # case-sensitive
#
# Next, models from the sentence-transformers library:
# model = "sentence-transformers/multi-qa-distilbert-dot-v1"
# model = "sentence-transformers/multi-qa-MiniLM-L6-dot-v1"
# model = "sentence-transformers/msmarco-bert-base-dot-v5"
# model = "sentence-transformers/msmarco-distilbert-base-tas-b"
# model = "sentence-transformers/msmarco-MiniLM-L12-cos-v5"

document_store = InMemoryDocumentStore()

# Indexing pipeline: embed every document, then write it to the store.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component(
    name="embedder",
    instance=SentenceTransformersDocumentEmbedder(model=model),
)
indexing_pipeline.add_component(
    name="writer",
    instance=DocumentWriter(document_store=document_store),
)
indexing_pipeline.connect("embedder.documents", "writer.documents")

indexing_pipeline.run({"documents": documents})

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

{'writer': {'documents_written': 148}}

## Build an Extractive QA Pipeline

Your extractive QA pipeline will consist of three components: an embedder, retriever, and reader.

- The `SentenceTransformersTextEmbedder` turns a query into a vector, using the same embedding model defined above.

- Vector search allows the retriever to efficiently return relevant documents from the document store. Retrievers are tightly coupled with document stores; thus, you'll use an `InMemoryEmbeddingRetriever` to go with the `InMemoryDocumentStore`.

- The `ExtractiveReader` returns answers to that query, as well as their location in the source document, and a confidence score.


In [None]:
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.readers import ExtractiveReader
from haystack.components.embedders import SentenceTransformersTextEmbedder

In [None]:
# Retriever over the indexed store, plus an extractive reader with default settings.
retriever = InMemoryEmbeddingRetriever(document_store=document_store)
reader = ExtractiveReader()
reader.warm_up()

# Query embedder -> retriever -> reader.
extractive_qa_pipeline = Pipeline()
extractive_qa_pipeline.add_component(
    name="embedder",
    instance=SentenceTransformersTextEmbedder(model=model),
)
extractive_qa_pipeline.add_component(name="retriever", instance=retriever)
extractive_qa_pipeline.add_component(name="reader", instance=reader)

extractive_qa_pipeline.connect("embedder.embedding", "retriever.query_embedding")
extractive_qa_pipeline.connect("retriever.documents", "reader.documents")

<haystack.core.pipeline.pipeline.Pipeline object at 0x7b9e20b5cc40>
üöÖ Components
  - embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - reader: ExtractiveReader
üõ§Ô∏è Connections
  - embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> reader.documents (List[Document])

In [None]:
# The same question text goes to the embedder (for retrieval) and to the reader.
query = "What are the opening hours of the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What are the opening hours of the restaurant Zi Lounge?', score=0.8432546854019165, data='11:00 AM - 2:30 AM', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 0.6435337060392761), context=None, document_offset=ExtractedAnswer.Span(start=142, end=160), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the opening hours of the restaurant Zi Lounge?', score=0.8176264762878418, data='Monday - Sunday', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 0.6435337060392761), context=None, document_offset=ExtractedAnswer.Span(start=126, end=141), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the o

In [None]:
# Variant of the question that asks for days as well as hours.
query = "What are the opening days and hours of the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What are the opening days and hours of the restaurant Zi Lounge?', score=0.8397794961929321, data='Monday - Sunday', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 0.6269520563996132), context=None, document_offset=ExtractedAnswer.Span(start=126, end=141), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the opening days and hours of the restaurant Zi Lounge?', score=0.791502058506012, data='11:00 AM - 2:30 AM', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 0.6269520563996132), context=None, document_offset=ExtractedAnswer.Span(start=142, end=160), context_offset=None, meta={}),
   ExtractedAnswer(quer

In [None]:
# Third phrasing of the same question.
query = "When is open the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.7865237593650818, data='Monday - Sunday', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 0.6357605786058824), context=None, document_offset=ExtractedAnswer.Span(start=126, end=141), context_offset=None, meta={}),
   ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.729029655456543, data='Monday - Sunday', document=Document(id=0259c633d15088de015f32391e35593c62914f2ccdb89c3a878ba00cdd7887aa, content: 'Aqua Sport, Restaurant, Bar, Latin, Vegetarian, Cocktails, Pizza, Seafood, Fast Food, infoaquarestau...', score: 0.4430885420666822), context=None, document_offset=ExtractedAnswer.Span(start=117, end=132), context_offset=None, meta={}),
   ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.693

Try extracting some answers. Если подавать только description в качестве content, а остальное в качестве meta, работа алгоритма не очень. Неправильно определяются заведения.

В ходе тестов выявлено, что лучше всего подавать combined_text, это столбец, содержащий все данные со столбцов в одном.

Не идеально. Сейчас заведение определяется правильно, но режим работы почему-то дробится. На дни недели и часы работы.

Если в контенте combined text, а также присутствует meta, результат аналогичный.

При использовании модели Roberta версии Large и Base, качество ответов неуд (не те заведения). Если в Roberta подавать только описание, без остальных столбцов, то результат работы совсем плачевный.

Deberta Large и Base версии ответы неуд (не те заведения).

Лучший результат у базовой модели multi-qa-mpnet-base-dot-v1. Попадает в заведения, но отвечает слишком буквально.

Работа bert-base-cased-squad2 также неуд.

Разницы в ответе между multi-qa-mpnet-base-dot-v1 и multi-qa-distilbert-dot-v1 и multi-qa-MiniLM-L6-dot-v1 нет.

Если использовать sentence-transformers/msmarco-bert-base-dot-v5, то есть аналогичную (но побольше) модель берта, обученного на другом датасете, результат тот же. При использовании sentence-transformers/msmarco-distilbert-base-tas-b и msmarco-MiniLM-L12-cos-v5 результат тот же.

По сути, хорошо матчит по названию sentence трансформеры. А отвечает Deberta (но не те заведения). Возможно стоит использовать 2 модели. Отложим пока и попробуем другие варианты по тюнингу алгоритма.



В частности использовать Part-of-speech tagging (POS-tagging) — это процесс, используемый в обработке естественного языка (NLP), который заключается в автоматическом определении и обозначении частей речи для каждого слова в тексте. Части речи включают такие категории, как существительные, глаголы, прилагательные, наречия и другие. POS-тегинг помогает понять синтаксическую структуру текста и часто используется в более сложных задачах NLP, таких как синтаксический разбор, анализ тональности и извлечение информации.

Процесс POS-тегинга обычно включает следующие этапы:

1. **Токенизация**: Разделение текста на отдельные слова или токены.

2. **Тегирование**: Каждому токену присваивается соответствующая часть речи. Это может быть сделано с помощью предварительно обученных моделей, которые используют вероятностные методы, такие как скрытые марковские модели (HMM), или современные подходы, такие как нейронные сети и трансформеры.

3. **Контекстное определение**: Поскольку некоторые слова могут принадлежать к нескольким частям речи в зависимости от контекста (например, слово «run» может быть как существительным, так и глаголом), алгоритмы POS-тегинга учитывают ближайшее окружение слова.

POS-тегинг важен для более глубокого понимания текста и является основой для многих других приложений в области обработки естественного языка.

In [None]:
import spacy
# spaCy's small English pipeline; `nlp` is shared by the POS-tagging and NER cells below.
nlp = spacy.load("en_core_web_sm")

In [None]:
def pos_tagging(text):
    """Return a list of ``(token text, token.pos_)`` pairs for ``text``.

    The text is run through the module-level spaCy pipeline ``nlp``.
    """
    tagged = []
    for token in nlp(text):
        tagged.append((token.text, token.pos_))
    return tagged

def create_document(row):
    """Build a Document from ``row['combined_text']``.

    The POS tags of that text are stored under ``meta['pos_tags']``.
    """
    text = row['combined_text']
    return Document(content=text, meta={'pos_tags': pos_tagging(text)})

# Rebuild the corpus, this time with POS tags attached to every document.
documents = list(map(create_document, (row for _, row in df_for_LLM.iterrows())))

In [None]:
# Each repr now includes the full POS-tag list, so show only a few documents.
documents[:5]

[Document(id=e79d09a1ec1d9c3a8f4f558b9590369d0e0f0e3e5bd3c9705e33725fbec9f19f, content: 'San Carlos, Hardware Store, mmora@depositosancarlos.com, Monday - Friday 7:00 AM- 5:00 PM, Saturday ...', meta: {'pos_tags': [('San', 'PROPN'), ('Carlos', 'PROPN'), (',', 'PUNCT'), ('Hardware', 'PROPN'), ('Store', 'PROPN'), (',', 'PUNCT'), ('mmora@depositosancarlos.com', 'X'), (',', 'PUNCT'), ('Monday', 'PROPN'), ('-', 'PUNCT'), ('Friday', 'PROPN'), ('7:00', 'NUM'), ('AM-', 'PROPN'), ('5:00', 'NUM'), ('PM', 'NOUN'), (',', 'PUNCT'), ('Saturday', 'PROPN'), ('7:00', 'NUM'), ('AM-', 'PROPN'), ('3:00', 'NUM'), ('PM', 'NOUN'), (',', 'PUNCT'), ('Sunday', 'PROPN'), (' ', 'SPACE'), ('–°losed', 'VERB'), (',', 'PUNCT'), ('Playa', 'PROPN'), ('del', 'PROPN'), ('Coco', 'PROPN'), (',', 'PUNCT'), ('Deposito', 'PROPN'), ('San', 'PROPN'), ('Carlos', 'PROPN'), ('is', 'AUX'), ('a', 'DET'), ('largest', 'ADJ'), ('building', 'NOUN'), ('materials', 'NOUN'), ('store', 'NOUN'), ('with', 'ADP'), ('a', 'DET'), ('strong', 'ADJ

In [None]:
model = "sentence-transformers/multi-qa-mpnet-base-dot-v1"

# Start from an empty store and index the POS-annotated documents into it.
document_store = InMemoryDocumentStore()

indexing_pipeline = Pipeline()
indexing_pipeline.add_component(
    name="embedder",
    instance=SentenceTransformersDocumentEmbedder(model=model),
)
indexing_pipeline.add_component(
    name="writer",
    instance=DocumentWriter(document_store=document_store),
)
indexing_pipeline.connect("embedder.documents", "writer.documents")

indexing_pipeline.run({"documents": documents})

# Retriever and reader bound to the freshly filled store.
retriever = InMemoryEmbeddingRetriever(document_store=document_store)
reader = ExtractiveReader()
reader.warm_up()

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
# Create the pipeline used for answer extraction
extractive_qa_pipeline = Pipeline()

In [None]:
# Add a new component intended to make use of POS tagging
class PosTaggingReader(ExtractiveReader):
    """ExtractiveReader subclass with a POS-tag based keyword helper.

    NOTE(review): a Haystack pipeline drives a component through its ``run``
    method, which this class inherits unchanged from ``ExtractiveReader``.
    ``extract_answers`` is therefore not invoked by ``Pipeline.run`` and only
    has an effect when it is called explicitly.
    """

    # Tags counted as nouns. The tags stored in ``meta['pos_tags']`` come from
    # spaCy's ``token.pos_`` (Universal POS tags such as "NOUN" / "PROPN"),
    # not from the Penn Treebank set ("NN", "NNP", ...), so comparing against
    # "NN" could never match. Add "PROPN" here to include proper nouns as well.
    NOUN_TAGS = ("NOUN",)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def extract_answers(self, query, documents):
        """Collect noun tokens of ``documents`` that also occur in ``query``.

        Args:
            query: Question text; membership is tested with ``token in query``.
            documents: Documents whose ``meta['pos_tags']`` holds
                ``(token, pos)`` pairs.

        Returns:
            List of matching token strings, in document order. Duplicates are kept.
        """
        answers = []
        for doc in documents:
            # Documents indexed without POS tags simply contribute nothing
            # instead of raising KeyError.
            pos_tags = doc.meta.get('pos_tags', [])
            for token, pos in pos_tags:
                if pos in self.NOUN_TAGS and token in query:
                    answers.append(token)
        return answers

pos_tagging_reader = PosTaggingReader()

# Add the components to the pipeline
extractive_qa_pipeline.add_component(
    name="embedder",
    instance=SentenceTransformersTextEmbedder(model=model),
)
extractive_qa_pipeline.add_component(name="retriever", instance=retriever)
extractive_qa_pipeline.add_component(name="reader", instance=pos_tagging_reader)

# Connect the components inside the pipeline
extractive_qa_pipeline.connect("embedder.embedding", "retriever.query_embedding")
extractive_qa_pipeline.connect("retriever.documents", "reader.documents")

<haystack.core.pipeline.pipeline.Pipeline object at 0x7e724e502530>
üöÖ Components
  - embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - reader: PosTaggingReader
üõ§Ô∏è Connections
  - embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> reader.documents (List[Document])

In [None]:
# Same question as before, now against the pipeline that uses PosTaggingReader.
query = "What are the opening hours of the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What are the opening hours of the restaurant Zi Lounge?', score=0.843254804611206, data='11:00 AM - 2:30 AM', document=Document(id=cd115b04dbac72c7c7b41f1d8310fa2ec93bb3fb2704c6d18a3829da87338b58, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', meta: {'pos_tags': [('Zi', 'PROPN'), ('Lounge', 'PROPN'), (',', 'PUNCT'), ('Restaurant', 'PROPN'), (',', 'PUNCT'), ('Bar', 'PROPN'), (',', 'PUNCT'), ('International', 'PROPN'), (',', 'PUNCT'), ('Seafood', 'PROPN'), (',', 'PUNCT'), ('Pizza', 'PROPN'), (',', 'PUNCT'), ('Pasta', 'PROPN'), (',', 'PUNCT'), ('Meat', 'NOUN'), (',', 'PUNCT'), ('Steak', 'PROPN'), (',', 'PUNCT'), ('Salads', 'PROPN'), (',', 'PUNCT'), ('Cocktails', 'PROPN'), (',', 'PUNCT'), ('Alcohol', 'PROPN'), (',', 'PUNCT'), ('info@zilounge.com', 'X'), (',', 'PUNCT'), ('Monday', 'PROPN'), ('-', 'PUNCT'), ('Sunday', 'PROPN'), ('11:00', 'NUM'), ('AM', 'PROPN'), ('-', 'PUNCT'),

In [None]:
# Days-and-hours phrasing against the PosTaggingReader pipeline.
query = "What are the opening days and hours of the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What are the opening days and hours of the restaurant Zi Lounge?', score=0.8397794961929321, data='Monday - Sunday', document=Document(id=cd115b04dbac72c7c7b41f1d8310fa2ec93bb3fb2704c6d18a3829da87338b58, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', meta: {'pos_tags': [('Zi', 'PROPN'), ('Lounge', 'PROPN'), (',', 'PUNCT'), ('Restaurant', 'PROPN'), (',', 'PUNCT'), ('Bar', 'PROPN'), (',', 'PUNCT'), ('International', 'PROPN'), (',', 'PUNCT'), ('Seafood', 'PROPN'), (',', 'PUNCT'), ('Pizza', 'PROPN'), (',', 'PUNCT'), ('Pasta', 'PROPN'), (',', 'PUNCT'), ('Meat', 'NOUN'), (',', 'PUNCT'), ('Steak', 'PROPN'), (',', 'PUNCT'), ('Salads', 'PROPN'), (',', 'PUNCT'), ('Cocktails', 'PROPN'), (',', 'PUNCT'), ('Alcohol', 'PROPN'), (',', 'PUNCT'), ('info@zilounge.com', 'X'), (',', 'PUNCT'), ('Monday', 'PROPN'), ('-', 'PUNCT'), ('Sunday', 'PROPN'), ('11:00', 'NUM'), ('AM', 'PROPN'), ('-', 'P

In [None]:
# Third phrasing against the PosTaggingReader pipeline.
query = "When is open the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.786523699760437, data='Monday - Sunday', document=Document(id=cd115b04dbac72c7c7b41f1d8310fa2ec93bb3fb2704c6d18a3829da87338b58, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', meta: {'pos_tags': [('Zi', 'PROPN'), ('Lounge', 'PROPN'), (',', 'PUNCT'), ('Restaurant', 'PROPN'), (',', 'PUNCT'), ('Bar', 'PROPN'), (',', 'PUNCT'), ('International', 'PROPN'), (',', 'PUNCT'), ('Seafood', 'PROPN'), (',', 'PUNCT'), ('Pizza', 'PROPN'), (',', 'PUNCT'), ('Pasta', 'PROPN'), (',', 'PUNCT'), ('Meat', 'NOUN'), (',', 'PUNCT'), ('Steak', 'PROPN'), (',', 'PUNCT'), ('Salads', 'PROPN'), (',', 'PUNCT'), ('Cocktails', 'PROPN'), (',', 'PUNCT'), ('Alcohol', 'PROPN'), (',', 'PUNCT'), ('info@zilounge.com', 'X'), (',', 'PUNCT'), ('Monday', 'PROPN'), ('-', 'PUNCT'), ('Sunday', 'PROPN'), ('11:00', 'NUM'), ('AM', 'PROPN'), ('-', 'PUNCT'), ('2:30', 'NUM'), ('

Использование Part-of-speech tagging (POS-tagging) не оправдало ожиданий.

Попробуем использовать Named Entity Recognition (NER).

Named Entity Recognition (NER) — это задача в области обработки естественного языка (NLP), которая заключается в идентификации и классификации именованных сущностей в тексте. Именованными сущностями могут быть имена собственные, такие как имена людей, названия организаций, географические названия, даты, временные интервалы, денежные суммы и другие специфические категории, которые имеют конкретное значение в контексте.

Основные аспекты NER:

1. **Идентификация**: Процесс выделения последовательностей символов, которые представляют собой именованные сущности. Например, в предложении "Илон Маск основал SpaceX в 2002 году" NER-система должна идентифицировать "Илон Маск", "SpaceX" и "2002" как потенциальные именованные сущности.

2. **Классификация**: После идентификации система классифицирует каждую сущность в одну из предопределённых категорий. В приведённом примере "Илон Маск" может быть классифицирован как "PERSON" (человек), "SpaceX" как "ORGANIZATION" (организация), а "2002" как "DATE" (дата).

3. **Контекстуальный анализ**: NER-системы должны учитывать контекст, поскольку некоторые слова и фразы могут быть омонимами или иметь разные значения в зависимости от окружения. Например, "Apple" может означать как технологическую компанию, так и фрукт.

NER используется в различных приложениях, таких как извлечение информации, автоматическое аннотирование текстов, улучшение поиска по ключевым словам и анализ текстовых данных в реальном времени. Современные системы NER часто строятся с использованием машинного обучения и глубокого обучения, что позволяет им достигать высокой точности в сложных и многозначных текстах.


Следует отметить, что en_core_web_sm — базовая модель в spacy. В нашем случае используется и в POS-tagging, и в NER. Бывают сложнее.

In [None]:
# New retriever over the current document_store; it is added to the NER pipeline below.
retriever = InMemoryEmbeddingRetriever(document_store=document_store)

In [None]:
# Create the pipeline used for answer extraction
extractive_qa_pipeline = Pipeline()

In [None]:
class NERReader(ExtractiveReader):
    """ExtractiveReader subclass that adds a spaCy NER helper, ``extract_answers``.

    NOTE(review): the pipeline outputs recorded in this notebook are
    ``ExtractedAnswer`` objects carrying model scores, not the plain strings
    that ``extract_answers`` returns. That suggests the pipeline only invokes
    the inherited ``run`` method and never calls ``extract_answers``. If
    NER-based answers are intended, ``run`` presumably has to be overridden --
    confirm against the Haystack component API.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments unchanged to ``ExtractiveReader``."""
        super().__init__(**kwargs)

    def extract_answers(self, query, documents):
        """Collect named entities found in ``documents`` whose text occurs in ``query``.

        Args:
            query: Question string. An entity is kept only when its exact text
                is a case-sensitive substring of this string.
            documents: Iterable of objects exposing a ``content`` attribute
                (presumably Haystack ``Document`` instances).

        Returns:
            list: Matching entity texts in document order; duplicates are kept.
        """
        answers = []
        for doc in documents:
            text = doc.content
            # Content can be missing (None) or empty; there is nothing to parse
            # then, and passing None to the spaCy pipeline would fail.
            if not text:
                continue
            # `nlp` is a module-level callable defined elsewhere in the notebook
            # (presumably the loaded spaCy pipeline).
            answers.extend(ent.text for ent in nlp(text).ents if ent.text in query)
        return answers


In [None]:
# Instantiate the custom reader with the base class defaults
ner_reader = NERReader()

# Add the components to the pipeline: reader, query embedder, retriever
extractive_qa_pipeline.add_component(name="reader", instance=ner_reader)
extractive_qa_pipeline.add_component(
    name="embedder",
    instance=SentenceTransformersTextEmbedder(model=model),
)
extractive_qa_pipeline.add_component(name="retriever", instance=retriever)

# Connect the components into a pipeline: query embedding -> retriever -> reader
extractive_qa_pipeline.connect(
    "embedder.embedding",
    "retriever.query_embedding",
)
extractive_qa_pipeline.connect(
    "retriever.documents",
    "reader.documents",
)

<haystack.core.pipeline.pipeline.Pipeline object at 0x7b6380613430>
üöÖ Components
  - reader: NERReader
  - embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
üõ§Ô∏è Connections
  - embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> reader.documents (List[Document])

In [None]:
# The same question text is embedded for retrieval and handed to the reader.
query = "What are the opening hours of the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What are the opening hours of the restaurant Zi Lounge?', score=0.843254804611206, data='11:00 AM - 2:30 AM', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 26.02480504900634), context=None, document_offset=ExtractedAnswer.Span(start=142, end=160), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the opening hours of the restaurant Zi Lounge?', score=0.8176265358924866, data='Monday - Sunday', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 26.02480504900634), context=None, document_offset=ExtractedAnswer.Span(start=126, end=141), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the open

In [None]:
# Variant of the question that asks for days as well as hours.
query = "What are the opening days and hours of the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What are the opening days and hours of the restaurant Zi Lounge?', score=0.8397794961929321, data='Monday - Sunday', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 26.470380126861734), context=None, document_offset=ExtractedAnswer.Span(start=126, end=141), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the opening days and hours of the restaurant Zi Lounge?', score=0.8241560459136963, data='Monday - Saturday', document=Document(id=829cb8c526265bc03477b109eb89acda01db27d62b51c8fa586ca3c63ec56afd, content: 'Zarpe, Bar, Cocktails, Alcohol, LASENORA@ZARPE.BAR, Monday - Saturday 4:00 PM - 12:00 AM, Sunday - C...', score: 21.329905508721964), context=None, document_offset=ExtractedAnswer.Span(start=52, end=69), context_offset=None, meta={}),
   ExtractedAnswer(query=

In [None]:
# Loosely phrased variant of the same question.
query = "When is open the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.786523699760437, data='Monday - Sunday', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 27.195980742193726), context=None, document_offset=ExtractedAnswer.Span(start=126, end=141), context_offset=None, meta={}),
   ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.7416019439697266, data='Monday - Saturday', document=Document(id=829cb8c526265bc03477b109eb89acda01db27d62b51c8fa586ca3c63ec56afd, content: 'Zarpe, Bar, Cocktails, Alcohol, LASENORA@ZARPE.BAR, Monday - Saturday 4:00 PM - 12:00 AM, Sunday - C...', score: 21.518277971272383), context=None, document_offset=ExtractedAnswer.Span(start=52, end=69), context_offset=None, meta={}),
   ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.687

Применение NER также не оправдало ожиданий. Можно, конечно, упороться в поиск навороченных моделей spaCy, но, мне кажется, в нашем случае нужно использовать другие подходы.

Попробуем Dependency Parsing — это способ анализа синтаксической структуры предложения, который позволяет нам определить зависимости между словами в предложении.

Попробуем собрать гибрид из того, о чём говорили ранее: ретривер + DeBERTa в качестве Reader.

In [None]:
# New embedding retriever instance for the pipeline that is rebuilt in the following cells.
retriever = InMemoryEmbeddingRetriever(document_store=document_store)

In [None]:
# Create the pipeline for answer extraction
extractive_qa_pipeline = Pipeline()

In [None]:
class DependencyParsingReader(ExtractiveReader):
    """ExtractiveReader subclass that adds a dependency-parsing helper, ``extract_answers``.

    NOTE(review): the pipeline outputs recorded in this notebook are
    ``ExtractedAnswer`` objects carrying model scores, not the plain strings
    that ``extract_answers`` returns. That suggests the pipeline only invokes
    the inherited ``run`` method and never calls ``extract_answers``. If
    dependency-based answers are intended, ``run`` presumably has to be
    overridden -- confirm against the Haystack component API.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments unchanged to ``ExtractiveReader``."""
        super().__init__(**kwargs)

    def extract_answers(self, query, documents):
        """Collect the nominal subjects attached to each ROOT token of the documents.

        Args:
            query: Accepted for signature symmetry but not used by this method.
            documents: Iterable of objects exposing a ``content`` attribute
                (presumably Haystack ``Document`` instances).

        Returns:
            list: Text of every token labelled ``nsubj`` that is a direct child
            of a token labelled ``ROOT``, in document order.
        """
        answers = []
        for doc in documents:
            text = doc.content
            # Content can be missing (None) or empty; there is nothing to parse
            # then, and passing None to the spaCy pipeline would fail.
            if not text:
                continue
            # `nlp` is a module-level callable defined elsewhere in the notebook
            # (presumably the loaded spaCy pipeline).
            for token in nlp(text):
                if token.dep_ != "ROOT":
                    continue
                answers.extend(
                    child.text for child in token.children if child.dep_ == "nsubj"
                )
        return answers

In [None]:
# Instantiate the custom reader with the base class defaults
dependency_parsing_reader = DependencyParsingReader()

# Add the components to the pipeline: reader, query embedder, retriever
extractive_qa_pipeline.add_component(name="reader", instance=dependency_parsing_reader)
extractive_qa_pipeline.add_component(
    name="embedder",
    instance=SentenceTransformersTextEmbedder(model=model),
)
extractive_qa_pipeline.add_component(name="retriever", instance=retriever)

# Connect the components into a pipeline: query embedding -> retriever -> reader
extractive_qa_pipeline.connect(
    "embedder.embedding",
    "retriever.query_embedding",
)
extractive_qa_pipeline.connect(
    "retriever.documents",
    "reader.documents",
)

<haystack.core.pipeline.pipeline.Pipeline object at 0x7b63800e6fe0>
üöÖ Components
  - reader: DependencyParsingReader
  - embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
üõ§Ô∏è Connections
  - embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> reader.documents (List[Document])

In [None]:
# The same question text is embedded for retrieval and handed to the reader.
query = "What are the opening hours of the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What are the opening hours of the restaurant Zi Lounge?', score=0.843254804611206, data='11:00 AM - 2:30 AM', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 26.02480504900634), context=None, document_offset=ExtractedAnswer.Span(start=142, end=160), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the opening hours of the restaurant Zi Lounge?', score=0.8176265358924866, data='Monday - Sunday', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 26.02480504900634), context=None, document_offset=ExtractedAnswer.Span(start=126, end=141), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the open

In [None]:
# Variant of the question that asks for days as well as hours.
query = "What are the opening days and hours of the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What are the opening days and hours of the restaurant Zi Lounge?', score=0.8397794961929321, data='Monday - Sunday', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 26.470380126861734), context=None, document_offset=ExtractedAnswer.Span(start=126, end=141), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the opening days and hours of the restaurant Zi Lounge?', score=0.8241560459136963, data='Monday - Saturday', document=Document(id=829cb8c526265bc03477b109eb89acda01db27d62b51c8fa586ca3c63ec56afd, content: 'Zarpe, Bar, Cocktails, Alcohol, LASENORA@ZARPE.BAR, Monday - Saturday 4:00 PM - 12:00 AM, Sunday - C...', score: 21.329905508721964), context=None, document_offset=ExtractedAnswer.Span(start=52, end=69), context_offset=None, meta={}),
   ExtractedAnswer(query=

In [None]:
# Loosely phrased variant of the same question.
query = "When is open the restaurant Zi Lounge?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.786523699760437, data='Monday - Sunday', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 27.195980742193726), context=None, document_offset=ExtractedAnswer.Span(start=126, end=141), context_offset=None, meta={}),
   ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.7416019439697266, data='Monday - Saturday', document=Document(id=829cb8c526265bc03477b109eb89acda01db27d62b51c8fa586ca3c63ec56afd, content: 'Zarpe, Bar, Cocktails, Alcohol, LASENORA@ZARPE.BAR, Monday - Saturday 4:00 PM - 12:00 AM, Sunday - C...', score: 21.518277971272383), context=None, document_offset=ExtractedAnswer.Span(start=52, end=69), context_offset=None, meta={}),
   ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.687

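Dependency Parsing также не оправдал ожиданий. Примечание: ответы и их score полностью совпадают с результатами NER-варианта выше — по-видимому, пайплайн вызывает унаследованный метод `run()` класса `ExtractiveReader`, а переопределённый `extract_answers` не используется, поэтому это сравнение фактически не проверяет ни NER, ни Dependency Parsing.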

Мы также можем попробовать использовать более сложные алгоритмы извлечения ответов, такие как Span-based QA или Sequence-to-sequence QA.

Смысла использовать Span-based QA нет, так как алгоритм хорошо работает, только если ответ напрямую присутствует в тексте.

In [None]:
# pip install farm-haystack

In [None]:
# from haystack.nodes import T5Reader
# from transformers import T5Tokenizer, T5ForConditionalGeneration

In [None]:
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever

In [None]:
# Create a fresh DocumentStore for the pipeline built in the following cells
document_store = InMemoryDocumentStore()

# Add the documents to the DocumentStore. The trailing `;` keeps the cell silent,
# since the return value of write_documents would otherwise be echoed as output.
document_store.write_documents(documents);

# The InMemoryEmbeddingRetriever that used to be created here was dropped: `retriever`
# is reassigned to an InMemoryBM25Retriever before its first use, so that object was
# never queried.

In [None]:
# Start a new, empty pipeline; the BM25 retriever and the extractive reader are added in the following cells.
qa_pipeline = Pipeline()

In [None]:
# BM25 retriever over `document_store`, registered in the pipeline under the name "retriever".
retriever = InMemoryBM25Retriever(document_store=document_store)
qa_pipeline.add_component(
    name="retriever",
    instance=retriever,
)

In [None]:
# Register an ExtractiveReader using the deepset/deberta-v3-base-squad2 model as "reader".
qa_pipeline.add_component(
    name="reader",
    instance=ExtractiveReader(model="deepset/deberta-v3-base-squad2"),
)

In [None]:
# qa_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=document_store), name="retriever")
# qa_pipeline.add_component(instance=ExtractiveReader(model="deepset/deberta-v3-base-squad2"), name="reader")

In [None]:
# Feed the retrieved documents into the reader (socket names spelled out explicitly).
qa_pipeline.connect("retriever.documents", "reader.documents")

<haystack.core.pipeline.pipeline.Pipeline object at 0x780b17860640>
üöÖ Components
  - retriever: InMemoryBM25Retriever
  - reader: ExtractiveReader
üõ§Ô∏è Connections
  - retriever.documents -> reader.documents (List[Document])

In [None]:
# The same question text drives both BM25 retrieval and answer extraction.
query = "What are the opening hours of the restaurant Zi Lounge?"
qa_pipeline.run(
    data={
        "retriever": {"query": query, "top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


{'reader': {'answers': [ExtractedAnswer(query='What are the opening hours of the restaurant Zi Lounge?', score=0.8549867868423462, data=' Monday - Sunday 11:00 AM - 2:30 AM', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 25.75866793740871), context=None, document_offset=ExtractedAnswer.Span(start=125, end=160), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the opening hours of the restaurant Zi Lounge?', score=0.6113113164901733, data=' Monday - Saturday 4:00 PM - 12:00 AM, Sunday - Closed', document=Document(id=829cb8c526265bc03477b109eb89acda01db27d62b51c8fa586ca3c63ec56afd, content: 'Zarpe, Bar, Cocktails, Alcohol, LASENORA@ZARPE.BAR, Monday - Saturday 4:00 PM - 12:00 AM, Sunday - C...', score: 20.38204179149279), context=None, document_offset=ExtractedAnswer.Span(start=51, end=105), context_offset=Non

In [None]:
# Variant of the question that asks for days as well as hours.
query = "What are the opening days and hours of the restaurant Zi Lounge?"
qa_pipeline.run(
    data={
        "retriever": {"query": query, "top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

{'reader': {'answers': [ExtractedAnswer(query='What are the opening days and hours of the restaurant Zi Lounge?', score=0.8552565574645996, data=' Monday - Sunday 11:00 AM - 2:30 AM', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 25.88915923886433), context=None, document_offset=ExtractedAnswer.Span(start=125, end=160), context_offset=None, meta={}),
   ExtractedAnswer(query='What are the opening days and hours of the restaurant Zi Lounge?', score=0.6460022926330566, data=' Monday - Saturday 4:00 PM - 12:00 AM, Sunday - Closed', document=Document(id=829cb8c526265bc03477b109eb89acda01db27d62b51c8fa586ca3c63ec56afd, content: 'Zarpe, Bar, Cocktails, Alcohol, LASENORA@ZARPE.BAR, Monday - Saturday 4:00 PM - 12:00 AM, Sunday - C...', score: 20.497650393518462), context=None, document_offset=ExtractedAnswer.Span(start=51, end=105),

In [None]:
# Loosely phrased variant of the same question.
query = "When is open the restaurant Zi Lounge?"
qa_pipeline.run(
    data={
        "retriever": {"query": query, "top_k": 3},
        "reader": {"query": query, "top_k": 3},
    }
)

{'reader': {'answers': [ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.757442057132721, data=' Monday - Sunday 11:00 AM - 2:30 AM', document=Document(id=e8a323a4533b246c32c4f3b40c1c583c7684cad1b757abb5e4bd8b211c473ac4, content: 'Zi Lounge, Restaurant, Bar, International, Seafood, Pizza, Pasta, Meat, Steak, Salads, Cocktails, Al...', score: 19.801461834952015), context=None, document_offset=ExtractedAnswer.Span(start=125, end=160), context_offset=None, meta={}),
   ExtractedAnswer(query='When is open the restaurant Zi Lounge?', score=0.48066219687461853, data=' Monday - Saturday 4:00 PM - 12:00 AM, Sunday - Closed', document=Document(id=829cb8c526265bc03477b109eb89acda01db27d62b51c8fa586ca3c63ec56afd, content: 'Zarpe, Bar, Cocktails, Alcohol, LASENORA@ZARPE.BAR, Monday - Saturday 4:00 PM - 12:00 AM, Sunday - C...', score: 16.35872539077387), context=None, document_offset=ExtractedAnswer.Span(start=51, end=105), context_offset=None, meta={}),
   ExtractedAnswer(q