In [None]:
from talk_rag_scheduler.retrieval_pipeline import (
    prettify_document_info,
    RETRIEVAL_VECTOR_PIPE_IMG,
    create_retrieval_embedding_pipeline,
)
from talk_rag_scheduler.indexing_pipeline import (
    ingest_knowledge_and_run_indexing,
)
from haystack.document_stores.in_memory import InMemoryDocumentStore
from IPython.display import Markdown, Image

In [None]:
# Fresh in-memory store; it lives only as long as the kernel, so this cell
# has to be re-run after every kernel restart.
document_store = InMemoryDocumentStore()

# Fill the store via the project's indexing helper, requesting the "semantic"
# indexing mode and the "mxbai-embed-large" Ollama embedding model.
ingest_knowledge_and_run_indexing(
    document_store,
    indexing_mode="semantic",
    ollama_embedding_model="mxbai-embed-large",
)

In [None]:
# Build the retrieval pipeline on top of the document store created earlier in this notebook.
retrieval_embedding_pipeline = create_retrieval_embedding_pipeline(document_store)
# Last expression of the cell, so the notebook renders it as the cell output
# (presumably a diagram of the pipeline -- confirm against retrieval_pipeline).
Image(RETRIEVAL_VECTOR_PIPE_IMG)

In [None]:
# Inputs a reader may want to tune, named here instead of being buried in the call.
QUERY = "How can I schedule an asynchronous job using the scheduler python library?"
TOP_K = 5  # forwarded as "top_k"; presumably the number of documents to retrieve

# Run the retrieval pipeline with the query text and top_k as its inputs.
results = retrieval_embedding_pipeline.run(
    {
        "text": QUERY,
        "top_k": TOP_K,
    }
)

# Print the summary built by prettify_document_info for every document
# returned under the "retriever" output.
for document in results["retriever"]["documents"]:
    print(prettify_document_info(document))

In [None]:
# One-shot iterator over the retrieved documents; the next cell pulls one
# document from it per execution.
doc_gen = iter(results["retriever"]["documents"])

In [None]:
# Each execution of this cell advances doc_gen by one document, so re-running
# the cell steps through the retrieved documents in order.
# The None default keeps next() from raising StopIteration once doc_gen is
# exhausted (or when the retriever returned no documents at all).
document = next(doc_gen, None)
if document is None:
    detail = Markdown(
        "*No more retrieved documents. Re-run the cell that creates `doc_gen` to start over.*"
    )
else:
    print(prettify_document_info(document))
    detail = Markdown(f"{document.content}")
# Last expression of the cell, so the notebook renders it as Markdown.
detail

## Improvement Options

* everything that applies to the indexing step also applies here
* consider using 2-stage retrieval, hybrid retrieval, multi-vector retrieval or reranking