In [None]:
import time
from talk_rag_scheduler.indexing_pipeline import (
    ingest_knowledge_and_run_indexing,
)
from talk_rag_scheduler.rag_pipeline_concurrent import (
    CONCURRENT_KEYWORD_RAG_PIPELINE_IMG,
    CONCURRENT_SEMANTIC_RAG_PIPELINE_IMG,
    list_concurrent_replies,
    prettify_concurrent_rag_info,
    create_concurrent_rag_pipeline,
)
from talk_rag_scheduler.const import (
    OLLAMA_MODEL,
    NUM_PREDICT,
    TOP_K,
    OLLAMA_EMBEDDING_MODEL,
)
from haystack import Pipeline
from IPython.display import Markdown, Image
from haystack.document_stores.in_memory import InMemoryDocumentStore

## Concurrent RAG with Keyword search

In [None]:
# Fresh in-memory store for the keyword-search variant. The indexing helper is
# called with only the store, so whatever defaults it defines apply here.
document_store = InMemoryDocumentStore()
# NOTE(review): presumably this ingests the knowledge base and writes the resulting
# documents into `document_store` — confirm in talk_rag_scheduler.indexing_pipeline.
indexing_pipeline = ingest_knowledge_and_run_indexing(document_store)

In [None]:
# Assemble the keyword-search RAG pipeline on top of the store indexed above.
# NOTE(review): n_concurrent presumably sets how many generations run side by side
# — confirm in talk_rag_scheduler.rag_pipeline_concurrent.
concurrent_rag_pipeline: Pipeline = create_concurrent_rag_pipeline(
    document_store=document_store,
    n_concurrent=4,
    ollama_model=OLLAMA_MODEL,
)
# Last expression of the cell: display the image referenced by the constant
# (presumably the diagram of this pipeline).
Image(CONCURRENT_KEYWORD_RAG_PIPELINE_IMG)

In [None]:
# Question handed to both the retriever and the prompt builder.
query = "How can I schedule an asynchronous job using the scheduler python library?"

# Wall-clock the complete pipeline run with a monotonic, high-resolution timer.
t_start = time.perf_counter()
results = concurrent_rag_pipeline.run(
    data={
        "retriever": {"query": query, "top_k": TOP_K},
        "prompt_builder": {"query": query},
        "llms": {"generation_kwargs": {"num_predict": NUM_PREDICT}},
    },
)
t_total = time.perf_counter() - t_start
# Keep the elapsed seconds next to the pipeline outputs so later cells that
# receive `results` can read the timing from the same dict.
results["t_total"] = t_total

In [None]:
# Lazy generator over the first element of every entry returned by
# list_concurrent_replies; consumed one item at a time by the next cell.
res_gen = (reply_group[0] for reply_group in list_concurrent_replies(results))

In [None]:
# Take the next reply from `res_gen` and render it as Markdown. Each re-run of this
# cell advances to the following reply; once the generator is exhausted, `next`
# raises StopIteration (re-run the cell that creates `res_gen` to start over).
res = next(res_gen)
Markdown(res)

In [None]:
# Print the text that prettify_concurrent_rag_info builds from `results`
# (the dict that also carries the "t_total" entry stored after the run).
print(prettify_concurrent_rag_info(results))

## Concurrent RAG with Semantic search

In [None]:
# Dedicated store for the semantic variant, so the keyword index held in
# `document_store` is left untouched.
semantic_document_store = InMemoryDocumentStore()
semantic_indexing_pipeline = ingest_knowledge_and_run_indexing(
    semantic_document_store,
    # Use the shared constant rather than a hard-coded model name: the query-side
    # pipeline is configured with OLLAMA_EMBEDDING_MODEL, and documents must be
    # embedded with the same model for similarity search to be meaningful.
    ollama_embedding_model=OLLAMA_EMBEDDING_MODEL,
    indexing_mode="semantic_split",
    # NOTE(review): splitter settings are passed through as-is; their exact meaning
    # is defined by ingest_knowledge_and_run_indexing — confirm before tuning.
    split_by="passage",
    split_length=16,
    split_overlap=4,
    split_threshold=12,
)

In [None]:
# Bind the semantic pipeline to its own name. Reusing `concurrent_rag_pipeline`
# would replace the keyword pipeline in the kernel, so re-running the keyword
# cell afterwards would call this pipeline with inputs meant for the other one.
semantic_concurrent_rag_pipeline: Pipeline = create_concurrent_rag_pipeline(
    document_store=semantic_document_store,
    n_concurrent=4,
    ollama_model=OLLAMA_MODEL,
    indexing_mode="semantic_split",
    ollama_embedding_model=OLLAMA_EMBEDDING_MODEL,
)
# Same question as the keyword run, restated so this cell does not depend on it.
query = "How can I schedule an asynchronous job using the scheduler python library?"
# Wall-clock the complete pipeline run with a monotonic, high-resolution timer.
t_start = time.perf_counter()
semantic_results = semantic_concurrent_rag_pipeline.run(
    {
        # The query text goes to the embedder here; the retriever only gets top_k.
        "text_embedder": {"text": query},
        "retriever": {"top_k": TOP_K},
        "prompt_builder": {"query": query},
        "llms": {"generation_kwargs": {"num_predict": NUM_PREDICT}},
    }
)
t_total = time.perf_counter() - t_start
# Keep the elapsed seconds next to the pipeline outputs for the summary cell.
semantic_results["t_total"] = t_total
# Last expression of the cell: display the image referenced by the constant
# (presumably the diagram of this pipeline).
Image(CONCURRENT_SEMANTIC_RAG_PIPELINE_IMG)

In [None]:
# Lazy generator over the first element of every entry returned by
# list_concurrent_replies for the semantic run; consumed by the next cell.
semantic_res_gen = (
    reply_group[0] for reply_group in list_concurrent_replies(semantic_results)
)

In [None]:
# Take the next reply from `semantic_res_gen` and render it as Markdown. Each re-run
# advances to the following reply; once the generator is exhausted, `next` raises
# StopIteration (re-run the cell that creates `semantic_res_gen` to start over).
res = next(semantic_res_gen)
Markdown(res)

In [None]:
# Print the text that prettify_concurrent_rag_info builds from `semantic_results`
# (the dict that also carries the "t_total" entry stored after the run).
print(prettify_concurrent_rag_info(semantic_results))