In [2]:
import logging
from uuid import uuid4

import pandas as pd
from tqdm import tqdm
from langchain_community.document_loaders import PyPDFLoader
from plum_chatbot.agents.agents import invoke_chatbot
from plum_chatbot.datasources.qdrant_datasource import QdrantDatasource
from plum_chatbot.di_containers.datasources_containers import Container, container
from plum_chatbot.schemas.schema import ChatMessage, UserInput
from pathlib import Path
from langchain.chat_models import init_chat_model

In [3]:
# Logger named after the current module (the notebook's `__name__`).
logger: logging.Logger = logging.getLogger(__name__)

# Agent identifier handed to `invoke_chatbot` for every question.
agent_id: str = "rag"

# NOTE(review): not referenced by any cell visible here; presumably the size
# of the golden set to generate - TODO confirm or remove.
n_questions: int = 20

In [4]:
loader = PyPDFLoader(Path("../data/pdfs/pdf24_merged.pdf"))
content = ""
async for page in loader.alazy_load():
    content += page.page_content + "\n"

In [5]:
# Keyword settings forwarded to `init_chat_model` for the Ollama provider.
# NOTE(review): `base_url` carries no scheme or port; presumably the client
# fills in defaults - TODO confirm.
ollama_settings = {
    "model_provider": "ollama",
    "base_url": "host.docker.internal",
    "temperature": 0.5,
}

# The original cell noted "qwen3:8b" next to the model name, presumably as an
# alternative model.
llm = init_chat_model("llama3.2", **ollama_settings)

In [6]:
# Initialise the container's resources, then call every registered provider
# once. Instances of QdrantDatasource additionally get `setup()` invoked.
container.init_resources()
for dependency_name, provider_factory in Container.providers.items():
    resource = provider_factory()
    if isinstance(resource, QdrantDatasource):
        resource.setup()
    # Lazy %-style arguments: the message is only formatted when INFO is
    # enabled. The text is unchanged and still reports the provider object.
    logger.info(
        "Dependency %s initialized with %s", dependency_name, provider_factory
    )

  self.client = QdrantClient(


In [8]:
# Load the golden set; later cells read its "question" column.
golden_set = pd.read_csv(filepath_or_buffer="data/generated_golden_set.csv")

In [9]:
answers = []

for q in tqdm(golden_set["question"]):
    # print(q)
    user_input: UserInput = UserInput(message=q, thread_id=str(uuid4()))
    answer: ChatMessage = await invoke_chatbot(user_input, agent_id)
    answers.append(answer.content)
    # print(answer.content)
    # print()
    # print()

100%|██████████| 19/19 [01:09<00:00,  3.66s/it]


In [10]:
# Store the collected answers in an "answers" column (the list was built in
# question-row order), then write the frame to CSV without the index.
golden_set["answers"] = answers
golden_set.to_csv(
    path_or_buf="data/generated_golden_set_with_answers.csv",
    index=False,
)