In [None]:
import nest_asyncio
from r2r import R2RClient, R2RException

# Patch asyncio via nest_asyncio before the R2R client is created.
nest_asyncio.apply()

# Connection settings for the R2R server used by this notebook.
R2R_BASE_URL = 'http://localhost:7272'
R2R_TIMEOUT = 600  # NOTE(review): presumably seconds -- confirm against R2RClient

client = R2RClient(base_url=R2R_BASE_URL, timeout=R2R_TIMEOUT)

In [2]:
# Load the evaluation questions, one per line.
# Surrounding whitespace is trimmed and blank lines are dropped, so an empty
# string never ends up in `questions`.
with open(file="./questions.txt", mode="r", encoding="utf-8") as f:
    stripped_lines = (line.strip() for line in f)
    questions = [line for line in stripped_lines if line]

In [3]:
# Search options passed to the RAG call as `search_settings`.
search_settings = dict(
    use_semantic_search=True,
    limit=3,
    offset=0,
    include_metadatas=True,
    include_scores=True,
    search_strategy="vanilla",
)

# Generation options passed to the RAG call as `rag_generation_config`.
rag_generation_config = dict(
    temperature=0.1,
    top_p=1,
    max_tokens_to_sample=2048,
)

In [None]:
import re
import ollama
import pandas as pd

# Build the evaluation dataset: one row per question that was processed
# successfully. Columns, in order:
#   question           - the query sent to the RAG endpoint
#   retrieved_context  - LLM-written summary of the retrieved chunks
#   generated_response - completion returned by the RAG endpoint
#   reference_answer   - left as None by this cell
dataset_columns = ["question", "retrieved_context", "generated_response", "reference_answer"]
records = []
total_questions = len(questions)

try:
    for i, q in enumerate(questions, start=1):
        try:
            # Submit the user query to the RAG endpoint
            resp = client.retrieval.rag(
                query=q,
                search_mode="custom",
                search_settings=search_settings,
                rag_generation_config=rag_generation_config,
                include_title_if_available=True,
            ).results

            # Join the retrieved chunks, collapsing runs of newlines inside each chunk
            full_ctx = "\n".join(
                re.sub(r"\n+", "\n", chunk.text)
                for chunk in resp.search_results.chunk_search_results
            )

            # Summarize the retrieved context with the local LLM
            summary_ctx = ollama.generate(
                model="llama3.1",
                prompt=f"Please summarize the following text very concisely:\n\n{full_ctx}",
                options={"temperature": 0.1},
            )['response']

            # Rows are collected in a list and turned into a DataFrame once at the end
            records.append([q, summary_ctx, resp.completion, None])

            print(f"Question {i} out of {total_questions}")
        except R2RException as r2re:
            print(f"Skipping {i} because of {str(r2re)}")
        # Several exception types must be given as a tuple; `A | B` is not valid
        # in an except clause and raises TypeError when an exception is matched.
        except (ollama.ResponseError, ollama.RequestError) as oe:
            print(f"Skipping {i} because of {str(oe)}")
        except Exception as e:
            # Deliberate best-effort: one failing question must not stop the run
            print(f"Skipping {i} because of {str(e)}")
finally:
    # Runs even if the loop is interrupted, so the rows gathered so far are
    # still available as `df` and written to disk.
    df = pd.DataFrame(records, columns=dataset_columns)
    df.to_csv("dataset.csv", index=False)


Question 1 out of 30
Question 2 out of 30
Question 3 out of 30
Question 4 out of 30
Question 5 out of 30
Question 6 out of 30
