Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/ragas/testset/docstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from random import choices

import numpy as np
import numpy.typing as npt
Expand All @@ -17,13 +16,13 @@

from ragas.embeddings.base import BaseRagasEmbeddings
from ragas.executor import Executor
from ragas.testset.utils import rng

if t.TYPE_CHECKING:
from llama_index.readers.schema import Document as LlamaindexDocument

Embedding = t.Union[t.List[float], npt.NDArray[np.float64]]
logger = logging.getLogger(__name__)
rng = np.random.default_rng()


class Document(LCDocument):
Expand Down Expand Up @@ -243,7 +242,7 @@ def get_document(self, doc_id: str) -> Node:
raise NotImplementedError

def get_random_nodes(self, k=1) -> t.List[Node]:
    """
    Return ``k`` nodes picked at random from ``self.nodes``.

    Sampling goes through the module-level ``rng`` generator and is done
    with replacement (the NumPy default), so a node may appear more than
    once in the result.

    Raises
    ------
    ValueError
        Raised by ``rng.choice`` when ``self.nodes`` is empty and ``k`` > 0.
    """
    # Only the ``rng``-based path is kept: a leading
    # ``return choices(self.nodes, k=k)`` would make it unreachable and
    # bypass the shared generator.
    # Draw positions instead of converting the nodes to an array, so NumPy
    # never has to inspect the Node objects themselves.
    picked = rng.choice(len(self.nodes), size=k)
    return [self.nodes[i] for i in picked]

def get_similar(
self, node: Node, threshold: float = 0.7, top_k: int = 3
Expand Down
22 changes: 21 additions & 1 deletion src/ragas/testset/evolutions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
from numpy.random import default_rng

from ragas.llms import BaseRagasLLM
from ragas.llms.json_load import json_loader
from ragas.llms.prompt import Prompt
from ragas.testset.docstore import Direction, DocumentStore, Node
from ragas.testset.filters import EvolutionFilter, NodeFilter, QuestionFilter
from ragas.testset.prompts import (
compress_question_prompt,
conditional_question_prompt,
find_relevent_context_prompt,
multi_context_question_prompt,
question_answer_prompt,
reasoning_question_prompt,
Expand Down Expand Up @@ -139,7 +141,25 @@ def generate_datarow(
):
assert self.generator_llm is not None, "generator_llm cannot be None"

merged_nodes = self.merge_nodes(current_nodes)
node_content = [
f"{i}\t{n.page_content}" for i, n in enumerate(current_nodes.nodes)
]
results = self.generator_llm.generate_text(
prompt=find_relevent_context_prompt.format(
question=question, contexts=node_content
)
)
relevant_context_indices = json_loader.safe_load(
results.generations[0][0].text.strip(), llm=self.generator_llm
).get("relevant_context", None)
if relevant_context_indices is None:
relevant_context = CurrentNodes(
root_node=current_nodes.root_node, nodes=current_nodes.nodes
)
else:
relevant_context = current_nodes

merged_nodes = self.merge_nodes(relevant_context)
results = self.generator_llm.generate_text(
prompt=question_answer_prompt.format(
question=question, context=merged_nodes.page_content
Expand Down
32 changes: 32 additions & 0 deletions src/ragas/testset/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,35 @@
output_type="string",
language="english",
)


# Few-shot prompt asking the LLM which of the supplied contexts are relevant to
# a question. The expected output is a JSON object with one key,
# "relevant_contexts", holding the numbers of the selected contexts.
# NOTE(review): generate_datarow in evolutions.py reads the key
# "relevant_context" (singular) from the parsed output - confirm that the
# consumer and this prompt agree on the key name.
# NOTE(review): the examples number contexts from 1, while the caller builds
# its context list with enumerate() starting at 0 - confirm the intended base.
find_relevent_context_prompt = Prompt(
    name="find_relevent_context",
    instruction="Given a question and set of contexts, find the most relevant contexts to answer the question.",
    examples=[
        {
            "question": "What is the capital of France?",
            "contexts": [
                "1. France is a country in Western Europe. It has several cities, including Paris, Lyon, and Marseille. Paris is not only known for its cultural landmarks like the Eiffel Tower and the Louvre Museum but also as the administrative center.",
                "2. The capital of France is Paris. It is also the most populous city in France, with a population of over 2 million people. Paris is known for its cultural landmarks like the Eiffel Tower and the Louvre Museum.",
                "3. Paris is the capital of France. It is also the most populous city in France, with a population of over 2 million people. Paris is known for its cultural landmarks like the Eiffel Tower and the Louvre Museum.",
            ],
            # Contexts 2 and 3 state the capital explicitly; context 1 never does.
            "output": {
                "relevant_contexts": [2, 3],
            },
        },
        {
            "question": "How does caffeine affect the body and what are its common sources?",
            "contexts": [
                "1. Caffeine is a central nervous system stimulant. It can temporarily ward off drowsiness and restore alertness. It primarily affects the brain, where it alters the function of neurotransmitters.",
                "2. Regular physical activity is essential for maintaining good health. It can help control weight, combat health conditions, boost energy, and promote better sleep.",
                "3. Common sources of caffeine include coffee, tea, cola, and energy drinks. These beverages are consumed worldwide and are known for providing a quick boost of energy.",
            ],
            # Context 1 covers the effects and context 3 the sources; context 2
            # is about exercise and does not mention caffeine.
            "output": {"relevant_contexts": [1, 3]},
        },
    ],
    input_keys=["question", "contexts"],
    output_key="output",
    output_type="json",
    language="english",
)
4 changes: 4 additions & 0 deletions src/ragas/testset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import re
import warnings

import numpy as np

# Module-level NumPy random generator created with a fixed seed (42);
# ragas.testset.docstore imports this object for its random node sampling.
rng = np.random.default_rng(seed=42)


def load_as_score(text):
"""
Expand Down