From 264b33cddf6bb90d52622af4ddd92734c6bd0333 Mon Sep 17 00:00:00 2001
From: Shahules786
Date: Thu, 18 Jan 2024 16:29:40 -0800
Subject: [PATCH 1/8] converted prompts to Prompt

---
 src/ragas/llms/prompt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ragas/llms/prompt.py b/src/ragas/llms/prompt.py
index 666ff5552..fddcc16dc 100644
--- a/src/ragas/llms/prompt.py
+++ b/src/ragas/llms/prompt.py
@@ -49,7 +49,7 @@ class Prompt(BaseModel):
     input_keys: t.List[str]
     output_key: str
     output_type: str = "json"
-    language = "en"
+    language = "english"
 
     @root_validator
     def validate_prompt(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:

From 4f292fdabb9df706157a21edc6fc53b0da151fb6 Mon Sep 17 00:00:00 2001
From: Shahules786
Date: Thu, 18 Jan 2024 16:30:09 -0800
Subject: [PATCH 2/8] Converted to Prompt Object

---
 src/ragas/testset/evolutions.py |  44 +--
 src/ragas/testset/prompts.py    | 554 ++++++++++++--------
 2 files changed, 212 insertions(+), 386 deletions(-)

diff --git a/src/ragas/testset/evolutions.py b/src/ragas/testset/evolutions.py
index 6bd5f6979..64828b7a3 100644
--- a/src/ragas/testset/evolutions.py
+++ b/src/ragas/testset/evolutions.py
@@ -5,30 +5,22 @@
 from random import choice
 
 from fsspec.exceptions import asyncio
-from langchain.prompts import ChatPromptTemplate
 from numpy.random import default_rng
 
 from ragas.llms import BaseRagasLLM
 from ragas.llms.json_load import load_as_json
-from ragas.llms.prompt import PromptValue
 from ragas.testset.docstore import Direction, Document, DocumentStore, Node
 from ragas.testset.prompts import (
     FILTER_QUESTION,
     MULTICONTEXT_QUESTION,
     SCORE_CONTEXT,
     SEED_QUESTION,
-    TABLE_QA,
-    demonstrations,
 )
 
 rng = default_rng()
 logger = logging.getLogger(__name__)
 
 
-def to_pv(prompt: ChatPromptTemplate) -> PromptValue:
-    return PromptValue(prompt_str=prompt.format())
-
-
 @dataclass
 class Filter(ABC):
     ...
@@ -43,9 +35,8 @@ def filter(self, node: Node) -> t.Dict:
         return asyncio.get_event_loop().run_until_complete(self.afilter(node))
 
     async def afilter(self, node: Node) -> t.Dict:
-        human_prompt = SCORE_CONTEXT.format(context=node.page_content)
-        prompt = ChatPromptTemplate.from_messages([human_prompt])
-        results = await self.llm.agenerate_text(prompt=to_pv(prompt))
+        prompt = SCORE_CONTEXT.format(context=node.page_content)
+        results = await self.llm.agenerate_text(prompt=prompt)
         output = results.generations[0][0].text.strip()
         score = load_as_json(output)
         score.update({"score": score.get("score", 0) >= self.threshold})
@@ -60,10 +51,8 @@ def filter(self, question: str) -> bool:
         return asyncio.get_event_loop().run_until_complete(self.afilter(question))
 
     async def afilter(self, question: str) -> bool:
-        human_prompt = FILTER_QUESTION.format(question=question)
-        prompt = ChatPromptTemplate.from_messages([human_prompt])
-
-        results = await self.llm.agenerate_text(prompt=to_pv(prompt))
+        prompt = FILTER_QUESTION.format(question=question)
+        results = await self.llm.agenerate_text(prompt=prompt)
         results = results.generations[0][0].text.strip()
         json_results = load_as_json(results)
         logger.debug("filtered question: %s", json_results)
@@ -161,25 +150,9 @@ async def aevolve(self, llm: BaseRagasLLM, docstore: DocumentStore):
 
 async def simple_evolution(
     llm: BaseRagasLLM, seed_doc: Document, is_table_present: bool = False
 ):
-    if is_table_present:
-        human_prompt = TABLE_QA.format(context=seed_doc.page_content)
-    else:
-        sample = rng.choice(demonstrations, 1)[0]  # type: ignore
-        questions = rng.choice(sample["questions"], 2, replace=False)
-        questions = (
-            "{"
-            + str({k: v for dic in questions.tolist() for k, v in dic.items()}).replace(
-                "'", '"'
-            )
-            + "}"
-        )
-        demo = f'Context:{sample["context"]}\nQuestions:{questions}'
-        human_prompt = SEED_QUESTION.format(
-            demonstration=demo, context=seed_doc.page_content
-        )
-    prompt = ChatPromptTemplate.from_messages([human_prompt])
-    results = llm.generate_text_with_hmpt(prompts=[prompt])
+    prompt = SEED_QUESTION.format(context=seed_doc.page_content)
+    results = llm.generate_text(prompt=prompt)
     results = results.generations[0][0].text
     if is_table_present:
         return [results]
@@ -194,10 +167,9 @@ async def multi_context_evolution(
     question = simple_evolution(llm, seed_node)
     print(question)
     similar_context = doc_store.get_similar(seed_node)[0]
-    human_prompt = MULTICONTEXT_QUESTION.format(
+    prompt = MULTICONTEXT_QUESTION.format(
         question=question, context1=seed_node.page_content, context2=similar_context
     )
-    prompt = ChatPromptTemplate.from_messages([human_prompt])
-    results = await llm.agenerate_text(prompt=to_pv(prompt))
+    results = await llm.agenerate_text(prompt=prompt)
     question = results.generations[0][0].text.strip()
     return question
diff --git a/src/ragas/testset/prompts.py b/src/ragas/testset/prompts.py
index 04fe50a54..1b7c01c41 100644
--- a/src/ragas/testset/prompts.py
+++ b/src/ragas/testset/prompts.py
@@ -1,379 +1,233 @@
-from langchain.prompts import HumanMessagePromptTemplate
+from ragas.llms.prompt import Prompt
 
-SEED_QUESTION = HumanMessagePromptTemplate.from_template(
-    """
-Generate two questions from given context satisfying the rules given below:
+SEED_QUESTION = Prompt(
+    name="seed_question",
+    instruction="""Generate two questions from given context satisfying the rules given below:
     2.The question should be framed such that it must be clearly understood without providing context.
- 3.The question should be fully answerable from information present in given context. - - -{demonstration} - - -Context: -{context} -Questions:""" # noqa: E501 + 3.The question should be fully answerable from information present in given context.""", + examples=[{ + "context": "The Eiffel Tower in Paris was originally intended as a temporary structure, built for the 1889 World's Fair. It was almost dismantled in 1909 but was saved because it was repurposed as a giant radio antenna.", + "output":"Who built the Eiffel Tower?", + }, + ], + input_keys=["context"], + output_key="output", + output_type="string", + language="english", ) -TABLE_QA = HumanMessagePromptTemplate.from_template( - """ -Frame a question from the given table following the rules given below - - Do no use phrases like 'provided context','provided table' etc in the question - -Context: -Table 2: Local Library Statistics - -Month New Memberships Books Loaned eBooks Downloaded -January 150 1200 950 -February 120 1100 1000 -March 200 1400 1100 -Framed Question from Table: How many books were loaned in January? - -Context: -{context} - -Framed Question from Table:""" # noqa: E501 +REASONING_QUESTION = Prompt( + name="reasoning_question", + instruction="""Complicate the given question by rewriting question into a multi-hop reasoning question based on the provided context. + Answering the question should require the reader to make multiple logical connections or inferences using the information available in given context. + Rules to follow when rewriting question: + 1. Ensure that the rewritten question can be answered entirely from the information present in the contexts. + 2. Do not frame questions that contains more than 15 words. Use abbreviation wherever possible. + 3. Make sure the question is clear and unambiguous. + 4. phrases like 'based on the provided context','according to the context',etc are not allowed to appear in the question.""", + examples=[{ + "question":"What is the capital of France?", + "context":"France is a country in Western Europe. It has several cities, including Paris, Lyon, and Marseille. Paris is not only known for its cultural landmarks like the Eiffel Tower and the Louvre Museum but also as the administrative center.", + "output":"Linking the Eiffel Tower and administrative center, which city stands as both?" + },{ + "question":"What does the append() method do in Python?", + "context":"In Python, lists are used to store multiple items in a single variable. Lists are one of 4 built-in data types used to store collections of data. The append() method adds a single item to the end of a list.", + "output":"If a list represents a variable collection, what method extends it by one item?" + }], + input_keys=["question", "context"], + output_key="output", + output_type="string", + language="english", ) -REASONING_QUESTION = HumanMessagePromptTemplate.from_template( - """\ -Complicate the given question by rewriting question into a multi-hop reasoning question based on the provided context. -Answering the question should require the reader to make multiple logical connections or inferences using the information available in given context. -Rules to follow when rewriting question: -1. Ensure that the rewritten question can be answered entirely from the information present in the contexts. -2. Do not frame questions that contains more than 15 words. Use abbreviation wherever possible. -3. Make sure the question is clear and unambiguous. -4. 
phrases like 'based on the provided context','according to the context',etc are not allowed to appear in the question. - -Initial Question: -What is the capital of France? - -Given Context: -France is a country in Western Europe. It has several cities, including Paris, Lyon, and Marseille. Paris is not only known for its cultural landmarks like the Eiffel Tower and the Louvre Museum but also as the administrative center. - -Complicated Multi-Hop Question: -Linking the Eiffel Tower and administrative center, which city stands as both? - -Initial Question: -What does the append() method do in Python? - -Given Context: -In Python, lists are used to store multiple items in a single variable. Lists are one of 4 built-in data types used to store collections of data. The append() method adds a single item to the end of a list. - -Complicated Multi-Hop Question: -If a list represents a variable collection, what method extends it by one item? - -Initial Question: -{question} -Given Context: -{context} - -Complicated Multi-Hop Question -""" # noqa: E501 +MULTI_CONTEXT_QUESTION = Prompt( + name="multi_context_question", + instruction=""" + The task is to rewrite and complicate the given question in a way that answering it requires information derived from both context1 and context2. + Follow the rules given below while rewriting the question. + 1. The rewritten question should not be very long. Use abbreviation wherever possible. + 2. The rewritten question must be reasonable and must be understood and responded by humans. + 3. The rewritten question must be fully answerable from information present in context1 and context2. + 4. Read and understand both contexts and rewrite the question so that answering requires insight from both context1 and context2. + 5. phrases like 'based on the provided context','according to the context?',etc are not allowed to appear in the question.""", + examples=[{ + "question":"What process turns plants green?", + "context1":"Chlorophyll is the pigment that gives plants their green color and helps them photosynthesize.", + "context2":"Photosynthesis in plants typically occurs in the leaves where chloroplasts are concentrated.", + "output":"In which plant structures does the pigment responsible for their verdancy facilitate energy production?" + },{ + "question":"How do you calculate the area of a rectangle?", + "context1":"The area of a shape is calculated based on the shape's dimensions. For rectangles, this involves multiplying the length and width.", + "context2":"Rectangles have four sides with opposite sides being equal in length. They are a type of quadrilateral.", + "output":"What multiplication involving equal opposites yields a quadrilateral's area?" + }], + input_keys=["question", "context1", "context2"], + output_key="output", + output_type="string", + language="english", ) -MULTICONTEXT_QUESTION = HumanMessagePromptTemplate.from_template( - """\ -The task is to rewrite and complicate the given question in a way that answering it requires information derived from both context1 and context2. -Follow the rules given below while rewriting the question. - 1. The rewritten question should not be very long. Use abbreviation wherever possible. - 2. The rewritten question must be reasonable and must be understood and responded by humans. - 3. The rewritten question must be fully answerable from information present in context1 and context2. - 4. 
Read and understand both contexts and rewrite the question so that answering requires insight from both context1 and context2. - 5. phrases like 'based on the provided context','according to the context?',etc are not allowed to appear in the question. - -Initial Question: -What process turns plants green? - -Context1: -Chlorophyll is the pigment that gives plants their green color and helps them photosynthesize. - -Context2: -Photosynthesis in plants typically occurs in the leaves where chloroplasts are concentrated. - -Complicated Multi-Hop Question: -In which plant structures does the pigment responsible for their verdancy facilitate energy production? - -Initial Question: -How do you calculate the area of a rectangle? - -Context1: -The area of a shape is calculated based on the shape's dimensions. For rectangles, this involves multiplying the length and width. - -Context2: -Rectangles have four sides with opposite sides being equal in length. They are a type of quadrilateral. - -Complicated Multi-Hop Question: -What multiplication involving equal opposites yields a quadrilateral's area? - - -Initial Question: -{question} -context1: -{context1} -context2: -{context2} -Complicated Multi-Hop Question: -""" # noqa: E501 +CONDITIONAL_QUESTION = Prompt( + name="conditional_question", + instruction="""Rewrite the provided question to increase its complexity by introducing a conditional element. + The goal is to make the question more intricate by incorporating a scenario or condition that affects the context of the question. + Follow the rules given below while rewriting the question. + 1. The rewritten question should not be longer than 25 words. Use abbreviation wherever possible. + 2. The rewritten question must be reasonable and must be understood and responded by humans. + 3. The rewritten question must be fully answerable from information present context. + 4. phrases like 'provided context','according to the context?',etc are not allowed to appear in the question.""", + examples=[{ + "question":"What is the function of the roots of a plant?", + "context":"The roots of a plant absorb water and nutrients from the soil, anchor the plant in the ground, and store food.", + "output":"What dual purpose do plant roots serve concerning soil nutrients and stability?" + },{ + "question":"How do vaccines protect against diseases?", + "context":"Vaccines protect against diseases by stimulating the body's immune response to produce antibodies, which recognize and combat pathogens.", + "output":"How do vaccines utilize the body's immune system to defend against pathogens?" + }], + input_keys=["question", "context"], + output_key="output", + output_type="string", + language="english", ) -CONDITIONAL_QUESTION = HumanMessagePromptTemplate.from_template( - """\ -Rewrite the provided question to increase its complexity by introducing a conditional element. -The goal is to make the question more intricate by incorporating a scenario or condition that affects the context of the question. -Follow the rules given below while rewriting the question. - 1. The rewritten question should not be longer than 25 words. Use abbreviation wherever possible. - 2. The rewritten question must be reasonable and must be understood and responded by humans. - 3. The rewritten question must be fully answerable from information present context. - 4. phrases like 'provided context','according to the context?',etc are not allowed to appear in the question. - -Initial Question: -What is the function of the roots of a plant? 
- -Context: -The roots of a plant absorb water and nutrients from the soil, anchor the plant in the ground, and store food. - -Rewritten Question: -What dual purpose do plant roots serve concerning soil nutrients and stability? - -Answer: -Plant roots serve a dual purpose by absorbing water and nutrients from the soil, which is vital for the plant's growth, and providing stability by anchoring the plant in the ground. - -Example 2: - -Initial Question: -How do vaccines protect against diseases? - -Context: -Vaccines protect against diseases by stimulating the body's immune response to produce antibodies, which recognize and combat pathogens. - -Rewritten Question: -How do vaccines utilize the body's immune system to defend against pathogens? - -Initial Question:: -{question} -Context: -{context} -Rewritten Question -""" # noqa: E501 +COMPRESS_QUESTION = Prompt( + name="compress_question", + instruction="""Rewrite the following question to make it more indirect and shorter while retaining the essence of the original question. + The goal is to create a question that conveys the same meaning but in a less direct manner. The rewritten question should shorter so use abbreviation wherever possible.""", + examples=[{ + "question":"What is the distance between the Earth and the Moon?", + "output":"How far is the Moon from Earth?" + },{ + "question":"What ingredients are required to bake a chocolate cake?", + "output":"What's needed for a chocolate cake?" + }], + input_keys=["question"], + output_key="output", + output_type="string", + language="english", ) -COMPRESS_QUESTION = HumanMessagePromptTemplate.from_template( - """\ -Rewrite the following question to make it more indirect and shorter while retaining the essence of the original question. -The goal is to create a question that conveys the same meaning but in a less direct manner. The rewritten question should shorter so use abbreviation wherever possible. - -Original Question: -What is the distance between the Earth and the Moon? - -Rewritten Question: -How far is the Moon from Earth? - -Original Question: -What ingredients are required to bake a chocolate cake? - -Rewritten Question: -What's needed for a chocolate cake? - -Original Question: -{question} -Rewritten Question: -""" # noqa: E501 +CONVERSATION_QUESTION = Prompt( + name="conversation_question", + instruction="""Reformat the provided question into two separate questions as if it were to be part of a conversation. Each question should focus on a specific aspect or subtopic related to the original question. + Follow the rules given below while rewriting the question. + 1. The rewritten question should not be longer than 25 words. Use abbreviation wherever possible. + 2. The rewritten question must be reasonable and must be understood and responded by humans. + 3. The rewritten question must be fully answerable from information present context. + 4. phrases like 'provided context','according to the context?',etc are not allowed to appear in the question.""", + examples=[{ + "question":"What are the advantages and disadvantages of remote work?", + "output":{"first_question":"What are the benefits of remote work?", + "second_question":"On the flip side, what challenges are encountered when working remotely?" 
+ } + }], + input_keys=["question"], + output_key="output", + output_type="json", + language="english", ) - -CONVERSATION_QUESTION = HumanMessagePromptTemplate.from_template( - """\ -Reformat the provided question into two separate questions as if it were to be part of a conversation. Each question should focus on a specific aspect or subtopic related to the original question. -question: What are the advantages and disadvantages of remote work? -Reformatted Questions for Conversation: What are the benefits of remote work?\nOn the flip side, what challenges are encountered when working remotely? -question:{question} - -Reformatted Questions for Conversation: -""" # noqa: E501 -) - -SCORE_CONTEXT = HumanMessagePromptTemplate.from_template( - """Given a context, complete the two following tasks and output answer valid json format +SCORE_CONTEXT = Prompt( + name="score_context", + instruction="""Given a context, complete the two following tasks and output answer valid json format 1.Evaluate the provided context and assign a numerical score between 0 and 10 based on the following criteria: - Award a high score to context that thoroughly delves into and explains concepts. - - Assign a lower score to context that contains excessive references, acknowledgments, personal information, or other non-essential elements. -2.Check if context contains tables -Context: -Albert Einstein (/ˈaɪnstaɪn/ EYEN-styne;[4] German: [ˈalbɛɐt ˈʔaɪnʃtaɪn] ⓘ; 14 March 1879 – 18 April 1955) was a German-born theoretical physicist who is widely held to be one of the greatest and most influential scientists of all time. -Output: -{{"score":6.0, "is_table_present":false}} -Context: -{context} -Output:""" # noqa: E501 -) - -REWRITE_QUESTION = HumanMessagePromptTemplate.from_template( - """\ -Given a context, transform the given question to be clear and standalone by replacing its coreferences with specific details from the context: - -Contexts: -The Eiffel Tower was constructed using iron and was originally intended as a temporary exhibit for the 1889 World's Fair held in Paris. -Despite its initial temporary purpose, the Eiffel Tower quickly became a symbol of Parisian ingenuity and an iconic landmark of the city, attracting millions of visitors each year -The tower's design, created by Gustave Eiffel, was initially met with criticism from some French artists and intellectuals, but it has since been celebrated as a masterpiece of structural engineering and architectural design. -Question: -Who created the design for the Tower? -Rewritten question: -Who created the design for the Eiffel Tower? - -Contexts: -'Exploring Zero-Shot Learning in Neural Networks' was published by Smith and Lee in 2021, focusing on the application of zero-shot learning techniques in artificial intelligence. -Question: -What datasets were used for the zero-shot evaluations in this study? -Rewritten question: -What datasets were used for the zero-shot evaluations Exploring Zero-Shot Learning in Neural Networks paper? 
- - -Question:{question} -Context: {context} -Rewritten question: -""" + - Assign a lower score to context that contains excessive references, acknowledgments, personal information, or other non-essential elements.""", + examples=[{ + "context":"Albert Einstein (14 March 1879 - 18 April 1955) was a German-born theoretical physicist who is widely held to be one of the greatest and most influential scientists of all time.", + "output":{"score":6.0}, + }], + input_keys=["context"], + output_key="output", + output_type="json", + language="english", ) -FILTER_QUESTION = HumanMessagePromptTemplate.from_template( - """\ -Classify given question - -question: What is the discovery about space? -{{ - "reason":"The question is too vague and does not specify which discovery about space it is referring to." - "verdit":"No" -}} - -question: What caused the Great Depression? -{{ - "reason":"The question is specific and refers to a well-known historical economic event, making it clear and answerable.", - "verdict":"Yes" -}} - -question: What is the keyword that best describes the paper's focus in natural language understanding tasks? -{{ - "reason": "The question mentions a 'paper' in it without referring it's name which makes it unclear without it", - "verdict": "No" -}} -question: Who wrote 'Romeo and Juliet'? -{{ - "reason": "The question is clear and refers to a specific work by name therefore it is clear", - "verdict": "Yes" -}} -question: What did the study mention? -{{ - "reason": "The question is vague and does not specify which study it is referring to", - "verdict": "No" -}} -question: What is the focus of the REPLUG paper? -{{ - "reason": "The question refers to a specific work by it's name hence can be understood", - "verdict": "Yes" -}} - -question: What is the purpose of the reward-driven stage in the training process? -{{ -"reason": "The question lacks specific context regarding the type of training process, making it potentially ambiguous and open to multiple interpretations.", -"verdict": "No" -}} - - -question: {question}""" # noqa: E501 +REWRITE_QUESTION = Prompt( + name="rewrite_question", + instruction="""Given a context, transform the given question to be clear and standalone by replacing its coreferences with specific details from the context:""", + examples=[{ + "context":"The Eiffel Tower was constructed using iron and was originally intended as a temporary exhibit for the 1889 World's Fair held in Paris. Despite its initial temporary purpose, the Eiffel Tower quickly became a symbol of Parisian ingenuity and an iconic landmark of the city, attracting millions of visitors each year. The tower's design, created by Gustave Eiffel, was initially met with criticism from some French artists and intellectuals, but it has since been celebrated as a masterpiece of structural engineering and architectural design.", + "question":"Who created the design for the Tower?", + "output":"Who created the design for the Eiffel Tower?" + },{ + "context":"'Exploring Zero-Shot Learning in Neural Networks' was published by Smith and Lee in 2021, focusing on the application of zero-shot learning techniques in artificial intelligence.", + "question":"What datasets were used for the zero-shot evaluations in this study?", + "output":"What datasets were used for the zero-shot evaluations Exploring Zero-Shot Learning in Neural Networks paper?" 
+ }], + input_keys=["context","question"], + output_key="output", + output_type="string", + language="english", ) -EVOLUTION_ELIMINATION = HumanMessagePromptTemplate.from_template( - """\ -Check if the given two questions are equal based on following requirements: -1. They have same constraints and requirements. -2. They have same depth and breadth of the inquiry. -Question 1: What are the primary causes of climate change? -Question 2: What factors contribute to global warming? -{{ - "reason": "While both questions deal with environmental issues, 'climate change' encompasses broader changes than 'global warming', leading to different depths of inquiry.", - "verdict": "Not Equal" -}} -Question 1: How does photosynthesis work in plants? -Question 2: Can you explain the process of photosynthesis in plants? -{{ - "reason": "Both questions ask for an explanation of the photosynthesis process in plants, sharing the same depth, breadth, and requirements for the answer.", - "verdict": "Equal" -}} -Question 1: {question1} -Question 2: {question2}""" # noqa: E501 +FILTER_QUESTION = Prompt( + name="filter_question", + instruction="""Given a question, classify it based on clarity and specificity""", + examples=[{ + "question":"What is the discovery about space?", + "output":{"reason":"The question is too vague and does not specify which discovery about space it is referring to.","verdit":"No"} + },{ + "question":"What caused the Great Depression?", + "output":{"reason":"The question is specific and refers to a well-known historical economic event, making it clear and answerable.","verdict":"Yes"} + },{ + "question":"What is the keyword that best describes the paper's focus in natural language understanding tasks?", + "output":{"reason":"The question mentions a 'paper' in it without referring it's name which makes it unclear without it","verdict":"No"} + },{ + "question":"Who wrote 'Romeo and Juliet'?", + "output":{"reason":"The question is clear and refers to a specific work by name therefore it is clear","verdict":"Yes"} + },{ + "question":"What did the study mention?", + "output":{"reason":"The question is vague and does not specify which study it is referring to","verdict":"No"} + },{ + "question":"What is the focus of the REPLUG paper?", + "output":{"reason":"The question refers to a specific work by it's name hence can be understood","verdict":"Yes"} + },{ + "question":"What is the purpose of the reward-driven stage in the training process?", + "output":{"reason":"The question lacks specific context regarding the type of training process, making it potentially ambiguous and open to multiple interpretations.","verdict":"No"} + }], + input_keys=["question"], + output_key="output", + output_type="json", + language="english", ) -ANSWER_FORMULATE = HumanMessagePromptTemplate.from_template( - """\ -Answer the question using the information from the given context. -question:{question} -context:{context} -answer: -""" # noqa: E501 +EVOLUTION_ELIMINATION = Prompt( + name="evolution_elimination", + instruction="""Check if the given two questions are equal based on following requirements: + 1. They have same constraints and requirements. + 2. 
They have same depth and breadth of the inquiry.""", + examples=[{ + "question1":"What are the primary causes of climate change?", + "question2":"What factors contribute to global warming?", + "output":{"reason":"While both questions deal with environmental issues, 'climate change' encompasses broader changes than 'global warming', leading to different depths of inquiry.","verdict":"Not Equal"} + },{ + "question1":"How does photosynthesis work in plants?", + "question2":"Can you explain the process of photosynthesis in plants?", + "output":{"reason":"Both questions ask for an explanation of the photosynthesis process in plants, sharing the same depth, breadth, and requirements for the answer.","verdict":"Equal"} + }], + input_keys=["question1","question2"], + output_key="output", + output_type="json", + language="english", ) - -INFORMAL_QUESTION = HumanMessagePromptTemplate.from_template( - """\ -Rewrite the following question into a casual, conversational form as if it's being asked by someone in an informal setting. -Keep the core information request intact, without including any additional details or questions. -Formal Question: What are the criteria for Objectives and Key Results? -Casual Rewrite: What should I be looking at when I'm setting up OKRs? -Formal Question: Could you delineate the primary responsibilities of a project manager? -Casual Rewrite: What's the main job of a project manager, in simple terms? -Formal Question: What mechanisms underlie the process of cellular respiration? -Casual Rewrite: How does cellular respiration actually work? -Formal Question:{question} -Casual Rewrite:""" -) - -CONTEXT_FORMULATE = HumanMessagePromptTemplate.from_template( - """Please extract relevant sentences from the provided context that can potentially help answer the following question. While extracting candidate sentences you're not allowed to make any changes to sentences from given context. - -question:{question} -context:\n{context} -candidate sentences:\n -""" # noqa: E501 -) - - -demonstrations = [ - { - "context": "The Eiffel Tower in Paris was originally intended as a temporary structure, built for the 1889 World's Fair. It was almost dismantled in 1909 but was saved because it was repurposed as a giant radio antenna.", - "questions": [ - { - "question_Why": "Why was the Eiffel Tower originally planned to be a temporary structure?" - }, - { - "question_Was": "Was the Eiffel Tower originally designed to be a permanent structure?" - }, - { - "question_What": "What was the original purpose of the Eiffel Tower when it was built for the 1889 World's Fair?" - }, - { - "question_How": "How did the Eiffel Tower avoid being dismantled in 1909?" - }, - {"question_Where": "Where is the Eiffel Tower?"}, - ], - }, - { - "context": "Photosynthesis is a process used by plants, algae, and certain bacteria to convert light energy into chemical energy.", - "questions": [ - {"question_Why": "Why do plants perform photosynthesis?"}, - { - "question_Was": "Was photosynthesis discovered in plants, algae, or bacteria first?" - }, - { - "question_What": "What converts light energy into chemical energy in photosynthesis?" 
- }, - {"question_How": "How do plants capture light energy for photosynthesis?"}, - {"question_Where": "Where in plants does photosynthesis primarily occur?"}, - {"question_Can": "Can photosynthesis occur in the absence of light?"}, - ], - }, -] +ANSWER_FORMULATE = Prompt( + name="answer_formulate", + instruction="""Answer the question using the information from the given context.""" + examples=[{ + "question":"", + "context":"" + "answer":"" + }], + input_keys=["question","context"], + output_key="answer", + output_type="string", + language="english", +) \ No newline at end of file From 82ce09052ac748f4f4747e7ed6ba1d9a65822b39 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Thu, 18 Jan 2024 18:20:15 -0800 Subject: [PATCH 3/8] migrated prompts --- src/ragas/testset/evolutions.py | 5 +- src/ragas/testset/prompts.py | 260 ++++++++++++++++++++------------ 2 files changed, 165 insertions(+), 100 deletions(-) diff --git a/src/ragas/testset/evolutions.py b/src/ragas/testset/evolutions.py index 64828b7a3..32c32de70 100644 --- a/src/ragas/testset/evolutions.py +++ b/src/ragas/testset/evolutions.py @@ -12,7 +12,7 @@ from ragas.testset.docstore import Direction, Document, DocumentStore, Node from ragas.testset.prompts import ( FILTER_QUESTION, - MULTICONTEXT_QUESTION, + MULTI_CONTEXT_QUESTION, SCORE_CONTEXT, SEED_QUESTION, ) @@ -150,7 +150,6 @@ async def aevolve(self, llm: BaseRagasLLM, docstore: DocumentStore): async def simple_evolution( llm: BaseRagasLLM, seed_doc: Document, is_table_present: bool = False ): - prompt = SEED_QUESTION.format(context=seed_doc.page_content) results = llm.generate_text(prompt=prompt) results = results.generations[0][0].text @@ -167,7 +166,7 @@ async def multi_context_evolution( question = simple_evolution(llm, seed_node) print(question) similar_context = doc_store.get_similar(seed_node)[0] - prompt = MULTICONTEXT_QUESTION.format( + prompt = MULTI_CONTEXT_QUESTION.format( question=question, context1=seed_node.page_content, context2=similar_context ) results = await llm.agenerate_text(prompt=prompt) diff --git a/src/ragas/testset/prompts.py b/src/ragas/testset/prompts.py index 1b7c01c41..fc4079bec 100644 --- a/src/ragas/testset/prompts.py +++ b/src/ragas/testset/prompts.py @@ -5,9 +5,10 @@ instruction="""Generate two questions from given context satisfying the rules given below: 2.The question should be framed such that it must be clearly understood without providing context. 3.The question should be fully answerable from information present in given context.""", - examples=[{ + examples=[ + { "context": "The Eiffel Tower in Paris was originally intended as a temporary structure, built for the 1889 World's Fair. It was almost dismantled in 1909 but was saved because it was repurposed as a giant radio antenna.", - "output":"Who built the Eiffel Tower?", + "output": "Who built the Eiffel Tower?", }, ], input_keys=["context"], @@ -26,15 +27,18 @@ 2. Do not frame questions that contains more than 15 words. Use abbreviation wherever possible. 3. Make sure the question is clear and unambiguous. 4. phrases like 'based on the provided context','according to the context',etc are not allowed to appear in the question.""", - examples=[{ - "question":"What is the capital of France?", - "context":"France is a country in Western Europe. It has several cities, including Paris, Lyon, and Marseille. 
Paris is not only known for its cultural landmarks like the Eiffel Tower and the Louvre Museum but also as the administrative center.", - "output":"Linking the Eiffel Tower and administrative center, which city stands as both?" - },{ - "question":"What does the append() method do in Python?", - "context":"In Python, lists are used to store multiple items in a single variable. Lists are one of 4 built-in data types used to store collections of data. The append() method adds a single item to the end of a list.", - "output":"If a list represents a variable collection, what method extends it by one item?" - }], + examples=[ + { + "question": "What is the capital of France?", + "context": "France is a country in Western Europe. It has several cities, including Paris, Lyon, and Marseille. Paris is not only known for its cultural landmarks like the Eiffel Tower and the Louvre Museum but also as the administrative center.", + "output": "Linking the Eiffel Tower and administrative center, which city stands as both?", + }, + { + "question": "What does the append() method do in Python?", + "context": "In Python, lists are used to store multiple items in a single variable. Lists are one of 4 built-in data types used to store collections of data. The append() method adds a single item to the end of a list.", + "output": "If a list represents a variable collection, what method extends it by one item?", + }, + ], input_keys=["question", "context"], output_key="output", output_type="string", @@ -52,17 +56,20 @@ 3. The rewritten question must be fully answerable from information present in context1 and context2. 4. Read and understand both contexts and rewrite the question so that answering requires insight from both context1 and context2. 5. phrases like 'based on the provided context','according to the context?',etc are not allowed to appear in the question.""", - examples=[{ - "question":"What process turns plants green?", - "context1":"Chlorophyll is the pigment that gives plants their green color and helps them photosynthesize.", - "context2":"Photosynthesis in plants typically occurs in the leaves where chloroplasts are concentrated.", - "output":"In which plant structures does the pigment responsible for their verdancy facilitate energy production?" - },{ - "question":"How do you calculate the area of a rectangle?", - "context1":"The area of a shape is calculated based on the shape's dimensions. For rectangles, this involves multiplying the length and width.", - "context2":"Rectangles have four sides with opposite sides being equal in length. They are a type of quadrilateral.", - "output":"What multiplication involving equal opposites yields a quadrilateral's area?" - }], + examples=[ + { + "question": "What process turns plants green?", + "context1": "Chlorophyll is the pigment that gives plants their green color and helps them photosynthesize.", + "context2": "Photosynthesis in plants typically occurs in the leaves where chloroplasts are concentrated.", + "output": "In which plant structures does the pigment responsible for their verdancy facilitate energy production?", + }, + { + "question": "How do you calculate the area of a rectangle?", + "context1": "The area of a shape is calculated based on the shape's dimensions. For rectangles, this involves multiplying the length and width.", + "context2": "Rectangles have four sides with opposite sides being equal in length. 
They are a type of quadrilateral.", + "output": "What multiplication involving equal opposites yields a quadrilateral's area?", + }, + ], input_keys=["question", "context1", "context2"], output_key="output", output_type="string", @@ -78,15 +85,18 @@ 2. The rewritten question must be reasonable and must be understood and responded by humans. 3. The rewritten question must be fully answerable from information present context. 4. phrases like 'provided context','according to the context?',etc are not allowed to appear in the question.""", - examples=[{ - "question":"What is the function of the roots of a plant?", - "context":"The roots of a plant absorb water and nutrients from the soil, anchor the plant in the ground, and store food.", - "output":"What dual purpose do plant roots serve concerning soil nutrients and stability?" - },{ - "question":"How do vaccines protect against diseases?", - "context":"Vaccines protect against diseases by stimulating the body's immune response to produce antibodies, which recognize and combat pathogens.", - "output":"How do vaccines utilize the body's immune system to defend against pathogens?" - }], + examples=[ + { + "question": "What is the function of the roots of a plant?", + "context": "The roots of a plant absorb water and nutrients from the soil, anchor the plant in the ground, and store food.", + "output": "What dual purpose do plant roots serve concerning soil nutrients and stability?", + }, + { + "question": "How do vaccines protect against diseases?", + "context": "Vaccines protect against diseases by stimulating the body's immune response to produce antibodies, which recognize and combat pathogens.", + "output": "How do vaccines utilize the body's immune system to defend against pathogens?", + }, + ], input_keys=["question", "context"], output_key="output", output_type="string", @@ -98,13 +108,16 @@ name="compress_question", instruction="""Rewrite the following question to make it more indirect and shorter while retaining the essence of the original question. The goal is to create a question that conveys the same meaning but in a less direct manner. The rewritten question should shorter so use abbreviation wherever possible.""", - examples=[{ - "question":"What is the distance between the Earth and the Moon?", - "output":"How far is the Moon from Earth?" - },{ - "question":"What ingredients are required to bake a chocolate cake?", - "output":"What's needed for a chocolate cake?" - }], + examples=[ + { + "question": "What is the distance between the Earth and the Moon?", + "output": "How far is the Moon from Earth?", + }, + { + "question": "What ingredients are required to bake a chocolate cake?", + "output": "What's needed for a chocolate cake?", + }, + ], input_keys=["question"], output_key="output", output_type="string", @@ -120,12 +133,15 @@ 2. The rewritten question must be reasonable and must be understood and responded by humans. 3. The rewritten question must be fully answerable from information present context. 4. phrases like 'provided context','according to the context?',etc are not allowed to appear in the question.""", - examples=[{ - "question":"What are the advantages and disadvantages of remote work?", - "output":{"first_question":"What are the benefits of remote work?", - "second_question":"On the flip side, what challenges are encountered when working remotely?" 
+ examples=[ + { + "question": "What are the advantages and disadvantages of remote work?", + "output": { + "first_question": "What are the benefits of remote work?", + "second_question": "On the flip side, what challenges are encountered when working remotely?", + }, } - }], + ], input_keys=["question"], output_key="output", output_type="json", @@ -138,10 +154,12 @@ 1.Evaluate the provided context and assign a numerical score between 0 and 10 based on the following criteria: - Award a high score to context that thoroughly delves into and explains concepts. - Assign a lower score to context that contains excessive references, acknowledgments, personal information, or other non-essential elements.""", - examples=[{ - "context":"Albert Einstein (14 March 1879 - 18 April 1955) was a German-born theoretical physicist who is widely held to be one of the greatest and most influential scientists of all time.", - "output":{"score":6.0}, - }], + examples=[ + { + "context": "Albert Einstein (14 March 1879 - 18 April 1955) was a German-born theoretical physicist who is widely held to be one of the greatest and most influential scientists of all time.", + "output": {"score": 6.0}, + } + ], input_keys=["context"], output_key="output", output_type="json", @@ -151,16 +169,19 @@ REWRITE_QUESTION = Prompt( name="rewrite_question", instruction="""Given a context, transform the given question to be clear and standalone by replacing its coreferences with specific details from the context:""", - examples=[{ - "context":"The Eiffel Tower was constructed using iron and was originally intended as a temporary exhibit for the 1889 World's Fair held in Paris. Despite its initial temporary purpose, the Eiffel Tower quickly became a symbol of Parisian ingenuity and an iconic landmark of the city, attracting millions of visitors each year. The tower's design, created by Gustave Eiffel, was initially met with criticism from some French artists and intellectuals, but it has since been celebrated as a masterpiece of structural engineering and architectural design.", - "question":"Who created the design for the Tower?", - "output":"Who created the design for the Eiffel Tower?" - },{ - "context":"'Exploring Zero-Shot Learning in Neural Networks' was published by Smith and Lee in 2021, focusing on the application of zero-shot learning techniques in artificial intelligence.", - "question":"What datasets were used for the zero-shot evaluations in this study?", - "output":"What datasets were used for the zero-shot evaluations Exploring Zero-Shot Learning in Neural Networks paper?" - }], - input_keys=["context","question"], + examples=[ + { + "context": "The Eiffel Tower was constructed using iron and was originally intended as a temporary exhibit for the 1889 World's Fair held in Paris. Despite its initial temporary purpose, the Eiffel Tower quickly became a symbol of Parisian ingenuity and an iconic landmark of the city, attracting millions of visitors each year. 
The tower's design, created by Gustave Eiffel, was initially met with criticism from some French artists and intellectuals, but it has since been celebrated as a masterpiece of structural engineering and architectural design.", + "question": "Who created the design for the Tower?", + "output": "Who created the design for the Eiffel Tower?", + }, + { + "context": "'Exploring Zero-Shot Learning in Neural Networks' was published by Smith and Lee in 2021, focusing on the application of zero-shot learning techniques in artificial intelligence.", + "question": "What datasets were used for the zero-shot evaluations in this study?", + "output": "What datasets were used for the zero-shot evaluations Exploring Zero-Shot Learning in Neural Networks paper?", + }, + ], + input_keys=["context", "question"], output_key="output", output_type="string", language="english", @@ -170,28 +191,57 @@ FILTER_QUESTION = Prompt( name="filter_question", instruction="""Given a question, classify it based on clarity and specificity""", - examples=[{ - "question":"What is the discovery about space?", - "output":{"reason":"The question is too vague and does not specify which discovery about space it is referring to.","verdit":"No"} - },{ - "question":"What caused the Great Depression?", - "output":{"reason":"The question is specific and refers to a well-known historical economic event, making it clear and answerable.","verdict":"Yes"} - },{ - "question":"What is the keyword that best describes the paper's focus in natural language understanding tasks?", - "output":{"reason":"The question mentions a 'paper' in it without referring it's name which makes it unclear without it","verdict":"No"} - },{ - "question":"Who wrote 'Romeo and Juliet'?", - "output":{"reason":"The question is clear and refers to a specific work by name therefore it is clear","verdict":"Yes"} - },{ - "question":"What did the study mention?", - "output":{"reason":"The question is vague and does not specify which study it is referring to","verdict":"No"} - },{ - "question":"What is the focus of the REPLUG paper?", - "output":{"reason":"The question refers to a specific work by it's name hence can be understood","verdict":"Yes"} - },{ - "question":"What is the purpose of the reward-driven stage in the training process?", - "output":{"reason":"The question lacks specific context regarding the type of training process, making it potentially ambiguous and open to multiple interpretations.","verdict":"No"} - }], + examples=[ + { + "question": "What is the discovery about space?", + "output": { + "reason": "The question is too vague and does not specify which discovery about space it is referring to.", + "verdit": "No", + }, + }, + { + "question": "What caused the Great Depression?", + "output": { + "reason": "The question is specific and refers to a well-known historical economic event, making it clear and answerable.", + "verdict": "Yes", + }, + }, + { + "question": "What is the keyword that best describes the paper's focus in natural language understanding tasks?", + "output": { + "reason": "The question mentions a 'paper' in it without referring it's name which makes it unclear without it", + "verdict": "No", + }, + }, + { + "question": "Who wrote 'Romeo and Juliet'?", + "output": { + "reason": "The question is clear and refers to a specific work by name therefore it is clear", + "verdict": "Yes", + }, + }, + { + "question": "What did the study mention?", + "output": { + "reason": "The question is vague and does not specify which study it is referring to", 
+ "verdict": "No", + }, + }, + { + "question": "What is the focus of the REPLUG paper?", + "output": { + "reason": "The question refers to a specific work by it's name hence can be understood", + "verdict": "Yes", + }, + }, + { + "question": "What is the purpose of the reward-driven stage in the training process?", + "output": { + "reason": "The question lacks specific context regarding the type of training process, making it potentially ambiguous and open to multiple interpretations.", + "verdict": "No", + }, + }, + ], input_keys=["question"], output_key="output", output_type="json", @@ -203,16 +253,25 @@ instruction="""Check if the given two questions are equal based on following requirements: 1. They have same constraints and requirements. 2. They have same depth and breadth of the inquiry.""", - examples=[{ - "question1":"What are the primary causes of climate change?", - "question2":"What factors contribute to global warming?", - "output":{"reason":"While both questions deal with environmental issues, 'climate change' encompasses broader changes than 'global warming', leading to different depths of inquiry.","verdict":"Not Equal"} - },{ - "question1":"How does photosynthesis work in plants?", - "question2":"Can you explain the process of photosynthesis in plants?", - "output":{"reason":"Both questions ask for an explanation of the photosynthesis process in plants, sharing the same depth, breadth, and requirements for the answer.","verdict":"Equal"} - }], - input_keys=["question1","question2"], + examples=[ + { + "question1": "What are the primary causes of climate change?", + "question2": "What factors contribute to global warming?", + "output": { + "reason": "While both questions deal with environmental issues, 'climate change' encompasses broader changes than 'global warming', leading to different depths of inquiry.", + "verdict": "Not Equal", + }, + }, + { + "question1": "How does photosynthesis work in plants?", + "question2": "Can you explain the process of photosynthesis in plants?", + "output": { + "reason": "Both questions ask for an explanation of the photosynthesis process in plants, sharing the same depth, breadth, and requirements for the answer.", + "verdict": "Equal", + }, + }, + ], + input_keys=["question1", "question2"], output_key="output", output_type="json", language="english", @@ -220,14 +279,21 @@ ANSWER_FORMULATE = Prompt( name="answer_formulate", - instruction="""Answer the question using the information from the given context.""" - examples=[{ - "question":"", - "context":"" - "answer":"" - }], - input_keys=["question","context"], + instruction="""Answer the question using the information from the given context. Answer '-1' if answer is not present in the context.""", + examples=[ + { + "context": """The novel '1984' by George Orwell is set in a dystopian future where the world is divided into three superstates. The story follows the life of Winston Smith, who lives in Oceania, a superstate constantly at war.""", + "question": "In which superstate does Winston Smith live in the novel '1984'?", + "answer": "Winston Smith lives in the superstate of Oceania in the novel '1984'.", + }, + { + "context": """The novel "Pride and Prejudice" by Jane Austen revolves around the character Elizabeth Bennet and her family. 
The story is set in the 19th century in rural England and deals with issues of marriage, morality, and misconceptions.""", + "question": "What year was 'Pride and Prejudice' published?", + "answer": "-1", + }, + ], + input_keys=["context", "question"], output_key="answer", output_type="string", language="english", -) \ No newline at end of file +) From e765bbedab05b79ad32b932520d50a7e7ca21ef6 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Fri, 19 Jan 2024 13:27:39 -0800 Subject: [PATCH 4/8] added old prompts back --- src/ragas/testset/prompts.py | 405 ++++++++++++++++++++++++++++++++++- 1 file changed, 394 insertions(+), 11 deletions(-) diff --git a/src/ragas/testset/prompts.py b/src/ragas/testset/prompts.py index fc4079bec..7a6d6cb9f 100644 --- a/src/ragas/testset/prompts.py +++ b/src/ragas/testset/prompts.py @@ -1,6 +1,6 @@ from ragas.llms.prompt import Prompt -SEED_QUESTION = Prompt( +seed_question_prompt = Prompt( name="seed_question", instruction="""Generate two questions from given context satisfying the rules given below: 2.The question should be framed such that it must be clearly understood without providing context. @@ -18,7 +18,7 @@ ) -REASONING_QUESTION = Prompt( +reasoning_question_prompt = Prompt( name="reasoning_question", instruction="""Complicate the given question by rewriting question into a multi-hop reasoning question based on the provided context. Answering the question should require the reader to make multiple logical connections or inferences using the information available in given context. @@ -46,7 +46,7 @@ ) -MULTI_CONTEXT_QUESTION = Prompt( +multi_context_question_prompt = Prompt( name="multi_context_question", instruction=""" The task is to rewrite and complicate the given question in a way that answering it requires information derived from both context1 and context2. @@ -76,7 +76,7 @@ language="english", ) -CONDITIONAL_QUESTION = Prompt( +conditional_question_prompt = Prompt( name="conditional_question", instruction="""Rewrite the provided question to increase its complexity by introducing a conditional element. The goal is to make the question more intricate by incorporating a scenario or condition that affects the context of the question. @@ -104,7 +104,7 @@ ) -COMPRESS_QUESTION = Prompt( +compress_question_prompt = Prompt( name="compress_question", instruction="""Rewrite the following question to make it more indirect and shorter while retaining the essence of the original question. The goal is to create a question that conveys the same meaning but in a less direct manner. The rewritten question should shorter so use abbreviation wherever possible.""", @@ -125,7 +125,7 @@ ) -CONVERSATION_QUESTION = Prompt( +conversational_question_prompt = Prompt( name="conversation_question", instruction="""Reformat the provided question into two separate questions as if it were to be part of a conversation. Each question should focus on a specific aspect or subtopic related to the original question. Follow the rules given below while rewriting the question. 
@@ -148,7 +148,7 @@ language="english", ) -SCORE_CONTEXT = Prompt( +context_scoring_prompt = Prompt( name="score_context", instruction="""Given a context, complete the two following tasks and output answer valid json format 1.Evaluate the provided context and assign a numerical score between 0 and 10 based on the following criteria: @@ -166,7 +166,7 @@ language="english", ) -REWRITE_QUESTION = Prompt( +question_rewrite_prompt = Prompt( name="rewrite_question", instruction="""Given a context, transform the given question to be clear and standalone by replacing its coreferences with specific details from the context:""", examples=[ @@ -188,7 +188,7 @@ ) -FILTER_QUESTION = Prompt( +filter_question_prompt = Prompt( name="filter_question", instruction="""Given a question, classify it based on clarity and specificity""", examples=[ @@ -248,7 +248,7 @@ language="english", ) -EVOLUTION_ELIMINATION = Prompt( +evolution_elimination_prompt = Prompt( name="evolution_elimination", instruction="""Check if the given two questions are equal based on following requirements: 1. They have same constraints and requirements. @@ -277,7 +277,7 @@ language="english", ) -ANSWER_FORMULATE = Prompt( +question_answer_prompt = Prompt( name="answer_formulate", instruction="""Answer the question using the information from the given context. Answer '-1' if answer is not present in the context.""", examples=[ @@ -297,3 +297,386 @@ output_type="string", language="english", ) + + +## TODO: remove this + +from langchain.prompts import HumanMessagePromptTemplate + +SEED_QUESTION = HumanMessagePromptTemplate.from_template( + """ +Generate two questions from given context satisfying the rules given below: + 2.The question should be framed such that it must be clearly understood without providing context. + 3.The question should be fully answerable from information present in given context. + + +{demonstration} + + +Context: +{context} +Questions:""" # noqa: E501 +) + +TABLE_QA = HumanMessagePromptTemplate.from_template( + """ +Frame a question from the given table following the rules given below + - Do no use phrases like 'provided context','provided table' etc in the question + +Context: +Table 2: Local Library Statistics + +Month New Memberships Books Loaned eBooks Downloaded +January 150 1200 950 +February 120 1100 1000 +March 200 1400 1100 + +Framed Question from Table: How many books were loaned in January? + +Context: +{context} + +Framed Question from Table:""" # noqa: E501 +) + + +REASONING_QUESTION = HumanMessagePromptTemplate.from_template( + """\ +Complicate the given question by rewriting question into a multi-hop reasoning question based on the provided context. +Answering the question should require the reader to make multiple logical connections or inferences using the information available in given context. +Rules to follow when rewriting question: +1. Ensure that the rewritten question can be answered entirely from the information present in the contexts. +2. Do not frame questions that contains more than 15 words. Use abbreviation wherever possible. +3. Make sure the question is clear and unambiguous. +4. phrases like 'based on the provided context','according to the context',etc are not allowed to appear in the question. + +Initial Question: +What is the capital of France? + +Given Context: +France is a country in Western Europe. It has several cities, including Paris, Lyon, and Marseille. 
Paris is not only known for its cultural landmarks like the Eiffel Tower and the Louvre Museum but also as the administrative center. + +Complicated Multi-Hop Question: +Linking the Eiffel Tower and administrative center, which city stands as both? + +Initial Question: +What does the append() method do in Python? + +Given Context: +In Python, lists are used to store multiple items in a single variable. Lists are one of 4 built-in data types used to store collections of data. The append() method adds a single item to the end of a list. + +Complicated Multi-Hop Question: +If a list represents a variable collection, what method extends it by one item? + +Initial Question: +{question} +Given Context: +{context} + +Complicated Multi-Hop Question +""" # noqa: E501 +) + +MULTICONTEXT_QUESTION = HumanMessagePromptTemplate.from_template( + """\ +The task is to rewrite and complicate the given question in a way that answering it requires information derived from both context1 and context2. +Follow the rules given below while rewriting the question. + 1. The rewritten question should not be very long. Use abbreviation wherever possible. + 2. The rewritten question must be reasonable and must be understood and responded by humans. + 3. The rewritten question must be fully answerable from information present in context1 and context2. + 4. Read and understand both contexts and rewrite the question so that answering requires insight from both context1 and context2. + 5. phrases like 'based on the provided context','according to the context?',etc are not allowed to appear in the question. + +Initial Question: +What process turns plants green? + +Context1: +Chlorophyll is the pigment that gives plants their green color and helps them photosynthesize. + +Context2: +Photosynthesis in plants typically occurs in the leaves where chloroplasts are concentrated. + +Complicated Multi-Hop Question: +In which plant structures does the pigment responsible for their verdancy facilitate energy production? + +Initial Question: +How do you calculate the area of a rectangle? + +Context1: +The area of a shape is calculated based on the shape's dimensions. For rectangles, this involves multiplying the length and width. + +Context2: +Rectangles have four sides with opposite sides being equal in length. They are a type of quadrilateral. + +Complicated Multi-Hop Question: +What multiplication involving equal opposites yields a quadrilateral's area? + + +Initial Question: +{question} +context1: +{context1} +context2: +{context2} +Complicated Multi-Hop Question: +""" # noqa: E501 +) + + +CONDITIONAL_QUESTION = HumanMessagePromptTemplate.from_template( + """\ +Rewrite the provided question to increase its complexity by introducing a conditional element. +The goal is to make the question more intricate by incorporating a scenario or condition that affects the context of the question. +Follow the rules given below while rewriting the question. + 1. The rewritten question should not be longer than 25 words. Use abbreviation wherever possible. + 2. The rewritten question must be reasonable and must be understood and responded by humans. + 3. The rewritten question must be fully answerable from information present context. + 4. phrases like 'provided context','according to the context?',etc are not allowed to appear in the question. + +Initial Question: +What is the function of the roots of a plant? + +Context: +The roots of a plant absorb water and nutrients from the soil, anchor the plant in the ground, and store food. 
+ +Rewritten Question: +What dual purpose do plant roots serve concerning soil nutrients and stability? + +Answer: +Plant roots serve a dual purpose by absorbing water and nutrients from the soil, which is vital for the plant's growth, and providing stability by anchoring the plant in the ground. + +Example 2: + +Initial Question: +How do vaccines protect against diseases? + +Context: +Vaccines protect against diseases by stimulating the body's immune response to produce antibodies, which recognize and combat pathogens. + +Rewritten Question: +How do vaccines utilize the body's immune system to defend against pathogens? + +Initial Question:: +{question} +Context: +{context} +Rewritten Question +""" # noqa: E501 +) + + +COMPRESS_QUESTION = HumanMessagePromptTemplate.from_template( + """\ +Rewrite the following question to make it more indirect and shorter while retaining the essence of the original question. +The goal is to create a question that conveys the same meaning but in a less direct manner. The rewritten question should shorter so use abbreviation wherever possible. + +Original Question: +What is the distance between the Earth and the Moon? + +Rewritten Question: +How far is the Moon from Earth? + +Original Question: +What ingredients are required to bake a chocolate cake? + +Rewritten Question: +What's needed for a chocolate cake? + +Original Question: +{question} +Rewritten Question: +""" # noqa: E501 +) + + +CONVERSATION_QUESTION = HumanMessagePromptTemplate.from_template( + """\ +Reformat the provided question into two separate questions as if it were to be part of a conversation. Each question should focus on a specific aspect or subtopic related to the original question. +question: What are the advantages and disadvantages of remote work? +Reformatted Questions for Conversation: What are the benefits of remote work?\nOn the flip side, what challenges are encountered when working remotely? +question:{question} + +Reformatted Questions for Conversation: +""" # noqa: E501 +) + +SCORE_CONTEXT = HumanMessagePromptTemplate.from_template( + """Given a context, complete the two following tasks and output answer valid json format +1.Evaluate the provided context and assign a numerical score between 0 and 10 based on the following criteria: + - Award a high score to context that thoroughly delves into and explains concepts. + - Assign a lower score to context that contains excessive references, acknowledgments, personal information, or other non-essential elements. +2.Check if context contains tables +Context: +Albert Einstein (/ˈaɪnstaɪn/ EYEN-styne;[4] German: [ˈalbɛɐt ˈʔaɪnʃtaɪn] ⓘ; 14 March 1879 – 18 April 1955) was a German-born theoretical physicist who is widely held to be one of the greatest and most influential scientists of all time. +Output: +{{"score":6.0, "is_table_present":false}} +Context: +{context} +Output:""" # noqa: E501 +) + +REWRITE_QUESTION = HumanMessagePromptTemplate.from_template( + """\ +Given a context, transform the given question to be clear and standalone by replacing its coreferences with specific details from the context: + +Contexts: +The Eiffel Tower was constructed using iron and was originally intended as a temporary exhibit for the 1889 World's Fair held in Paris. 
+Despite its initial temporary purpose, the Eiffel Tower quickly became a symbol of Parisian ingenuity and an iconic landmark of the city, attracting millions of visitors each year +The tower's design, created by Gustave Eiffel, was initially met with criticism from some French artists and intellectuals, but it has since been celebrated as a masterpiece of structural engineering and architectural design. +Question: +Who created the design for the Tower? +Rewritten question: +Who created the design for the Eiffel Tower? + +Contexts: +'Exploring Zero-Shot Learning in Neural Networks' was published by Smith and Lee in 2021, focusing on the application of zero-shot learning techniques in artificial intelligence. +Question: +What datasets were used for the zero-shot evaluations in this study? +Rewritten question: +What datasets were used for the zero-shot evaluations Exploring Zero-Shot Learning in Neural Networks paper? + + +Question:{question} +Context: {context} +Rewritten question: +""" +) + +FILTER_QUESTION = HumanMessagePromptTemplate.from_template( + """\ +Classify given question + +question: What is the discovery about space? +{{ + "reason":"The question is too vague and does not specify which discovery about space it is referring to." + "verdit":"No" +}} + +question: What caused the Great Depression? +{{ + "reason":"The question is specific and refers to a well-known historical economic event, making it clear and answerable.", + "verdict":"Yes" +}} + +question: What is the keyword that best describes the paper's focus in natural language understanding tasks? +{{ + "reason": "The question mentions a 'paper' in it without referring it's name which makes it unclear without it", + "verdict": "No" +}} +question: Who wrote 'Romeo and Juliet'? +{{ + "reason": "The question is clear and refers to a specific work by name therefore it is clear", + "verdict": "Yes" +}} +question: What did the study mention? +{{ + "reason": "The question is vague and does not specify which study it is referring to", + "verdict": "No" +}} +question: What is the focus of the REPLUG paper? +{{ + "reason": "The question refers to a specific work by it's name hence can be understood", + "verdict": "Yes" +}} + +question: What is the purpose of the reward-driven stage in the training process? +{{ +"reason": "The question lacks specific context regarding the type of training process, making it potentially ambiguous and open to multiple interpretations.", +"verdict": "No" +}} + + +question: {question}""" # noqa: E501 +) + +EVOLUTION_ELIMINATION = HumanMessagePromptTemplate.from_template( + """\ +Check if the given two questions are equal based on following requirements: +1. They have same constraints and requirements. +2. They have same depth and breadth of the inquiry. + +Question 1: What are the primary causes of climate change? +Question 2: What factors contribute to global warming? +{{ + "reason": "While both questions deal with environmental issues, 'climate change' encompasses broader changes than 'global warming', leading to different depths of inquiry.", + "verdict": "Not Equal" +}} +Question 1: How does photosynthesis work in plants? +Question 2: Can you explain the process of photosynthesis in plants? 
+{{ + "reason": "Both questions ask for an explanation of the photosynthesis process in plants, sharing the same depth, breadth, and requirements for the answer.", + "verdict": "Equal" +}} +Question 1: {question1} +Question 2: {question2}""" # noqa: E501 +) + +ANSWER_FORMULATE = HumanMessagePromptTemplate.from_template( + """\ +Answer the question using the information from the given context. +question:{question} +context:{context} +answer: +""" # noqa: E501 +) + + +INFORMAL_QUESTION = HumanMessagePromptTemplate.from_template( + """\ +Rewrite the following question into a casual, conversational form as if it's being asked by someone in an informal setting. +Keep the core information request intact, without including any additional details or questions. +Formal Question: What are the criteria for Objectives and Key Results? +Casual Rewrite: What should I be looking at when I'm setting up OKRs? +Formal Question: Could you delineate the primary responsibilities of a project manager? +Casual Rewrite: What's the main job of a project manager, in simple terms? +Formal Question: What mechanisms underlie the process of cellular respiration? +Casual Rewrite: How does cellular respiration actually work? +Formal Question:{question} +Casual Rewrite:""" +) + +CONTEXT_FORMULATE = HumanMessagePromptTemplate.from_template( + """Please extract relevant sentences from the provided context that can potentially help answer the following question. While extracting candidate sentences you're not allowed to make any changes to sentences from given context. + +question:{question} +context:\n{context} +candidate sentences:\n +""" # noqa: E501 +) + + +demonstrations = [ + { + "context": "The Eiffel Tower in Paris was originally intended as a temporary structure, built for the 1889 World's Fair. It was almost dismantled in 1909 but was saved because it was repurposed as a giant radio antenna.", + "questions": [ + { + "question_Why": "Why was the Eiffel Tower originally planned to be a temporary structure?" + }, + { + "question_Was": "Was the Eiffel Tower originally designed to be a permanent structure?" + }, + { + "question_What": "What was the original purpose of the Eiffel Tower when it was built for the 1889 World's Fair?" + }, + { + "question_How": "How did the Eiffel Tower avoid being dismantled in 1909?" + }, + {"question_Where": "Where is the Eiffel Tower?"}, + ], + }, + { + "context": "Photosynthesis is a process used by plants, algae, and certain bacteria to convert light energy into chemical energy.", + "questions": [ + {"question_Why": "Why do plants perform photosynthesis?"}, + { + "question_Was": "Was photosynthesis discovered in plants, algae, or bacteria first?" + }, + { + "question_What": "What converts light energy into chemical energy in photosynthesis?" 
+ }, + {"question_How": "How do plants capture light energy for photosynthesis?"}, + {"question_Where": "Where in plants does photosynthesis primarily occur?"}, + {"question_Can": "Can photosynthesis occur in the absence of light?"}, + ], + }, +] From b33c2d4d762aa9e6bc6a9959465f96127a1ebd10 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Fri, 19 Jan 2024 13:27:55 -0800 Subject: [PATCH 5/8] removed unwanted prompts --- src/ragas/testset/evolutions.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/ragas/testset/evolutions.py b/src/ragas/testset/evolutions.py index 32c32de70..f6f14294f 100644 --- a/src/ragas/testset/evolutions.py +++ b/src/ragas/testset/evolutions.py @@ -11,10 +11,10 @@ from ragas.llms.json_load import load_as_json from ragas.testset.docstore import Direction, Document, DocumentStore, Node from ragas.testset.prompts import ( - FILTER_QUESTION, - MULTI_CONTEXT_QUESTION, - SCORE_CONTEXT, - SEED_QUESTION, + filter_question_prompt, + multi_context_question_prompt, + context_scoring_prompt, + seed_question_prompt, ) rng = default_rng() @@ -35,7 +35,7 @@ def filter(self, node: Node) -> t.Dict: return asyncio.get_event_loop().run_until_complete(self.afilter(node)) async def afilter(self, node: Node) -> t.Dict: - prompt = SCORE_CONTEXT.format(context=node.page_content) + prompt = context_scoring_prompt.format(context=node.page_content) results = await self.llm.agenerate_text(prompt=prompt) output = results.generations[0][0].text.strip() score = load_as_json(output) @@ -51,7 +51,7 @@ def filter(self, question: str) -> bool: return asyncio.get_event_loop().run_until_complete(self.afilter(question)) async def afilter(self, question: str) -> bool: - prompt = FILTER_QUESTION.format(question=question) + prompt = filter_question_prompt.format(question=question) results = await self.llm.agenerate_text(prompt=prompt) results = results.generations[0][0].text.strip() json_results = load_as_json(results) @@ -126,13 +126,13 @@ async def aevolve(self, llm: BaseRagasLLM, docstore: DocumentStore): self.nodes = docstore.get_random_nodes(k=1) self._root_node = self.nodes[0] merged_node = self.merged_nodes() - passed, table_is_present = await self.node_filter.afilter(self.nodes[0]) - if not passed: + passed = await self.node_filter.afilter(self.nodes[0]) + if not passed["score"]: self.nodes = docstore.get_random_nodes(k=1) return await self.aretry_evolve(llm, docstore, update_count=False) # frame a basic question with with node - seed_questions = await simple_evolution(llm, merged_node, table_is_present) + seed_questions = await simple_evolution(llm, merged_node) # NOTE: might need improvement # select only one seed question here seed_question = choice(seed_questions) @@ -148,16 +148,12 @@ async def aevolve(self, llm: BaseRagasLLM, docstore: DocumentStore): async def simple_evolution( - llm: BaseRagasLLM, seed_doc: Document, is_table_present: bool = False + llm: BaseRagasLLM, seed_doc: Document ): - prompt = SEED_QUESTION.format(context=seed_doc.page_content) + prompt = seed_question_prompt.format(context=seed_doc.page_content) results = llm.generate_text(prompt=prompt) results = results.generations[0][0].text - if is_table_present: - return [results] - else: - results = load_as_json(results) - return [v for v in results.values()] + return results async def multi_context_evolution( @@ -166,7 +162,7 @@ async def multi_context_evolution( question = simple_evolution(llm, seed_node) print(question) similar_context = 
doc_store.get_similar(seed_node)[0] - prompt = MULTI_CONTEXT_QUESTION.format( + prompt = multi_context_question_prompt.format( question=question, context1=seed_node.page_content, context2=similar_context ) results = await llm.agenerate_text(prompt=prompt) From 06e3f544f35074c4cf620b2085f68ad836a3a0e3 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Fri, 19 Jan 2024 15:45:49 -0800 Subject: [PATCH 6/8] remove multi seed --- src/ragas/testset/evolutions.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/ragas/testset/evolutions.py b/src/ragas/testset/evolutions.py index f6f14294f..5fea50941 100644 --- a/src/ragas/testset/evolutions.py +++ b/src/ragas/testset/evolutions.py @@ -2,7 +2,6 @@ import typing as t from abc import ABC, abstractmethod from dataclasses import dataclass, field -from random import choice from fsspec.exceptions import asyncio from numpy.random import default_rng @@ -11,9 +10,9 @@ from ragas.llms.json_load import load_as_json from ragas.testset.docstore import Direction, Document, DocumentStore, Node from ragas.testset.prompts import ( + context_scoring_prompt, filter_question_prompt, multi_context_question_prompt, - context_scoring_prompt, seed_question_prompt, ) @@ -132,10 +131,9 @@ async def aevolve(self, llm: BaseRagasLLM, docstore: DocumentStore): return await self.aretry_evolve(llm, docstore, update_count=False) # frame a basic question with with node - seed_questions = await simple_evolution(llm, merged_node) + seed_question = await simple_evolution(llm, merged_node) # NOTE: might need improvement # select only one seed question here - seed_question = choice(seed_questions) is_valid_question = await self.question_filter.afilter(seed_question) if not is_valid_question: # get more context to rewrite question @@ -147,9 +145,7 @@ async def aevolve(self, llm: BaseRagasLLM, docstore: DocumentStore): return seed_question -async def simple_evolution( - llm: BaseRagasLLM, seed_doc: Document -): +async def simple_evolution(llm: BaseRagasLLM, seed_doc: Document): prompt = seed_question_prompt.format(context=seed_doc.page_content) results = llm.generate_text(prompt=prompt) results = results.generations[0][0].text @@ -160,7 +156,6 @@ async def multi_context_evolution( llm: BaseRagasLLM, seed_node: Node, doc_store: DocumentStore ): question = simple_evolution(llm, seed_node) - print(question) similar_context = doc_store.get_similar(seed_node)[0] prompt = multi_context_question_prompt.format( question=question, context1=seed_node.page_content, context2=similar_context From 00e324715bc88be7e3bc114ff2f894ed116a06aa Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Fri, 19 Jan 2024 15:45:57 -0800 Subject: [PATCH 7/8] prompt fixes --- src/ragas/testset/prompts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ragas/testset/prompts.py b/src/ragas/testset/prompts.py index 7a6d6cb9f..16269f362 100644 --- a/src/ragas/testset/prompts.py +++ b/src/ragas/testset/prompts.py @@ -1,8 +1,10 @@ +from langchain.prompts import HumanMessagePromptTemplate + from ragas.llms.prompt import Prompt seed_question_prompt = Prompt( name="seed_question", - instruction="""Generate two questions from given context satisfying the rules given below: + instruction="""Generate a question from given context satisfying the rules given below: 2.The question should be framed such that it must be clearly understood without providing context. 
3.The question should be fully answerable from information present in given context.""", examples=[ @@ -301,8 +303,6 @@ ## TODO: remove this -from langchain.prompts import HumanMessagePromptTemplate - SEED_QUESTION = HumanMessagePromptTemplate.from_template( """ Generate two questions from given context satisfying the rules given below: From 2f07cc2bb9b9dcae3059a9074f6122280063a710 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Fri, 19 Jan 2024 15:55:14 -0800 Subject: [PATCH 8/8] add type hint --- src/ragas/llms/prompt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ragas/llms/prompt.py b/src/ragas/llms/prompt.py index fddcc16dc..505849c77 100644 --- a/src/ragas/llms/prompt.py +++ b/src/ragas/llms/prompt.py @@ -49,7 +49,7 @@ class Prompt(BaseModel): input_keys: t.List[str] output_key: str output_type: str = "json" - language = "english" + language: str = "english" @root_validator def validate_prompt(cls, values: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
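
For reference, a minimal sketch of the authoring pattern this series converges on: prompts are declared as Prompt objects with the same fields used throughout src/ragas/testset/prompts.py and rendered with format() before being handed to the LLM, as evolutions.py now does. The prompt contents, capital_question_prompt, and llm below are illustrative placeholders, not part of any commit above.

from ragas.llms.prompt import Prompt

# Hypothetical prompt, mirroring the field layout of the converted prompts
# (name, instruction, examples, input_keys, output_key, output_type, language).
capital_question_prompt = Prompt(
    name="capital_question",
    instruction="Answer the question using only the given context. Answer '-1' if the answer is not present in the context.",
    examples=[
        {
            "context": "France is a country in Western Europe. Its capital is Paris.",
            "question": "What is the capital of France?",
            "answer": "Paris",
        }
    ],
    input_keys=["context", "question"],
    output_key="answer",
    output_type="string",
    language="english",
)

# format() interpolates the declared input keys and returns a value that
# BaseRagasLLM.generate_text / agenerate_text accept directly, so no
# ChatPromptTemplate or PromptValue wrapping is needed any more.
prompt_value = capital_question_prompt.format(
    context="Japan is an island country in East Asia. Its capital is Tokyo.",
    question="What is the capital of Japan?",
)
# results = llm.generate_text(prompt=prompt_value)  # llm: a configured BaseRagasLLM
# answer = results.generations[0][0].text.strip()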