In [1]:
import os
import openai

from langchain.document_loaders import ArxivLoader

# Fetch at most five arXiv papers that match the search query.
base_docs = ArxivLoader(
    query="Retrieval Augmented Generation",
    load_max_docs=5,
).load()

# Cell output: the number of documents actually returned.
len(base_docs)


5

In [2]:
from langchain_community.vectorstores.chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Cut the loaded papers into small chunks (chunk_size=250) before indexing.
# NOTE(review): chunk_overlap is not passed, so the splitter's default applies —
# confirm that default is sensible for chunks this small.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250)
docs = text_splitter.split_documents(base_docs)

# Embed every chunk with OpenAI embeddings and store the vectors in Chroma.
vectorstore = Chroma.from_documents(documents=docs, embedding=OpenAIEmbeddings())

In [5]:
# Retriever over the vector store, configured with k=2 results per query.
base_retriever = vectorstore.as_retriever(search_kwargs=dict(k=2))

In [37]:
from langchain.prompts import ChatPromptTemplate


# Instructions for rewriting a user prompt into several improved prompts.
# Placeholders filled at run time: {num_of_prompts_to_generate}, {context}
# and {user_prompt}. The model is asked to answer with a bracketed list of
# double-quoted strings (see the "example:" line inside the template).
template = """You are an AI-powered natural language processing expert in information retrieval and ranking. Your role is to provide advanced techniques and algorithms for generating superior prompts that optimize user queries and ensure the best performance of automatic prompt generation. Your expertise lies in understanding user intent, analyzing query patterns, and generating contextually relevant prompts that enable efficient and accurate retrieval of information. With your skills and abilities, you are capable of fine-tuning models to enhance prompt generation, leveraging semantic understanding and query understanding to deliver optimal results. By utilizing cutting-edge techniques in the field, you can generate automatic prompts that empower users to obtain the most relevant and comprehensive information for their queries.

Your task is to formulate exactly {num_of_prompts_to_generate} prompts from the provided original prompt that are better and using the given context.

Use the below format to output the prompts.

example:
["prompt1", "prompt2", "prompt3", "prompt4", "prompt5"]


The generated prompt must satisfy the rules given below:
0. The generated prompted should only contain the prompt and no numbering
1.The prompt should make sense to humans even when read without the given context.
2.The prompt should be fully created from the given context.
3.The prompt should be framed from a part of context that contains important information. It can also be from tables,code,etc.
4.The prompt must be reasonable and must be understood and responded by humans.
5.Do no use phrases like 'provided context',etc in the prompt
6.The prompt should not contain more than 10 words, make of use of abbreviation wherever possible.
    
### CONTEXT
{context}

### User Prompt
User Prompt: {user_prompt}
"""

# Wrap the raw string as a chat prompt template.
prompt = ChatPromptTemplate.from_template(template)

In [50]:
from operator import itemgetter

from langchain_openai import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough, RunnableParallel

# Chat model (temperature 0) that rewrites the user's prompt.
primary_qa_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Step 1: fan the input dict out into the three variables the prompt template
# needs; "context" is produced by piping the user prompt into the retriever.
retriever = RunnableParallel(
    {
        "context": itemgetter("user_prompt") | base_retriever,
        "user_prompt": itemgetter("user_prompt"),
        "num_of_prompts_to_generate": itemgetter("num_of_prompts_to_generate"),
    }
)

# Step 2: produce the model response and pass the retrieved context through.
retrieval_augmented_qa_chain = retriever | {
    "response": prompt | primary_qa_llm,
    "context": itemgetter("context"),
}


In [52]:
import json
user_prompt = "What is RAG?"
num_of_prompts_to_generate = 5

# Run the chain once; the result carries the model message under "response"
# and the retrieved documents under "context".
result = retrieval_augmented_qa_chain.invoke(
    dict(
        user_prompt=user_prompt,
        num_of_prompts_to_generate=num_of_prompts_to_generate,
    )
)
print(result)

# The model's reply text is decoded as JSON into a Python list of prompts.
prompts_generated = json.loads(result["response"].content)
prompts_generated

{'response': AIMessage(content='["Explain the concept of RAG.", "Provide an overview of RAG.", "Describe the purpose of RAG.", "What does RAG stand for?", "Can you give me information about RAG?"]'), 'context': [Document(page_content='2020). RAG consists of three primary components:\nTool Retrieval, Plan Generation, and Execution.1\nIn this study, we focus on enhancing tool retrieval,\nwith the goal of achieving subsequent improve-\nments in plan generation.', metadata={'Authors': 'Raviteja Anantha, Tharun Bethi, Danil Vodianik, Srinivas Chappidi', 'Published': '2023-12-09', 'Summary': "Large language models (LLMs) have the remarkable ability to solve new tasks\nwith just a few examples, but they need access to the right tools. Retrieval\nAugmented Generation (RAG) addresses this problem by retrieving a list of\nrelevant tools for a given task. However, RAG's tool retrieval step requires\nall the required information to be explicitly present in the query. This is a\nlimitation, as sema

['Explain the concept of RAG.',
 'Provide an overview of RAG.',
 'Describe the purpose of RAG.',
 'What does RAG stand for?',
 'Can you give me information about RAG?']

## Ground Truth Creation

In [17]:
def get_context_for_user_objective(user_objective):
    """Return the documents ``base_retriever`` retrieves for ``user_objective``.

    Args:
        user_objective: Query text handed to the retriever.

    Returns:
        The retriever's result for the query (a list of documents).
    """
    # `base_retriever` is already used as a Runnable when the chain pipes into
    # it, so call `invoke` instead of the deprecated `get_relevant_documents`.
    return base_retriever.invoke(user_objective)

In [26]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

# Schema for the parser: a single "questions" field declared as array(str).
question_schema = ResponseSchema(
    type="array(str)",
    name="questions",
    description="list of questions about the context with the example: ['What is rag'].",
)
question_response_schemas = [question_schema]

# Parser for the model reply, plus the formatting instructions it generates.
question_output_parser = StructuredOutputParser.from_response_schemas(
    question_response_schemas
)
format_instructions = question_output_parser.get_format_instructions()

question_generation_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")

# Pass-through prompt: whatever is supplied as `content` becomes the message.
bare_prompt_template = "{content}"
bare_template = ChatPromptTemplate.from_template(template=bare_prompt_template)

In [28]:
from langchain.prompts import ChatPromptTemplate

# Prompt for generating context-specific questions. The output format is
# delegated to {format_instructions} so the instructions sent to the model are
# exactly the ones `question_output_parser` knows how to parse. Previously the
# value was passed to `format_messages` but the template had no placeholder
# for it, so it was silently dropped.
qa_template = """\
You are a University Professor creating a test for advanced students. For each context, create 5 question that is specific to the context. Avoid creating generic or general questions.

{format_instructions}

context: {context}
"""

prompt_template = ChatPromptTemplate.from_template(template=qa_template)

messages = prompt_template.format_messages(
    context=get_context_for_user_objective(user_prompt),
    # Ask the question parser directly: the module-level `format_instructions`
    # is reassigned by the answer-generation cell, so relying on it would send
    # the wrong instructions if this cell is re-run afterwards.
    format_instructions=question_output_parser.get_format_instructions(),
)

question_generation_chain = bare_template | question_generation_llm

# The formatted messages are passed as the `content` of the pass-through prompt.
response = question_generation_chain.invoke({"content": messages})
questions_dict = question_output_parser.parse(response.content)
print(questions_dict)

{'questions': ['What are the three primary components of RAG?', 'What is the goal of enhancing tool retrieval in this study?', "What is the limitation of RAG's tool retrieval step?", 'How does Context Tuning for RAG improve tool retrieval and plan generation?', 'What signals does the lightweight context retrieval model use to retrieve and rank context items?'], 'context': [{'page_content': '2020). RAG consists of three primary components:\nTool Retrieval, Plan Generation, and Execution.1\nIn this study, we focus on enhancing tool retrieval,\nwith the goal of achieving subsequent improve-\nments in plan generation.', 'metadata': {'Authors': 'Raviteja Anantha, Tharun Bethi, Danil Vodianik, Srinivas Chappidi', 'Published': '2023-12-09', 'Summary': "Large language models (LLMs) have the remarkable ability to solve new tasks\nwith just a few examples, but they need access to the right tools. Retrieval\nAugmented Generation (RAG) addresses this problem by retrieving a list of\nrelevant tools

#### Answer each generated question with GPT-4; these answers act as the ground truth

In [32]:
from tqdm import tqdm

# GPT-4 (temperature 0) writes the reference answers used as ground truth.
answer_generation_llm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)

answer_schema = ResponseSchema(
    name="answer",
    description="an answer to the question"
)

answer_response_schemas = [
    answer_schema,
]

answer_output_parser = StructuredOutputParser.from_response_schemas(answer_response_schemas)
format_instructions = answer_output_parser.get_format_instructions()

# The output format is delegated to {format_instructions} so the model is told
# to reply in the shape `answer_output_parser` expects. Previously the value
# was passed to `format_messages` but the template had no placeholder for it.
qa_template = """\
You are a University Professor creating a test for advanced students. For each question and context, create an answer.

answer: a answer about the context.

{format_instructions}

question: {question}
context: {context}
"""

prompt_template = ChatPromptTemplate.from_template(template=qa_template)
answer_generation_chain = bare_template | answer_generation_llm

# The context depends only on `user_prompt`, so retrieve it once instead of
# repeating the same retrieval for every question.
answer_context = get_context_for_user_objective(user_prompt)

question_answer_dict_list = []

for question in tqdm(questions_dict['questions']):
    print(question)
    messages = prompt_template.format_messages(
        context=answer_context,
        question=question,
        format_instructions=format_instructions,
    )

    response = answer_generation_chain.invoke({"content": messages})
    try:
        output_dict = answer_output_parser.parse(response.content)
    except Exception as exc:
        # Best effort: one unparseable reply should not abort the run, but
        # report it so that missing rows in the dataset are explainable.
        print(f"Skipping question, could not parse the answer: {exc}")
        continue
    # Record the question that was actually asked. The answer schema only
    # requests an "answer" key, so reading "question" back from the model
    # output could raise KeyError.
    question_answer_dict_list.append({'question': question, 'answer': output_dict["answer"]})

question_answer_dict_list

  0%|          | 0/5 [00:00<?, ?it/s]

What are the three primary components of RAG?


 20%|██        | 1/5 [00:49<03:19, 49.78s/it]

What is the goal of enhancing tool retrieval in this study?


 40%|████      | 2/5 [00:59<01:19, 26.40s/it]

What is the limitation of RAG's tool retrieval step?


 60%|██████    | 3/5 [01:13<00:40, 20.46s/it]

How does Context Tuning for RAG improve tool retrieval and plan generation?


 80%|████████  | 4/5 [01:34<00:20, 20.95s/it]

What signals does the lightweight context retrieval model use to retrieve and rank context items?


100%|██████████| 5/5 [01:41<00:00, 20.26s/it]


[{'question': 'What are the three primary components of RAG?',
  'answer': 'The three primary components of RAG are Tool Retrieval, Plan Generation, and Execution.'},
 {'question': 'What is the goal of enhancing tool retrieval in this study?',
  'answer': 'The goal of enhancing tool retrieval in this study is to achieve subsequent improvements in plan generation by providing a more effective method for retrieving relevant tools. This is accomplished through Context Tuning for Retrieval Augmented Generation (RAG), which aims to overcome the limitations of semantic search when the query is incomplete or lacks context. By employing a smart context retrieval system that uses numerical, categorical, and habitual usage signals, the study seeks to improve the accuracy of both tool retrieval and plan generation, ultimately leading to a reduction in hallucination during the planning process.'},
 {'question': "What is the limitation of RAG's tool retrieval step?",
  'answer': "The limitation of 

In [33]:
import pandas as pd
from datasets import Dataset

# Tabulate the question/answer pairs and relabel "answer" as "ground_truth".
ground_truth_qac_set = pd.DataFrame(question_answer_dict_list).rename(
    columns={"answer": "ground_truth"}
)

# Convert the frame into a `datasets.Dataset` for the evaluation step.
eval_dataset = Dataset.from_pandas(ground_truth_qac_set)

  from .autonotebook import tqdm as notebook_tqdm


In [34]:
# Display the dataset summary (feature names and row count).
eval_dataset


Dataset({
    features: ['question', 'ground_truth'],
    num_rows: 5
})

In [35]:
# Inspect the first question / ground-truth record.
eval_dataset[0]


{'question': 'What are the three primary components of RAG?',
 'ground_truth': 'The three primary components of RAG are Tool Retrieval, Plan Generation, and Execution.'}

#### Evaluating the RAG pipeline

In [None]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    context_relevancy,
    answer_correctness,
    answer_similarity
)

from ragas.metrics.critique import harmfulness
from ragas import evaluate

def create_ragas_dataset(rag_pipeline, eval_dataset, input_builder=None):
  """Run every evaluation question through ``rag_pipeline`` and collect the results.

  Args:
    rag_pipeline: Object with an ``invoke`` method. Its result must expose
      ``["response"].content`` and an iterable ``["context"]`` whose items
      have a ``page_content`` attribute.
    eval_dataset: Iterable of rows with ``"question"`` and ``"ground_truth"``
      entries.
    input_builder: Optional callable that maps a question string to the value
      passed to ``rag_pipeline.invoke``. When omitted, the question string is
      passed unchanged (the original behaviour). Use it for pipelines that
      take a dict input, e.g.
      ``lambda q: {"user_prompt": q, "num_of_prompts_to_generate": 5}``.

  Returns:
    A ``Dataset`` with the columns ``question``, ``answer``, ``contexts`` and
    ``ground_truths``.
  """
  rag_dataset = []
  for row in tqdm(eval_dataset):
    question = row["question"]
    # Adapt the question to the pipeline's input shape only when asked to.
    pipeline_input = question if input_builder is None else input_builder(question)
    answer = rag_pipeline.invoke(pipeline_input)
    rag_dataset.append(
        {"question" : question,
         "answer" : answer["response"].content,
         "contexts" : [context.page_content for context in answer["context"]],
         # Single reference answer wrapped in a list.
         "ground_truths" : [row["ground_truth"]]
         }
    )
  rag_df = pd.DataFrame(rag_dataset)
  rag_eval_dataset = Dataset.from_pandas(rag_df)
  return rag_eval_dataset

def evaluate_ragas_dataset(ragas_dataset):
  """Score ``ragas_dataset`` with the selected ragas metrics.

  Returns whatever ``evaluate`` returns for the dataset and metric list.
  """
  selected_metrics = [
      context_precision,
      faithfulness,
      answer_relevancy,
      context_recall,
      context_relevancy,
      answer_correctness,
      answer_similarity,
  ]
  return evaluate(ragas_dataset, metrics=selected_metrics)