In [144]:
import os
from dotenv import load_dotenv
# api_key = os.environ.get("OPENAI_API_KEY")
# os.environ['OPENAI_API_KEY'] = api_key
load_dotenv()
print(os.environ['OPENAI_API_KEY'])

1q


In [67]:
from langchain import OpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate

## Load retriever

In [68]:
template = """<human>: Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
### CONTEXT
{context}
### QUESTION
Question: {question}
\n
<bot>:
"""
prompt = ChatPromptTemplate.from_template(template)

In [69]:
from operator import itemgetter
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
loader = TextLoader('./week_6_challenge_doc.txt')
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 200, chunk_overlap=50, model_name = "gpt-4-1106-preview")
docs  = text_splitter.split_documents(documents)

llm = OpenAI(temperature=0)

embeddings = OpenAIEmbeddings()
store = Chroma.from_documents(texts,embeddings, collection_name="challenge_document")
retriever = store.as_retriever()
# chain = RetrievalQA.from_chain_type(llm,retriever=store.as_retriever())
retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": prompt | llm, "context": itemgetter("context")}
)

In [70]:
question = "what are the tasks for this challenge"
result = retrieval_augmented_qa_chain.invoke({"question" : question})
print(result['response'])

1. Understand Prompt Engineering Tools and Concepts: Gain a thorough understanding of the tools and theoretical concepts involved in prompt engineering for Language Models (LLMs).
2. Familiarize with Language Models: Learn about the capabilities and functionalities of advanced LLMs like GPT-4 and GPT-3.5-Turbo.
3. Develop a Plan for Prompt Generation and Testing: Create a comprehensive plan that outlines the approach for automated prompt generation, test case creation, and prompt evaluation.
4. Set Up a Development Environment: Prepare a suitable development environment that supports the integration and testing of LLMs in the prompt engineering process.


In [71]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

question_schema = ResponseSchema(
    name="question",
    description="a question about the context."
)
question_response_schemas = [
    question_schema,
]

In [72]:
question_output_parser = StructuredOutputParser.from_response_schemas(question_response_schemas)
format_instructions = question_output_parser.get_format_instructions()

In [73]:
from langchain.chat_models import ChatOpenAI
question_generation_llm = ChatOpenAI(model="gpt-3.5-turbo-1106")
bare_prompt_template = "{content}"
bare_template = ChatPromptTemplate.from_template(template=bare_prompt_template)

In [74]:
import ragas

## Generate Questions

In [75]:
qa_template = """\
You are a University Professor creating a test for advanced students. For each context, create a question that is specific to the context. Avoid creating generic or general questions.
question: a question about the context.
Format the output as JSON with the following keys:
question
context: {context}
"""
prompt_template = ChatPromptTemplate.from_template(template=qa_template)
messages = prompt_template.format_messages(
    context=docs[0],
    format_instructions=format_instructions
)
question_generation_chain = bare_template | question_generation_llm
response = question_generation_chain.invoke({"content" : messages})
output_dict = question_output_parser.parse(response.content)

In [76]:
for k, v in output_dict.items():
  print(k)
  print(v)

question
How can prompt tuning contribute to building enterprise-grade RAG systems?
context
10 Academy Cohort A
Weekly Challenge: Week 6
Precision RAG: Prompt Tuning For Building Enterprise Grade RAG Systems
metadata
{'source': './week_6_challenge_doc.txt'}


## Generating context

In [77]:
from tqdm import tqdm
import random
random.seed(42)
qac_triples = []
# randomly select 100 chunks from the ~1300 chunks
for text in tqdm(random.sample(docs, 15)):
  messages = prompt_template.format_messages(
      context=text,
      format_instructions=format_instructions
  )
  response = question_generation_chain.invoke({"content" : messages})
  try:
    output_dict = question_output_parser.parse(response.content)
  except Exception as e:
    continue
  output_dict["context"] = text
  qac_triples.append(output_dict)

100%|███████████████████████████████████████████| 15/15 [00:56<00:00,  3.77s/it]


In [78]:
qac_triples[0]

{'question': 'What are the key performance indicators for the Understanding Prompt engineering session on Monday?',
 'context': Document(page_content='________________\nTutorials Schedule\nIn the following, the colour purple indicates morning sessions, and blue indicates afternoon sessions.\nMonday: Understanding Prompt engineering \nHere the trainees will understand the week’s challenge.\n* Introduction to Week Challenge (Yabebal)\n* Introduction and challenge to prompt engineering (Fikerte)\n\n\nKey Performance Indicators:\n\n\n* Understanding week’s challenge\n* Understanding the prompt engineering\n* Ability to reuse previous knowledge\nTuesday\n* RAG components (Rehmet)\n* Techniques to improving R (Retrievers) in RAG (Emitnan)\n\n\nKey Performance Indicators:\n\n\n* Understanding Prompt ranking \n* Understanding prompt matching \n* Ability to reuse previous knowledge', metadata={'source': './week_6_challenge_doc.txt'})}

## Create Ground Truths

In [79]:
answer_generation_llm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)

answer_schema = ResponseSchema(
    name="answer",
    description="an answer to the question"
)

answer_response_schemas = [
    answer_schema,
]

answer_output_parser = StructuredOutputParser.from_response_schemas(answer_response_schemas)

format_instructions = answer_output_parser.get_format_instructions()

qa_template = """\
You are a University Professor creating a test for advanced students. For each question and context, create an answer.

answer: a answer about the context.

Format the output as JSON with the following keys:
answer

question: {question}
context: {context}
"""

prompt_template = ChatPromptTemplate.from_template(template=qa_template)

messages = prompt_template.format_messages(
    context=qac_triples[0]["context"],
    question=qac_triples[0]["question"],
    format_instructions=format_instructions
)

answer_generation_chain = bare_template | answer_generation_llm

response = answer_generation_chain.invoke({"content" : messages})

output_dict = answer_output_parser.parse(response.content)

In [80]:
for k, v in output_dict.items():
  print(k)
  print(v)

answer
The key performance indicators for the Understanding Prompt Engineering session on Monday are: 1) Understanding the week's challenge, 2) Understanding the prompt engineering concepts, and 3) Ability to reuse previous knowledge in the context of prompt engineering.
question
What are the key performance indicators for the Understanding Prompt engineering session on Monday?


In [81]:
for triple in tqdm(qac_triples):
  messages = prompt_template.format_messages(
      context=triple["context"],
      question=triple["question"],
      format_instructions=format_instructions
  )
  response = answer_generation_chain.invoke({"content" : messages})
  try:
    output_dict = answer_output_parser.parse(response.content)
  except Exception as e:
    continue
  triple["answer"] = output_dict["answer"]

100%|███████████████████████████████████████████| 14/14 [01:48<00:00,  7.78s/it]


## Combine questions, contexts, and answers for evaluation dataset

In [82]:
import pandas as pd
from datasets import Dataset

ground_truth_qac_set = pd.DataFrame(qac_triples)

ground_truth_qac_set["context"] = ground_truth_qac_set["context"].map(lambda x: str(x.page_content))

ground_truth_qac_set = ground_truth_qac_set.rename(columns={"answer" : "ground_truth"})

eval_dataset = Dataset.from_pandas(ground_truth_qac_set)

In [83]:
eval_dataset

Dataset({
    features: ['question', 'context', 'ground_truth', 'metadata'],
    num_rows: 14
})

In [84]:
eval_dataset[5]

{'question': 'How can automating and optimizing the prompt engineering process enhance LLM productivity?',
 'context': 'The need for simplified, efficient prompt engineering is clear. Automating and optimizing this process can save time, enhance LLM productivity, and make advanced AI capabilities more accessible to a broader range of users. The tasks of Automatic Prompt Generation, Evaluation Data Generation, and Prompt Testing and Ranking are aimed at addressing these challenges, streamlining the prompt engineering process for more effective use of LLMs.\nLearning Outcomes\nSkills Development\n* Prompt Engineering Proficiency: Gain expertise in crafting effective prompts that guide LLMs to desired outputs, understanding nuances and variations in language that impact model responses.\n* Critical Analysis: Develop the ability to critically analyze and evaluate the effectiveness of different prompts based on their performance in varied scenarios.\n* Technical Aptitude with LLMs: Enhance 

## RAG Evaluation Using ragas

In [119]:
from ragas.metrics import (
    faithfulness,
    context_recall,
    context_precision,

)

from ragas.metrics.critique import harmfulness
from ragas import evaluate

def create_ragas_dataset(rag_pipeline, eval_dataset):
  rag_dataset = []
  for row in tqdm(eval_dataset):
    answer = rag_pipeline.invoke({"question" : row["question"]})
    rag_dataset.append(
        {"question" : row["question"],
         "answer" : answer["response"],
         "contexts" : [context.page_content for context in answer["context"]],
         "ground_truths" : [row["ground_truth"]]
         }
    )
  rag_df = pd.DataFrame(rag_dataset)
  rag_eval_dataset = Dataset.from_pandas(rag_df)
  return rag_eval_dataset

def evaluate_ragas_dataset(ragas_dataset):
  result = evaluate(
    ragas_dataset,
    metrics=[
        context_precision,
        context_recall,
        faithfulness,
    ],
  )
  return result

In [86]:
from tqdm import tqdm
import pandas as pd
basic_qa_ragas_dataset = create_ragas_dataset(retrieval_augmented_qa_chain, eval_dataset)

100%|███████████████████████████████████████████| 14/14 [00:14<00:00,  1.02s/it]


In [87]:
basic_qa_ragas_dataset[0]

{'question': 'What are the key performance indicators for the Understanding Prompt engineering session on Monday?',
 'answer': '* Understanding week’s challenge\n* Understanding the prompt engineering\n* Ability to reuse previous knowledge',
 'contexts': ['________________\nTutorials Schedule\nIn the following, the colour purple indicates morning sessions, and blue indicates afternoon sessions.\nMonday: Understanding Prompt engineering \nHere the trainees will understand the week’s challenge.\n* Introduction to Week Challenge (Yabebal)\n* Introduction and challenge to prompt engineering (Fikerte)\n\n\nKey Performance Indicators:\n\n\n* Understanding week’s challenge\n* Understanding the prompt engineering\n* Ability to reuse previous knowledge\nTuesday\n* RAG components (Rehmet)\n* Techniques to improving R (Retrievers) in RAG (Emitnan)\n\n\nKey Performance Indicators:\n\n\n* Understanding Prompt ranking \n* Understanding prompt matching \n* Ability to reuse previous knowledge',
  '___

In [134]:
basic_qa_result = evaluate_ragas_dataset(basic_qa_ragas_dataset)

evaluating with [context_precision]


  0%|                                                     | 0/1 [00:00<?, ?it/s]


RuntimeError: ('Fatal error occurred while running async tasks.', AuthenticationError("Error code: 401 - {'error': {'message': 'Incorrect API key provided: 1q. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"))