In [6]:
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import get_openai_callback
from langchain.schema import Document
from langchain.vectorstores import FAISS

# Install required libraries:
# !pip install pypdf2 unstructured chromadb sentence-transformers langchain langchain_openai tiktoken pypdf faiss-cpu openai

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings

def generate_test_cases(description, model_name="gpt-3.5-turbo", temperature=1.5, amount=10):
    """Generates test cases for a given task description using an LLMChain."""

    prompt_template = ChatPromptTemplate.from_template("""
    You are a helpful assistant who generates test cases for LLM applications.

    Please create {amount} test case scenarios for the following task description, each with a clear expected output:

    Task: {description}

    Format each test case like this:
    Scenario: [Scenario description]
    Expected output: [Expected output of the model]
    """)

    chain = LLMChain(llm=ChatOpenAI(model_name=model_name, temperature=temperature), prompt=prompt_template)

    response = chain.run({"description": description, "amount": amount})

    test_cases = []
    for case_str in response.split("Scenario:"):
        if not case_str.strip():  # Skip empty lines
            continue
        scenario, expected_output = case_str.split("Expected output:", 1)
        test_cases.append(
            {"prompt": scenario.strip(), "expected_output": expected_output.strip()}
        )

    return test_cases


def evaluate_answer(answer, test_case):
    """Evaluates an answer against the expected output of a test case."""
    # Placeholder for evaluation logic (you'll need to implement this)
    # Example: You could use a similarity metric like ROUGE or BLEU to compare the texts
    return {"score": len(answer)}  # Just a dummy score for demonstration


def load_docs(file_path):
    """Load documents from a PDF file."""
    loader = PyPDFLoader(file_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)
    return docs

def create_rag_chain(prompt_template, db):
    retriever = db.as_retriever(search_type = "similarity")

    rag_chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt_template}
    )
    return rag_chain


# Example usage
description = "Explain the concept of few-shot learning."
test_cases = generate_test_cases(description)

# Function to create a RAG chain with a given prompt
def create_rag_chain_factory(prompt_template, db):
    """Creates a RAG chain factory function with the specified prompt and vectorstore."""
    def _create_rag_chain(prompt):
        return create_rag_chain(prompt_template, db)
    return _create_rag_chain



ValueError: File path path/to/your/document.pdf is not a valid file or url

In [14]:
from langchain.chat_models import ChatOpenAI

def generate_prompt_candidates(test_cases, description, amount=10):
    """Generates prompt candidates using a chat-based language model."""
    prompt_generation_model = "gpt-3.5-turbo"  # Or another suitable model
    system_messages = [
        "Generate a diverse and creative prompt that can retrieve relevant documents.",
        "Create an engaging and precise prompt for document retrieval.",
        "Craft a prompt that effectively guides the retrieval of relevant documents.",
    ]  # Add more system messages for variation

    candidates = []

    for i in range(amount):
        system_message = system_messages[i % len(system_messages)]

        # Format test cases for the prompt
        formatted_test_cases = "\n".join(
            [
                f"Test case #{j+1}:\nScenario: {test_case['prompt']}\nExpected output: {test_case['expected_output']}"
                for j, test_case in enumerate(test_cases)
            ]
        )

        prompt = f"""
        You are an AI assistant skilled at creating prompts for retrieving relevant documents. 

        Consider these test cases:

        {formatted_test_cases}

        Here's the goal for the final prompt:
        {description}

        Please create a prompt that would effectively guide the retrieval of relevant documents for similar scenarios.

        Constraints:
        * Do NOT include any specific details from the test cases in your prompt.
        * The prompt should be clear, concise, and easy to understand.
        * IF YOU USE EXAMPLES, ALWAYS USE ONES THAT ARE VERY DIFFERENT FROM THE TEST CASES.
        """

        model = ChatOpenAI(model=prompt_generation_model, temperature=1 if i > 0 else 0)

        try:
            response = model.invoke(system_message + prompt)
            candidate = response.content.strip() 

            if candidate and candidate not in candidates:  # Avoid duplicates
                candidates.append(candidate)

        except Exception as e:
            print(f"Error generating prompt candidate: {e}")
            # You might want to handle this error more gracefully in a production setting

    return candidates

# Example Usage:
description = "Explain the concept of few-shot learning."
test_cases = generate_test_cases(description) # Replace this with your actual test cases
candidates = generate_prompt_candidates(test_cases, description)

for i, candidate in enumerate(candidates):
    print(f"Candidate Prompt #{i+1}: {candidate}")



Candidate Prompt #1: Prompt: Describe how few-shot learning enables machines to quickly adapt to new tasks with minimal training examples, showcasing its efficiency in learning diverse concepts with limited data.
Candidate Prompt #2: Retrieve documents that provide an in-depth explanation of the concept of few-shot learning, focusing on how machines can quickly adapt to new tasks with minimal training examples. Be sure to include examples that showcase the practical applications and benefits of this approach.
Candidate Prompt #3: Prompt: Describe how few-shot learning allows machines to quickly adapt to new tasks with minimal training examples.
Candidate Prompt #4: Prompt: Describe how few-shot learning enables machines to quickly adapt to new tasks with minimal training examples.
Candidate Prompt #5: Prompt: Retrieve documents that provide a comprehensive explanation of few-shot learning, including its core principles and applications in various domains.
Candidate Prompt #6: Prompt: P

In [22]:
candidates

['Prompt: Describe how few-shot learning enables machines to quickly adapt to new tasks with minimal training examples, showcasing its efficiency in learning diverse concepts with limited data.',
 'Retrieve documents that provide an in-depth explanation of the concept of few-shot learning, focusing on how machines can quickly adapt to new tasks with minimal training examples. Be sure to include examples that showcase the practical applications and benefits of this approach.',
 'Prompt: Describe how few-shot learning allows machines to quickly adapt to new tasks with minimal training examples.',
 'Prompt: Describe how few-shot learning enables machines to quickly adapt to new tasks with minimal training examples.',
 'Prompt: Retrieve documents that provide a comprehensive explanation of few-shot learning, including its core principles and applications in various domains.',
 'Prompt: Provide an overview of a machine learning technique that allows models to quickly learn new tasks with mi

### Evaluate a single prompt candidate:

In [21]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from ragas import evaluate
from ragas.metrics import context_precision, faithfulness, answer_relevancy, context_recall


### Evaluate the prompts

In [23]:
def evaluate_prompts(test_cases, candidates, rag_chain_factory):
    # ... (Your other functions like retrieve_context and create_rag_chain_factory remain the same) ...
    results = []
    for test_case in test_cases:
        question = test_case["prompt"]
        expected_output = test_case["expected_output"]

        context = retrieve_context(question)
        retrieved_docs = [Document(page_content=doc) for doc in context]

        for candidate in candidates:
            # Extract the prompt text by removing the "Prompt: " prefix
            prompt = candidate[len("Prompt: "):].strip() 

            rag_chain = rag_chain_factory(prompt)

            chain = (
                {"context": retrieved_docs, "question": RunnablePassthrough()}
                | rag_chain.prompt
                | rag_chain.llm
                | StrOutputParser()
            )

            try:
                answer = chain.invoke(test_case["prompt"])
                # Update candidate's rating based on answer quality (runBattle logic)
                # ...

                data = {
                    "question": [question],
                    "answer": [answer],
                    "contexts": [
                        retrieved_docs,
                    ],
                }

                scores = evaluate(
                    data,
                    metrics=[
                        context_precision,
                        faithfulness,
                        answer_relevancy,
                        context_recall,
                    ],
                )

                results.append(
                    {
                        "test_case": test_case,
                        "candidate": candidate,
                        "answer": answer,
                        "scores": scores,
                    }
                )
            except Exception as e:
                print(f"Error evaluating prompt '{candidate}': {e}")

    return results


In [None]:
test_cases = [
    {"prompt": "What is few shot learning?", "expected_output": "Few-shot learning is a machine learning approach where a model can learn to perform a new task with only a few examples, while traditional machine learning typically requires large amounts of labeled data."},
    {"prompt": "Give an example of how few-shot learning could be applied in a real-world application.", "expected_output": "Few-shot learning could be used to develop a medical diagnosis system that can quickly learn to identify new diseases with limited data."}
]


In [26]:
loader = PyPDFLoader("/home/hilla/code/10Academy-training/week7/Prompt-Tuning-Enterprise-RAG/notebooks/prompt_ranking/Download Llama Terms and policy.pdf") # Adjust to your document loader
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

db = FAISS.from_documents(docs, OpenAIEmbeddings())

def retrieve_context(query):
    """Retrieves relevant context for a given query from your knowledge base or documents."""
    # Replace with your actual retrieval logic using your RAG chain.
    similar_docs = db.similarity_search(query)

    return [doc.page_content for doc in similar_docs]


In [29]:
from langchain.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA


def create_rag_chain_factory(prompt_template, db):
    def _create_rag_chain(prompt):
        return create_rag_chain(prompt_template, db)

    return _create_rag_chain

prompts = ["Prompt 1", "Prompt 2", "Prompt 3"]  # Replace with your actual prompts
prompt_template = ChatPromptTemplate.from_template(
    "Use the following context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nAnswer:"
)
rag_chain_factory = create_rag_chain_factory(prompt_template, db)


results = []

for test_case in test_cases[:2]:  # Test on the first two test cases for demonstration
    question = test_case["prompt"]
    expected_output = test_case["expected_output"]

    # Use the function here to get context
    context = retrieve_context(question)
    retrieved_docs = [Document(page_content=doc) for doc in context]

    for candidate in candidates:
        # Extract the prompt text by removing the "Prompt: " prefix
        prompt = candidate[len("Prompt: "):].strip()

        rag_chain = rag_chain_factory(prompt)
        # ... (evaluation steps below) ...

        chain = (
            {"context": retrieved_docs, "question": RunnablePassthrough()}
            | rag_chain.prompt
            | rag_chain.llm
            | StrOutputParser()
        )

        # Generate answer from RAG chain
        answer = chain.invoke(test_case["prompt"])
        print(answer)

        data = {
            "question": [question],
            "answer": [answer],
            "contexts": [
                retrieved_docs,
            ],
        }

        scores = evaluate(
            data,
            metrics=[
                context_precision,
                faithfulness,
                answer_relevancy,
                context_recall,
            ],
        )

        results.append(
            {
                "test_case": test_case,
                "candidate": candidate,
                "answer": answer,
                "scores": scores,
            }
        )


AttributeError: 'RetrievalQA' object has no attribute 'prompt'

In [30]:
def initialize_distribution(candidates, sample_amount):
    """Initializes the distribution for Monte Carlo simulation."""
    distribution = {str(candidate["id"]): 0 for candidate in candidates}
    return distribution

def run_monte_carlo_simulation(candidates, distribution, sample_amount):
    """Performs Monte Carlo simulation to estimate prompt rankings."""
    for _ in range(sample_amount):
        samples = {
            str(candidate["id"]): randomNormal(
                mean=candidate["rating"], dev=candidate["sd"]
            )
            for candidate in candidates
        }
        winner = max(samples, key=samples.get)  # Get the prompt with the highest sample
        distribution[winner] += 1
    return distribution

def randomly_select_from_distribution(distribution, excluded=""):
    """Randomly selects a prompt from the distribution, excluding the 'excluded' prompt."""
    filtered_dist = {k: v for k, v in distribution.items() if k != excluded}
    total = sum(filtered_dist.values())
    rand = random.uniform(0, total)
    for prompt_id, count in filtered_dist.items():
        rand -= count
        if rand <= 0:
            return prompt_id

def create_battle(test_cases, prompt_a, prompt_b):
    """Creates a new battle object for the selected prompts."""
    return {
        "a": prompt_a,
        "b": prompt_b,
        "rounds": [{"testCase": tc, "result": None, "generation": {"a": '', "b": ''}} for tc in test_cases],
        "winner": None,
    }

#... (rest of code)

async def run_battle():
    sample_amount = 1000 # Replace with value from useSettings() hook
    learning_rate = 0.9 # Replace with value from useSettings() hook
    distribution = initialize_distribution(candidates.value, sample_amount)
    distribution = await run_monte_carlo_simulation(candidates.value, distribution, sample_amount)

    prompt_a = randomly_select_from_distribution(distribution)
    prompt_b = randomly_select_from_distribution(distribution, prompt_a)

    new_battle = create_battle(testCases.value, prompt_a, prompt_b)
    
    #... (rest of run_battle code)


In [19]:
from ragas import evaluate
from ragas.metrics import context_precision, faithfulness, answer_relevancy, context_recall
from langchain.chat_models import ChatOpenAI
from langchain.schema import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def evaluate_single_prompt(prompt, test_cases, rag_chain_factory):
    """Evaluates the quality of answers generated by a single prompt candidate using RAGAs metrics."""

    results = []

    for test_case in test_cases:
        question = test_case["prompt"]
        expected_output = test_case["expected_output"]

        context = retrieve_context(question)
        retrieved_docs = [Document(page_content=doc) for doc in context]

        rag_chain = rag_chain_factory(prompt)

        chain = (
            {"context": retrieved_docs, "question": RunnablePassthrough()}
            | rag_chain.prompt
            | rag_chain.llm
            | StrOutputParser()
        )

        try:
            answer = chain.invoke(test_case["prompt"])

            data = {
                "question": [question],
                "answer": [answer],
                "contexts": [
                    retrieved_docs,
                ],
            }

            scores = evaluate(
                data,
                metrics=[
                    context_precision,
                    faithfulness,
                    answer_relevancy,
                    context_recall,
                ],
            )

            results.append(
                {
                    "test_case": test_case,
                    "prompt": prompt,
                    "answer": answer,
                    "scores": scores,
                }
            )
        except Exception as e:
            print(f"Error evaluating prompt '{prompt}': {e}")

    return results


# Example usage
# ... (assumes test_cases, description, and a single candidate prompt are already defined) ...

candidate = "Your prompt goes here"  # The prompt you want to evaluate

evaluation_results = evaluate_single_prompt(candidate, test_cases, create_rag_chain_factory)

for result in evaluation_results:
    print(f"Test Case: {result['test_case']['prompt']}")
    print(f"Prompt: {result['prompt']}")
    print(f"Generated Answer: {result['answer']}")
    print(f"Scores: {result['scores']}")
    print("\n")


NameError: name 'retrieve_context' is not defined

In [24]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from ragas import evaluate
from ragas.metrics import context_precision, faithfulness, answer_relevancy, context_recall
import pLimit
import randomNormal
import resguard
from utils import timeout # Make sure you have this module if you use it in you original code
from utils.types import Candidate #Make sure you have this module if you use it in you original code
from utils.synced_state import useSyncedState #Make sure you have this module if you use it in you original code
import random

# ... (Your RAG chain creation function and `retrieve_context` function from previous responses) ...

def generate_prompt_candidates(test_cases, description, amount=10):
    # ... (same as the provided generate_prompt_candidates function) ...

def evaluate_prompts(test_cases, candidates, rag_chain_factory):
    """Evaluates a list of prompt candidates using RAGAs metrics and the provided RAG chain factory."""

    results = []

    for test_case in test_cases:
        question = test_case["prompt"]
        expected_output = test_case["expected_output"]

        context = retrieve_context(question)
        retrieved_docs = [Document(page_content=doc) for doc in context]

        for candidate in candidates:
            rag_chain = rag_chain_factory(candidate)

            chain = (
                {"context": retrieved_docs, "question": RunnablePassthrough()}
                | rag_chain.prompt
                | rag_chain.llm
                | StrOutputParser()
            )

            try:
                answer = chain.invoke(test_case["prompt"])
                
                data = {
                    "question": [question],
                    "answer": [answer],
                    "contexts": [
                        retrieved_docs,
                    ],
                }

                scores = evaluate(
                    data,
                    metrics=[
                        context_precision,
                        faithfulness,
                        answer_relevancy,
                        context_recall,
                    ],
                )

                results.append(
                    {
                        "test_case": test_case,
                        "candidate": candidate,
                        "answer": answer,
                        "scores": scores,
                    }
                )
            except Exception as e:
                print(f"Error evaluating prompt '{candidate}': {e}")

    return results


# Example usage
# ... (assumes test_cases, description, and candidates are already defined) ...
# Create RAG chain factory
def create_rag_chain_factory(prompt_template, db):
    def _create_rag_chain(prompt):
        return create_rag_chain(prompt_template, db)

    return _create_rag_chain
# Load documents and create vectorstore
docs = load_docs("path/to/your/document.pdf")
db = FAISS.from_documents(docs, OpenAIEmbeddings())

prompts = ["Prompt 1", "Prompt 2", "Prompt 3"]  # Replace with your actual prompts
prompt_template = ChatPromptTemplate.from_template(
    "Use the following context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nAnswer:"
)
rag_chain_factory = create_rag_chain_factory(prompt_template, db)
evaluation_results = evaluate_prompts(test_cases, candidates, rag_chain_factory)
for result in evaluation_results:
    print(f"Test Case: {result['test_case']['prompt']}")
    print(f"Candidate: {result['candidate']}")
    print(f"Generated Answer: {result['answer']}")
    print(f"Scores: {result['scores']}")
    print("\n")


IndentationError: expected an indented block (3920130440.py, line 20)

In [None]:

# Load documents and create vectorstore
docs = load_docs("./Download Llama Terms and policy.pdf")
db = FAISS.from_documents(docs, OpenAIEmbeddings())

prompts = ["Prompt 1", "Prompt 2", "Prompt 3"]  # Replace with your actual prompts
prompt_template = ChatPromptTemplate.from_template(
    "Use the following context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nAnswer:"
)
rag_chain_factory = create_rag_chain_factory(prompt_template, db)
results = evaluate_prompts(prompts, test_cases, rag_chain_factory)

# Replace the placeholder evaluation with your actual logic (e.g., ROUGE, BLEU, etc.)
# Example: Print the scores for each prompt and test case
for prompt, scores in results.items():
    print(f"\nPrompt: {prompt}")
    with get_openai_callback() as cb:
        for i, score in enumerate(scores):
            print(f"  Test Case #{i + 1}: {score}")
            print(f"Total Tokens: {cb.total_tokens}")
            print(f"Prompt Tokens: {cb.prompt_tokens}")
            print(f"Completion Tokens: {cb.completion_tokens}")
            print(f"Total Cost (USD): ${cb.total_cost}")

NameError: name 'RagasEvaluatorChain' is not defined

# Start from scratch

In [8]:
from langchain.chat_models import ChatOpenAI  # Assuming you're using OpenAI's models


class TestCase:
    def __init__(self, prompt, expected_output=None, id=None):
        self.prompt = prompt
        self.expected_output = expected_output
        self.id = id

## Generate test cases using chatgpt

In [9]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

class TestCase:
    def __init__(self, prompt, expected_output=None, id=None):
        self.prompt = prompt
        self.expected_output = expected_output
        self.id = id
        
def generate_test_cases(task_description, amount=3):
    """Generates test cases using a language model based on the task description."""

    prompt_template = """
    You are a helpful AI assistant. Please generate {amount} diverse test cases for the following task:

    Task: {description}

    Each test case should include:
    * Scenario: A clear description of the situation or input to be tested.
    * Expected output: The ideal or expected output from the system.

    Format each test case like this:
    Scenario: [Scenario description]
    Expected output: [Expected output]
    """

    model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=1.5)

    messages = [
        HumanMessage(
            content=prompt_template.format(amount=amount, description=task_description)
        )
    ]

    response = model(messages)
    generated_text = response.content

    test_cases = []
    for case_str in generated_text.split("Scenario:"):
        case_str = case_str.strip() # Ensure that there are no spaces 
        if not case_str:
            continue
        try:
            scenario, expected_output = case_str.split("Expected output:", 1)
            test_cases.append(
                TestCase(scenario.strip(), expected_output.strip())
            )
        except ValueError:
            print(f"Skipping malformed test case: {case_str}")  # Handle cases where "Expected output:" is missing

    return test_cases


# Example usage
task_description = "Explain the concept of few-shot learning."
generated_test_cases = generate_test_cases(task_description)
for i, test_case in enumerate(generated_test_cases):
    print(f"\nTest Case #{i + 1}:")
    print(f"  Scenario: {test_case.prompt}")
    print(f"  Expected Output: {test_case.expected_output}")


  warn_deprecated(
  warn_deprecated(


Skipping malformed test case: Test Case 1:

Test Case #1:
  Scenario: The user is new to the concept of few-shot learning and wants a basic explanation.
  Expected Output: A concise definition of few-shot learning as a machine learning technique that involves training a model using a minimal amount of labelled examples, allowing it to make predictions with only a few examples.

Test Case 2:

Test Case #2:
  Scenario: A student in a machine learning course wants to understand the difference between few-shot learning and traditional supervised learning.
  Expected Output: An explanation that few-shot learning differs from traditional supervised learning in that it focuses on training models to learn from a small number of samples, as opposed to large amounts of labelled data typically required in traditional supervised learning.

Test Case 3:

Test Case #3:
  Scenario: An experienced data scientist is interested in practical applications of few-shot learning in the industry.
  Expected O

## Generation of prompt candidates

In [10]:
from langchain.chat_models import ChatOpenAI


In [11]:
test_cases = generated_test_cases

In [12]:
prompt_generation_model = "gpt-3.5-turbo"  # Or another suitable model
temperature = 1.0  # Adjust for creativity (higher values = more creative)
num_prompt_candidates = 3  # Number of prompts to generate

# This emulates the system messages from the JavaScript code
system_messages = [
    "You are an AI assistant skilled at creating prompts for retrieving relevant documents.",
    "Generate a concise and effective prompt to guide document retrieval.",
    "Craft a prompt that focuses on extracting specific information from documents."
]

In [13]:
prompt_template = """
You are an AI assistant skilled at creating prompts for retrieving relevant documents. 

Consider these test cases:

{test_cases}

Here's the goal for the final prompt:
{description}

Please create a prompt that would effectively guide the retrieval of relevant documents for similar scenarios.

Constraints:
* Do NOT include any specific details from the test cases in your prompt.
* The prompt should be clear, concise, and easy to understand.
* IF YOU USE EXAMPLES, ALWAYS USE ONES THAT ARE VERY DIFFERENT FROM THE TEST CASES.
"""


In [21]:
import nest_asyncio
nest_asyncio.apply()


In [32]:
generated_test_cases

[<__main__.TestCase at 0x7c86b06ea880>,
 <__main__.TestCase at 0x7c86b06ea910>,
 <__main__.TestCase at 0x7c86b06c7ca0>]

In [42]:
generated_test_cases[2].prompt

'An experienced data scientist is interested in practical applications of few-shot learning in the industry.'

In [45]:
len(generated_test_cases)

3

In [47]:
# for i in range(len(generated_test_cases)):
#     print(f"\nScenario: {generated_test_cases[i].prompt}\nExpected output: {generated_test_cases[i].expected_output}")



Scenario: The user is new to the concept of few-shot learning and wants a basic explanation.
Expected output: A concise definition of few-shot learning as a machine learning technique that involves training a model using a minimal amount of labelled examples, allowing it to make predictions with only a few examples.

Test Case 2:

Scenario: A student in a machine learning course wants to understand the difference between few-shot learning and traditional supervised learning.
Expected output: An explanation that few-shot learning differs from traditional supervised learning in that it focuses on training models to learn from a small number of samples, as opposed to large amounts of labelled data typically required in traditional supervised learning.

Test Case 3:

Scenario: An experienced data scientist is interested in practical applications of few-shot learning in the industry.
Expected output: Examples of real-world applications of few-shot learning such as personalized medical diag

In [57]:
# test_cases_list = []

# for i in range(len(generated_test_cases)):
#     test_cases_list.append(f"Test case #{i+1}:\nScenario: {generated_test_cases[i].prompt}\nExpected output: {generated_test_cases[i].expected_output}")

In [58]:
# formatted_test_cases_str = '\n'.join(test_cases_list)


In [102]:
from typing import List
from pydantic import BaseModel
from langchain.chat_models import ChatOpenAI  # Import the correct class for chat models
from langchain.schema import HumanMessage, SystemMessage, AIMessage

# TestCase class (unchanged)
# class TestCase(BaseModel):
#     prompt: str
#     expected_output: str

# System messages and model name
system_messages = [
    "You are an AI assistant skilled at creating prompts for getting information from relevant documents.",
    "Generate a concise and effective prompt to guide information retrieval from documents.",
    "Craft a prompt that focuses on extracting specific information from documents."
]
prompt_generation_model_name = "gpt-3.5-turbo"  # Use the correct model name


def generate_prompt_candidates(testcases: List[TestCase], description: str) -> List[str]:
    """Generates prompt candidates using a chat language model."""

    candidates = []

    for i in range(5):
        system_message = system_messages[i % len(system_messages)]

        formatted_test_cases_str = "\n".join(
            [
                f"Test case #{j+1}:\nScenario: {testcase.prompt}\nExpected output: {testcase.expected_output}"
                for j, testcase in enumerate(testcases)
            ]
        )

        prompt_content = f"""
        You are an AI assistant skilled at creating prompts for retrieving relevant documents. 
        
        Here are some test case scenarios and their expected outputs:
        {formatted_test_cases_str}

        Here is what the user wants the final prompt to accomplish:
        {description}
        
        Respond with your prompt, and nothing else. Be creative.
        NEVER CHEAT BY INCLUDING SPECIFICS ABOUT THE TEST CASES IN YOUR PROMPT. 
        ANY PROMPTS WITH THOSE SPECIFIC EXAMPLES WILL BE DISQUALIFIED.
        IF YOU USE EXAMPLES, ALWAYS USE ONES THAT ARE VERY DIFFERENT FROM THE TEST CASES.
        """

        temperature = 0.7 if i > 0 else 0 # First prompt deterministic, rest creative

        # Use ChatOpenAI for chat models
        model = ChatOpenAI(model_name=prompt_generation_model_name, temperature=temperature)  
        messages = [
            SystemMessage(content=system_message),
            HumanMessage(content=prompt_content)
        ]

        response = model(messages)
        candidates.append(response.content.strip())  # Extract the content directly

    return candidates

# ... (rest of your code) ...


In [103]:

# Example Usage:
description = "Explain the concept of few-shot learning."

candidates = generate_prompt_candidates(generated_test_cases, description)

for i, candidate in enumerate(candidates):
    print(f"Candidate Prompt #{i+1}: {candidate}")



Candidate Prompt #1: Describe the unique approach in machine learning that involves learning from a limited number of examples.
Candidate Prompt #2: Describe the principles and techniques involved in a unique approach to machine learning that aims to minimize the amount of training data required.
Candidate Prompt #3: Please provide detailed explanations, definitions, and examples to help clarify the concept of few-shot learning.
Candidate Prompt #4: Describe the unique approach of a learning method that involves acquiring knowledge from only a limited amount of data.
Candidate Prompt #5: Describe the principles and applications of few-shot learning.


In [104]:
candidates

['Describe the unique approach in machine learning that involves learning from a limited number of examples.',
 'Describe the principles and techniques involved in a unique approach to machine learning that aims to minimize the amount of training data required.',
 'Please provide detailed explanations, definitions, and examples to help clarify the concept of few-shot learning.',
 'Describe the unique approach of a learning method that involves acquiring knowledge from only a limited amount of data.',
 'Describe the principles and applications of few-shot learning.']