# Start from scratch

In [258]:
task_description = "Explain the concept of few-shot learning."

In [130]:
from langchain.chat_models import ChatOpenAI  # Assuming you're using OpenAI's models


class TestCase:
    def __init__(self, prompt, expected_output=None, id=None):
        self.prompt = prompt
        self.expected_output = expected_output
        self.id = id

## Generate test cases using chatgpt

In [153]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

class TestCase:
    def __init__(self, scenario, expected_output=None, id=None):
        self.scenario = scenario
        self.expected_output = expected_output
        self.id = id
        
def generate_test_cases(task_description, amount=3):
    """Generates test cases using a language model based on the task description."""

    prompt_template = """
    You are a helpful AI assistant. Please generate {amount} diverse test cases for the following task:

    Task: {description}

    Each test case should include:
    * Scenario: A clear description of the situation or input to be tested.
    * Expected output: The ideal or expected output from the system.

    Format each test case like this:
    Scenario: [Scenario description]
    Expected output: [Expected output]
    """

    model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=1.5)

    messages = [
        HumanMessage(
            content=prompt_template.format(amount=amount, description=task_description)
        )
    ]

    response = model(messages)
    generated_text = response.content

    test_cases = []
    for case_str in generated_text.split("Scenario:"):
        case_str = case_str.strip() # Ensure that there are no spaces 
        if not case_str:
            continue
        try:
            scenario, expected_output = case_str.split("Expected output:", 1)
            test_cases.append(
                TestCase(scenario.strip(), expected_output.strip())
            )
        except ValueError:
            print(f"Skipping malformed test case: {case_str}")  # Handle cases where "Expected output:" is missing

    return test_cases


# Example usage
task_description = "Explain the concept of few-shot learning."
generated_test_cases = generate_test_cases(task_description)
for i, test_case in enumerate(generated_test_cases):
    print(f"\nTest Case #{i + 1}:")
    print(f"  Scenario: {test_case.scenario}")
    print(f"  Expected Output: {test_case.expected_output}")



Test Case #1:
  Scenario: A student is researching on machine learning and comes across the term few-shot learning for the first time.
  Expected Output: The system should explain that few-shot learning is a type of machine learning where a model can learn from very limited training data, usually only a few examples per class, in order to make precise predictions for new unseen data. It should also mention techniques such as meta-learning and transfer learning that are commonly used in few-shot learning approaches.

Test Case #2:
  Scenario: A software engineer working on developing an image recognition algorithm wants to understand how few-shot learning works.
  Expected Output: The system should provide a detailed explanation that in few-shot learning, the model is trained with a very small dataset compared to traditional machine learning models. It utilizes transfer learning, meta-learning, or other techniques to learn from few examples in order to quickly adapt to new information 

## Generation of prompt candidates

In [10]:
from langchain.chat_models import ChatOpenAI


In [11]:
test_cases = generated_test_cases

In [12]:
prompt_generation_model = "gpt-3.5-turbo"  # Or another suitable model
temperature = 1.0  # Adjust for creativity (higher values = more creative)
num_prompt_candidates = 3  # Number of prompts to generate

# This emulates the system messages from the JavaScript code
system_messages = [
    "You are an AI assistant skilled at creating prompts for retrieving relevant documents.",
    "Generate a concise and effective prompt to guide document retrieval.",
    "Craft a prompt that focuses on extracting specific information from documents."
]

In [13]:
prompt_template = """
You are an AI assistant skilled at creating prompts for retrieving relevant documents. 

Consider these test cases:

{test_cases}

Here's the goal for the final prompt:
{description}

Please create a prompt that would effectively guide the retrieval of relevant documents for similar scenarios.

Constraints:
* Do NOT include any specific details from the test cases in your prompt.
* The prompt should be clear, concise, and easy to understand.
* IF YOU USE EXAMPLES, ALWAYS USE ONES THAT ARE VERY DIFFERENT FROM THE TEST CASES.
"""


## Generate prompt candidates

In [208]:
from typing import List
from pydantic import BaseModel
from langchain.chat_models import ChatOpenAI  # Import the correct class for chat models
from langchain.schema import HumanMessage, SystemMessage, AIMessage

import nest_asyncio
nest_asyncio.apply()


# TestCase class (unchanged)
# class TestCase(BaseModel):
#     prompt: str
#     expected_output: str

# System messages and model name
system_messages = [
    "You are an AI assistant skilled at creating prompts for getting information from relevant documents.",
    "Generate a concise and effective prompt to guide information retrieval from documents.",
    "Craft a prompt that focuses on extracting specific information from documents."
]
prompt_generation_model_name = "gpt-3.5-turbo"  # Use the correct model name


def generate_prompt_candidates(testcases: List[TestCase], description: str) -> List[str]:
    """Generates prompt candidates using a chat language model."""

    candidates = []

    for i in range(5):
        system_message = system_messages[i % len(system_messages)]

        formatted_test_cases_str = "\n".join(
            [
                f"Test case #{j+1}:\nScenario: {testcase.prompt}\nExpected output: {testcase.expected_output}"
                for j, testcase in enumerate(testcases)
            ]
        )

        prompt_content = f"""
        You are an AI assistant skilled at creating prompts for retrieving relevant documents. 
        
        Here are some test case scenarios and their expected outputs:
        {formatted_test_cases_str}

        Here is what the user wants the final prompt to accomplish:
        {description}
        
        Respond with your prompt, and nothing else. Be creative.
        NEVER CHEAT BY INCLUDING SPECIFICS ABOUT THE TEST CASES IN YOUR PROMPT. 
        ANY PROMPTS WITH THOSE SPECIFIC EXAMPLES WILL BE DISQUALIFIED.
        IF YOU USE EXAMPLES, ALWAYS USE ONES THAT ARE VERY DIFFERENT FROM THE TEST CASES.
        """

        temperature = 0.7 if i > 0 else 0 # First prompt deterministic, rest creative

        # Use ChatOpenAI for chat models
        model = ChatOpenAI(model_name=prompt_generation_model_name, temperature=temperature)  
        messages = [
            SystemMessage(content=system_message),
            HumanMessage(content=prompt_content)
        ]

        response = model(messages)
        candidates.append(response.content.strip())  # Extract the content directly

    return candidates

# ... (rest of your code) ...


In [103]:

# Example Usage:
description = "Explain the concept of few-shot learning."

candidates = generate_prompt_candidates(generated_test_cases, description)

for i, candidate in enumerate(candidates):
    print(f"Candidate Prompt #{i+1}: {candidate}")



Candidate Prompt #1: Describe the unique approach in machine learning that involves learning from a limited number of examples.
Candidate Prompt #2: Describe the principles and techniques involved in a unique approach to machine learning that aims to minimize the amount of training data required.
Candidate Prompt #3: Please provide detailed explanations, definitions, and examples to help clarify the concept of few-shot learning.
Candidate Prompt #4: Describe the unique approach of a learning method that involves acquiring knowledge from only a limited amount of data.
Candidate Prompt #5: Describe the principles and applications of few-shot learning.


In [104]:
candidates

['Describe the unique approach in machine learning that involves learning from a limited number of examples.',
 'Describe the principles and techniques involved in a unique approach to machine learning that aims to minimize the amount of training data required.',
 'Please provide detailed explanations, definitions, and examples to help clarify the concept of few-shot learning.',
 'Describe the unique approach of a learning method that involves acquiring knowledge from only a limited amount of data.',
 'Describe the principles and applications of few-shot learning.']

## Get the scores for the prompts

In [108]:
import numpy as np
import asyncio
from langchain.embeddings import OpenAIEmbeddings

def get_score(test_case: TestCase, prompt_a: str, prompt_b: str, model: ChatOpenAI, embedding_model: OpenAIEmbeddings):
    """Calculates the score for a prompt comparison using human judgment or embedding similarity."""
    answer_a = asyncio.run(getGeneration(prompt_a, test_case, model))
    answer_b = asyncio.run(getGeneration(prompt_b, test_case, model))

    if not test_case.expected_output.strip():  # No expected output, rely on human judgment
        print(f"Test Case: {test_case.prompt}")
        print(f"Option A: {answer_a}")
        print(f"Option B: {answer_b}")

        winner = input("Which answer is better? (A/B/draw): ").upper()
        while winner not in ['A', 'B', 'DRAW']:
            winner = input("Invalid input. Please enter A, B, or draw: ").upper()

        return 1 if winner == 'A' else 0 if winner == 'B' else 0.5
    else:  # Use embeddings to calculate similarity
        embedding_a = embedding_model.embed_query(answer_a)
        embedding_b = embedding_model.embed_query(answer_b)
        embedding_expected = embedding_model.embed_query(test_case.expected_output)

        score_a = cosine_similarity(embedding_a, embedding_expected)
        score_b = cosine_similarity(embedding_b, embedding_expected)

        # Handle ties and near-ties
        if abs(score_a - score_b) < 0.1:  # Adjust threshold as needed
            return 0.5  # Draw

        return 1 if score_a > score_b else 0  # Return 1 if A wins, 0 if B wins
    
# Cosine similarity
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


##### Load documents

In [119]:
import bs4
from langchain_community.document_loaders import WebBaseLoader


# Load Documents
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()


##### Split into chunks and save them to the vectorstore

In [145]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=200)
from langchain_community.vectorstores import Chroma
splits = text_splitter.split_documents(docs)

embedding = OpenAIEmbeddings()

# Embed
vectorstore = Chroma.from_documents(documents=splits,
                                    embedding=embedding)

retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

## Generate answers for a prompt candidate given a scenario

In [243]:
import asyncio
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, Document
from langchain.embeddings import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain.prompts.chat import ChatPromptTemplate
from langchain_community.vectorstores import Chroma


import numpy as np

def retrieve_context(query, retriever):
    """Retrieves relevant context for a given query from your knowledge base or documents."""
    docs = retriever.invoke(query)
    return docs

def getGeneration(prompt_candidate: str, testCase: TestCase, model: ChatOpenAI, retriever):
    """Generates a response using the provided prompt and test case."""

    prompt_template = ChatPromptTemplate.from_template(
    """
        Use the following context to answer the question.
        If you don't know the answer, just say that you don't know, don't try to make up an answer.
        Context: {context}
        Question: {prompt_candidate}
        Scenario: {scenario}
        Answer:"""
    )

    # print(testCase.scenario)
    context = retrieve_context(testCase.scenario, retriever) # get the context documents 

    # Create RAG chain for current prompt
    # rag_chain = create_rag_chain_factory(prompt, vectorstore)
    chain = prompt_template | model | StrOutputParser()
    # Generate answer from RAG chain
    answer = chain.invoke({"context": context, "prompt_candidate": prompt_candidate, "scenario": testCase.scenario})

    return answer


In [244]:
candidates[0]
generated_test_cases[0].scenario

'A student is researching on machine learning and comes across the term few-shot learning for the first time.'

In [245]:
# Test getGeneration function
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

answer_a = getGeneration(candidates[0], generated_test_cases[0], llm, retriever)

In [246]:
answer_a

'Few-shot learning is a unique approach in machine learning that involves learning from a limited number of examples, typically a small number of demonstrations consisting of both input and desired output on the target task. By providing the model with good examples, it can better understand human intention and criteria for the desired answers, leading to better performance compared to zero-shot learning. However, few-shot learning may require more token consumption and could hit the context length limit when dealing with long input and output text.'

##### Example: get answers for two prompt candidates in a given scenario

In [247]:
candidates[4]

'Describe the principles and applications of few-shot learning.'

In [248]:
answer_b = getGeneration(candidates[4], generated_test_cases[0], llm, retriever)

In [249]:
answer_b

'Few-shot learning involves training a model with only a few examples or demonstrations of the desired input and output for a specific task. By providing the model with good examples, it can better understand human intention and criteria for the desired answers. This approach often leads to better performance compared to zero-shot learning, where the model has no prior examples to learn from. However, few-shot learning may require more token consumption and could hit context length limits when dealing with long input and output text. Overall, few-shot learning is a valuable technique in machine learning for quickly adapting to new tasks with limited training data.'

#### Get embedding

In [250]:
# Embedding model example is OpenAIEmbeddings
def get_embedding(text, embedding_model):
    """Gets the embedding for a given text."""
    return embedding_model.embed_query(text)

###### Example to get embeddings A and B for prompt candidate's answers a and b

In [251]:
# Usage example to get embedding of the answer of the first prompt candidate
embedding_a = get_embedding(answer_a, embedding_model=embedding)
embedding_b = get_embedding(answer_b, embedding_model=embedding)

print(embedding_a)
print(embedding_b)

[-0.031100148625757905, -0.0017992906673401784, 0.039347991715055076, -0.0038317011561723575, 0.0033572530687696234, 0.0039269188785990075, 0.010598741570822138, 0.006218716094647809, -0.04843637765018528, -0.028237043212504035, 0.03131028294023229, 0.024730395756821545, -0.032833770224349024, -0.0184788463501601, -0.001621988267895558, 0.02561034099850966, 0.029707996452340884, -0.0009021078669957089, 0.022169362960822194, -0.02201176082798253, -0.029760530496620773, 0.013356777964193649, -0.030679876271518806, -0.008904518642795766, 0.011511520590185183, -0.0168896915106935, 0.020225603322466357, -0.021276284208064107, -0.002492083259865872, -0.01942445914719807, 0.013100674498329197, -0.001730339734222826, -0.0029205640585237046, -0.004409575410420474, -0.004573744298795123, -0.014184189161601878, 0.01023757001639791, -0.0007925251340056311, 0.009259123441685006, -0.021867292206212842, 0.013671982229872974, 0.040530007711352546, -0.007584601245924884, -0.0010375471416971325, 0.01904

#### Using cosine similarity

In [252]:
def cosine_similarity(a: np.ndarray, b: np.ndarray):
    """Calculates the cosine similarity between two vectors."""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

###### Get the cosine similarity between embedding a and b

In [253]:
similarity = cosine_similarity(embedding_a, embedding_b)
print(similarity)

0.9869567061697153


## Get the scores and the winner between two prompts

### Use GPT to decide the best prompt between the two, otherwise use cosine similarity if no expected output is given in a specific scenario

In [257]:
ranking_prompt = """
        Your job is to rank the quality of two outputs generated by different prompts. The prompts are used to generate a response for a given task.

        You will be provided with the task description, the test prompt, and two generations - one for each system prompt.

        Rank the generations in order of quality. If Generation A is better, respond with 'A'. If Generation B is better, respond with 'B'.

        Remember, to be considered 'better', a generation must not just be good, it must be noticeably superior to the other.

        Also, keep in mind that you are a very harsh critic. Only rank a generation as better if it truly impresses you more than the other.

        Respond with your ranking, and nothing else. Be fair and unbiased in your judgement.
"""

In [None]:
task_description = "Explain the concept of few-shot learning."

In [288]:
from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.prompts.chat import ChatPromptTemplate

system_message_prompt = SystemMessagePromptTemplate.from_template(ranking_prompt)

# LLM Judgment (if no expected output is provided)
human_message_prompt = HumanMessagePromptTemplate.from_template(
    """
    Task: {task_description}
    Prompt: {test_case_scenario}
    Generation A: {answer_a}
    Generation B: {answer_b}
    """
)

chat_prompt_template = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

In [289]:
llm_gpt3 = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

chain = chat_prompt_template | llm_gpt3 | StrOutputParser()

winner = chain.invoke({"task_description": task_description, "test_case_scenario": generated_test_cases[0].scenario, "answer_a": answer_a, "answer_b": answer_b})

In [290]:
winner

'B'

#### Get the winner between two prompts

In [291]:
def getWinner():
  return winner

In [292]:
def comparePromptsUsingLLM(task_description, test_case, answer_a, answer_b):
    winner = chain.invoke({
        "task_description": task_description, 
        "test_case_scenario": test_case.scenario, 
        "answer_a": answer_a, 
        "answer_b": answer_b
    })

    winner = getWinner()
    while winner not in ['A', 'B', 'DRAW']:
        print("Invalid input from the model. Please try generating again.")

        winner = getWinner()
    return 1 if winner == 'A' else 0 if winner == 'B' else 0.5 

In [293]:
def comparePromptsUsingCosineSimilarity(test_case, embedding_model):
    embedding_a = get_embedding(answer_a, embedding_model)
    embedding_b = get_embedding(answer_b, embedding_model)
    embedding_expected = get_embedding(test_case.expected_output, embedding_model)

    score_a = cosine_similarity(embedding_a, embedding_expected)
    score_b = cosine_similarity(embedding_b, embedding_expected)

    # Handle ties and near-ties
    if abs(score_a - score_b) < 0.1:  # Adjust threshold as needed
        return 0.5  # Draw

    return 1 if score_a > score_b else 0  # Return 1 if A wins, 0 if B wins

#### Get the score, 
##### this is 1 for prompt candidate A and 0 for B, else we get 0.5 for a draw

In [308]:
def get_score(test_case, prompt_a, prompt_b, model, embedding_model, retreiver):
    """
        Calculates the score for a prompt comparison using either using LLM or embedding similarity.
        Returns 1 if prompt A is better, 0 if prompt B is better, and 0.5 if they are equally good.
    """
    answer_a = getGeneration(prompt_a, test_case, model, retreiver)
    answer_b = getGeneration(prompt_b, test_case, model, retreiver)

    if test_case.expected_output.strip():
        print("USING LLM to compare prompts")
        return comparePromptsUsingLLM(task_description, test_case, answer_a, answer_b)

    else:  # Use embeddings to calculate similarity
        print("USING COSINE to compare prompts")
        return comparePromptsUsingCosineSimilarity(test_case, embedding_model)


##### Example on getting the score between two prompts

In [307]:
# Initialize Langchain models
model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
embedding_model = OpenAIEmbeddings()

# Sample Usage to Test get_score()
test_case = generated_test_cases[0]  # Choose the first test case
prompt_a = candidates[0]  # Choose the first prompt candidate
prompt_b = candidates[4]  # Choose the second prompt candidate

#Get scores
score = get_score(test_case, prompt_a, prompt_b, model, embedding_model, retriever)

print("Scores:", score)


USING LLM to compare prompts
Scores: 0


## 

## Prompt ranking

### Use Elo rating to rank the prompts

In [311]:
class Candidate(BaseModel):
    content: str
    rating: float
    sd: float
    id: int

candidates_playing = [
    Candidate(content=prompt, rating=1500, sd=350, id=i)
    for i, prompt in enumerate(candidates)
]

In [312]:
candidates_playing

[Candidate(content='Describe the unique approach in machine learning that involves learning from a limited number of examples.', rating=1500.0, sd=350.0, id=0),
 Candidate(content='Describe the principles and techniques involved in a unique approach to machine learning that aims to minimize the amount of training data required.', rating=1500.0, sd=350.0, id=1),
 Candidate(content='Please provide detailed explanations, definitions, and examples to help clarify the concept of few-shot learning.', rating=1500.0, sd=350.0, id=2),
 Candidate(content='Describe the unique approach of a learning method that involves acquiring knowledge from only a limited amount of data.', rating=1500.0, sd=350.0, id=3),
 Candidate(content='Describe the principles and applications of few-shot learning.', rating=1500.0, sd=350.0, id=4)]

In [318]:
import asyncio
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
import numpy as np


# ##### ... (Your other functions like getGeneration, generate_prompt_candidates, etc. are defined) ...
# class Candidate(BaseModel):
#     content: str
#     rating: float
#     sd: float
#     id: int

# Initialize Langchain models
model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
embedding_model = OpenAIEmbeddings()

#### Your existing candidate prompts (formatted as dictionaries)
# candidates = [
#     Candidate(content=prompt, rating=1500, sd=350, id=i)
#     for i, prompt in enumerate(generated_prompt_candidates)
# ]

def calculate_expected_score(rating1, rating2):
    """Calculates the expected score of a player with rating1 against a player with rating2."""
    return 1 / (1 + 10 ** ((rating2 - rating1) / 400))

def update_elo(rating1, rating2, outcome, k_factor=32):
    """Updates the ELO ratings based on the outcome of a comparison."""

    expected1 = calculate_expected_score(rating1, rating2)
    expected2 = calculate_expected_score(rating2, rating1)

    new_rating1 = rating1 + k_factor * (outcome - expected1)
    new_rating2 = rating2 + k_factor * ((1-outcome) - expected2)

    return new_rating1, new_rating2


# Sample Usage to Test get_score() with ELO Update
test_case = generated_test_cases[0]  # Choose the first test case
prompt_a = candidates_playing[0].content  # Choose the first prompt candidate
prompt_b = candidates_playing[4].content  # Choose the second prompt candidate

# Get scores and update ELO ratings
score = get_score(test_case, prompt_a, prompt_b, model, embedding_model, retriever)
print(f"Score for '{prompt_a}' vs. '{prompt_b}': {score}")

# Update ELO ratings based on the score
candidates_playing[0].rating, candidates_playing[1].rating = update_elo(
    candidates_playing[0].rating, candidates_playing[1].rating, score
)


USING LLM to compare prompts
Score for 'Describe the unique approach in machine learning that involves learning from a limited number of examples.' vs. 'Describe the principles and applications of few-shot learning.': 0


In [322]:
def calculate_elo_rank(player_a_rank, player_b_rank, score, k=32):
  """
  Calculate the new Elo rank of a player based on the score of a game.
  
  player_a_rank: The current Elo rank of player A
  player_b_rank: The current Elo rank of player B
  score: The score of the game (1 for a win, 0.5 for a draw, 0 for a loss)
  k: The K-factor, which determines the maximum change in rank (default: 32)
  """
  # Calculate the expected score of player A
  expected_score_a = 1 / (1 + 10 ** ((player_b_rank - player_a_rank) / 400))
  
  # Update the Elo rank of player A
  new_rank_a = player_a_rank + k * (score - expected_score_a)
  
  return new_rank_a

# Initialize the Elo ranks of the prompts
elo_ranks = {prompt: 1000 for prompt in candidates}  # Start with a rank of 1000 for each prompt

# Calculate the new Elo ranks based on the scores
for i in range(len(candidates)):
  for j in range(i + 1, len(candidates)):
    # Get the score for the game between prompt i and prompt j
    score = get_score(test_case, candidates[i], candidates[j], model, embedding_model, retriever)
    
    # Calculate the new Elo ranks
    new_rank_i = calculate_elo_rank(elo_ranks[candidates[i]], elo_ranks[candidates[j]], score)
    new_rank_j = calculate_elo_rank(elo_ranks[candidates[j]], elo_ranks[candidates[i]], 1 - score)  # The score for prompt j is 1 - score because if prompt i wins, prompt j loses, and vice versa
    
    # Update the Elo ranks
    elo_ranks[candidates[i]] = new_rank_i
    elo_ranks[candidates[j]] = new_rank_j

print("Elo ranks:", elo_ranks)

USING LLM to compare prompts
USING LLM to compare prompts
USING LLM to compare prompts
USING LLM to compare prompts
USING LLM to compare prompts
USING LLM to compare prompts
USING LLM to compare prompts
USING LLM to compare prompts
USING LLM to compare prompts
USING LLM to compare prompts
Elo ranks: {'Describe the unique approach in machine learning that involves learning from a limited number of examples.': 940.2704738806459, 'Describe the principles and techniques involved in a unique approach to machine learning that aims to minimize the amount of training data required.': 969.9824283068954, 'Please provide detailed explanations, definitions, and examples to help clarify the concept of few-shot learning.': 999.8461918235291, 'Describe the unique approach of a learning method that involves acquiring knowledge from only a limited amount of data.': 1029.8642730003692, 'Describe the principles and applications of few-shot learning.': 1060.0366329885605}


In [321]:
from pprint import pprint
pprint(elo_ranks)

{'Describe the principles and applications of few-shot learning.': 1060.0366329885605,
 'Describe the principles and techniques involved in a unique approach to machine learning that aims to minimize the amount of training data required.': 969.9824283068954,
 'Describe the unique approach in machine learning that involves learning from a limited number of examples.': 940.2704738806459,
 'Describe the unique approach of a learning method that involves acquiring knowledge from only a limited amount of data.': 1029.8642730003692,
 'Please provide detailed explanations, definitions, and examples to help clarify the concept of few-shot learning.': 999.8461918235291}


In [325]:
# Get the highest rank
highest_rank = max(elo_ranks.values())

# Create a list of tuples, where each tuple is (prompt, rank, percentage score)
ranked_prompts = [(prompt, rank, rank / highest_rank * 100) for prompt, rank in elo_ranks.items()]

# Sort the list of tuples in descending order of rank
ranked_prompts.sort(key=lambda x: x[1], reverse=True)

# Display the ranked prompts along with the percentage score
for prompt, rank, percentage in ranked_prompts:
  print(f"Prompt: {prompt}\nRank: {rank}\nPercentage Score: {percentage}%\n")

Prompt: Describe the principles and applications of few-shot learning.
Rank: 1060.0366329885605
Percentage Score: 100.0%

Prompt: Describe the unique approach of a learning method that involves acquiring knowledge from only a limited amount of data.
Rank: 1029.8642730003692
Percentage Score: 97.15364931274813%

Prompt: Please provide detailed explanations, definitions, and examples to help clarify the concept of few-shot learning.
Rank: 999.8461918235291
Percentage Score: 94.32185272735938%

Prompt: Describe the principles and techniques involved in a unique approach to machine learning that aims to minimize the amount of training data required.
Rank: 969.9824283068954
Percentage Score: 91.50461390869337%

Prompt: Describe the unique approach in machine learning that involves learning from a limited number of examples.
Rank: 940.2704738806459
Percentage Score: 88.70169620739823%

