In [None]:
from promptolution.llms import APILLM
from sentence_transformers import SentenceTransformer
from sentence_transformers.models import Normalize
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the API token from a file kept outside the notebook so the secret is never
# embedded in a cell. The context manager closes the handle deterministically, and
# strip() drops the trailing newline most editors add, which would otherwise be
# passed on as part of the credential.
with open("../deepinfratoken.txt", "r") as token_file:
    token = token_file.read().strip()

# LLM client used to generate the crossover offspring prompts.
meta_llm = APILLM("meta-llama/Meta-Llama-3-8B-Instruct", token)

In [3]:
# Embedding model used to score how close each offspring prompt is to its parents.
# trust_remote_code=True allows the checkpoint's own modelling code to be executed;
# device="cuda" means this cell expects a CUDA-capable GPU.
similarity_model = SentenceTransformer(
    model_name_or_path="NovaSearch/stella_en_400M_v5",
    device="cuda",
    trust_remote_code=True,
)

A matching Triton is not available, some optimizations will not be enabled
Traceback (most recent call last):
  File "c:\Users\tzehl\anaconda3\envs\ds\Lib\site-packages\xformers\__init__.py", line 57, in _is_triton_available
    import triton  # noqa
    ^^^^^^^^^^^^^
ModuleNotFoundError: No module named 'triton'
Some weights of the model checkpoint at NovaSearch/stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
# Finish the pipeline with a Normalize stage so that dot products of the encoded
# vectors can be read as cosine similarities. The isinstance guard keeps this cell
# idempotent: re-running it does not stack another Normalize module, and the code
# no longer relies on append() returning the model itself.
if not isinstance(similarity_model[-1], Normalize):
    similarity_model.append(Normalize())

In [103]:
def _mean_pairwise_sim(left_embs, right_embs):
    """Return the mean of the row-wise dot products of two equally shaped 2-D arrays."""
    return float(np.sum(left_embs * right_embs, axis=1).mean())


def get_parents_sim(parents, offspring):
    """Average embedding similarity between each offspring prompt and its two parents.

    Args:
        parents: sequence of ``(father, mother)`` prompt pairs, where
            ``parents[i]`` is the pair that produced ``offspring[i]``.
        offspring: sequence of generated prompts, same length as ``parents``.

    Returns:
        ``(father_sim, mother_sim)``: the mean similarity between every
        offspring and its own father, and between every offspring and its own
        mother, as Python floats.

    Raises:
        ValueError: if the inputs are empty or their lengths differ.
    """
    if len(parents) != len(offspring):
        raise ValueError(
            f"parents and offspring must have the same length, "
            f"got {len(parents)} and {len(offspring)}"
        )
    if len(offspring) == 0:
        raise ValueError("parents and offspring must not be empty")

    fathers = [pair[0] for pair in parents]
    mothers = [pair[1] for pair in parents]

    # normalize_embeddings=True makes the dot products below cosine similarities
    # regardless of whether a Normalize module was appended to the global model.
    offspring_embs = np.asarray(similarity_model.encode(offspring, normalize_embeddings=True))
    father_embs = np.asarray(similarity_model.encode(fathers, normalize_embeddings=True))
    mother_embs = np.asarray(similarity_model.encode(mothers, normalize_embeddings=True))

    # Only the matching (offspring[i], parent[i]) pairs are needed, so take the
    # row-wise dot products directly instead of building the full N x N
    # similarity matrix and reading its diagonal.
    father_sims = _mean_pairwise_sim(offspring_embs, father_embs)
    mother_sims = _mean_pairwise_sim(offspring_embs, mother_embs)

    return father_sims, mother_sims

# Sample Prompts

In [9]:
# Seed prompts for a four-way news-topic classification task (World / Sports /
# Business / Tech). The experiment cells cross every ordered pair of these.
# NOTE(review): the first prompt spells "article" as "artical" — confirm whether
# that is intentional before fixing it, since the string is sent to the LLM as-is.
base_prompts = [
    "Your task is to identify the primary topic of the news artical and choose from World, Sports, Business and Tech.",
    "Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.",
    "You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.",
    "Your responsibility is to assign a news article to one of four categories: World, Sports, Business, or Tech, based on its main idea.",
    "Categorize the news article into one of four categories: World, Sports, Business, or Tech, based on its content.",
]

# EvoPrompt GA

Modification: step 2 of the original EvoPrompt GA template is removed, so the LLM only performs crossover.

In [10]:
# Crossover template with one worked example followed by the real request.
# <prompt1> and <prompt2> are placeholders that the experiment loop fills in with
# str.replace; the model is asked to wrap its final answer in <prompt>...</prompt>.
# The template deliberately ends with "1." (presumably so the model continues
# directly with the crossover step — confirm).
meta_prompt = """Please follow the instruction step-by-step to generate a better prompt.
1. Crossover the following prompts and generate a new prompt:
Prompt 1: Rewrite the input text into simpler text.
Prompt 2: Rewrite my complex sentence in simpler terms, but keep the meaning.
2. Generate a final prompt bracketed with <prompt> and </prompt>.

1. Crossover Prompt: Rewrite the complex text into simpler text while keeping its meaning.
2. <prompt>Transform the provided text into simpler language, maintaining its essence.</prompt>

Please follow the instruction step-by-step to generate a better prompt.
1. Crossover the following prompts and generate a new prompt:
Prompt 1: <prompt1>
Prompt 2: <prompt2>
2. Generate a final prompt bracketed with <prompt> and </prompt>.

1."""

In [12]:
# Cross every ordered pair of base prompts (self-pairs included), two passes in
# total, and record each child next to the (father, mother) pair it came from.
parents, offspring = [], []
ordered_pairs = [(first, second) for first in base_prompts for second in base_prompts]

for _ in range(2):
    for father, mother in ordered_pairs:
        filled_template = meta_prompt.replace("<prompt1>", father).replace("<prompt2>", mother)
        reply = meta_llm.get_response([filled_template])[0]
        # Keep the text after the last <prompt> tag, up to the next </prompt>.
        after_open_tag = reply.split("<prompt>")[-1]
        offspring.append(after_open_tag.split("</prompt>")[0])
        parents.append((father, mother))

In [None]:
# Displays the (father, mother) similarity pair for the offspring generated above.
get_parents_sim(parents, offspring)

(0.9564170074462891, 0.9585012054443359)

# EvoPrompt without the one-shot example

In [28]:
# Crossover template without a worked example: only the request itself, with
# <prompt1>/<prompt2> placeholders that the experiment loop fills in with
# str.replace. The model is asked to wrap its answer in <prompt>...</prompt>.
meta_prompt = """Please follow the instruction step-by-step to generate a better prompt.
1. Crossover the following prompts and generate a new prompt:
Prompt 1: <prompt1>
Prompt 2: <prompt2>
2. Generate a final prompt bracketed with <prompt> and </prompt>.

1."""

In [91]:
# Cross every ordered pair of base prompts (self-pairs included), two passes in
# total, and record each child next to the (father, mother) pair it came from.
parents, offspring = [], []
ordered_pairs = [(first, second) for first in base_prompts for second in base_prompts]

for _ in range(2):
    for father, mother in ordered_pairs:
        filled_template = meta_prompt.replace("<prompt1>", father).replace("<prompt2>", mother)
        reply = meta_llm.get_response([filled_template])[0]
        # Keep the text after the last <prompt> tag, up to the next </prompt>.
        after_open_tag = reply.split("<prompt>")[-1]
        offspring.append(after_open_tag.split("</prompt>")[0])
        parents.append((father, mother))

In [None]:
# Displays the (father, mother) similarity pair for the offspring generated above.
get_parents_sim(parents, offspring)

(0.946613540649414, 0.9466203308105469)

# Basic really random prompt

In [34]:
# Minimal template: asks the model to combine the two prompts and return the result
# wrapped in <prompt>...</prompt>. <prompt1>/<prompt2> are placeholders that the
# experiment loop fills in with str.replace.
meta_prompt = """Combine the following prompts to create a new prompt:
Prompt 1: <prompt1>
Prompt 2: <prompt2>

Return the new prompt in the following format:
<prompt>new prompt</prompt>"""

In [None]:
# Cross every ordered pair of base prompts (self-pairs included), two passes in
# total, and record each child next to the (father, mother) pair it came from.
parents, offspring = [], []
ordered_pairs = [(first, second) for first in base_prompts for second in base_prompts]

for _ in range(2):
    for father, mother in ordered_pairs:
        filled_template = meta_prompt.replace("<prompt1>", father).replace("<prompt2>", mother)
        reply = meta_llm.get_response([filled_template])[0]
        # Keep the text after the last <prompt> tag, up to the next </prompt>.
        after_open_tag = reply.split("<prompt>")[-1]
        offspring.append(after_open_tag.split("</prompt>")[0])
        parents.append((father, mother))

# Cell output: the (father, mother) similarity pair for this run.
get_parents_sim(parents, offspring)

(0.954144287109375, 0.9492565155029297)

In [96]:
# Spot-check: the (father, mother) pair at index 1.
parents[1]

('Your task is to identify the primary topic of the news artical and choose from World, Sports, Business and Tech.',
 'Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.')

In [97]:
# Spot-check: the offspring generated from the pair at index 1.
offspring[1]

'Read a news article and categorize it as World, Sports, Business, or Tech by identifying the primary topic.'

# Optimal Prompt

In [105]:
# Template that asks the model to merge the two prompts into one sentence while
# keeping linguistic features of both, returned inside <prompt>...</prompt>.
# <prompt1>/<prompt2> are placeholders that the experiment loop fills in with
# str.replace.
meta_prompt = """Merge the following two sentences into a single coherent sentence. Maintain the key linguistic features from both original sentences:
Prompt 1: <prompt1>
Prompt 2: <prompt2>

Return the new instruction in the following format:
<prompt>new instruction</prompt>"""

In [106]:
# Cross every ordered pair of base prompts (self-pairs included), two passes in
# total, and record each child next to the (father, mother) pair it came from.
parents, offspring = [], []
ordered_pairs = [(first, second) for first in base_prompts for second in base_prompts]

for _ in range(2):
    for father, mother in ordered_pairs:
        filled_template = meta_prompt.replace("<prompt1>", father).replace("<prompt2>", mother)
        reply = meta_llm.get_response([filled_template])[0]
        # Keep the text after the last <prompt> tag, up to the next </prompt>.
        after_open_tag = reply.split("<prompt>")[-1]
        offspring.append(after_open_tag.split("</prompt>")[0])
        parents.append((father, mother))



In [107]:
# Displays the (father, mother) similarity pair for the offspring generated above.
get_parents_sim(parents, offspring)

(0.962232666015625, 0.9581562042236328)

In [100]:
# Dumps the complete offspring list as plain text.
# NOTE(review): this cell's execution count (100) is lower than that of the cells
# above it (105-107), so the stored output may come from an earlier run — confirm
# by re-running top to bottom.
print(offspring)

['Identify the primary topic of the news article and categorize it into one of the following categories: World, Sports, Business, or Tech.', 'Determine the Category: Identify the primary topic of the news article and categorize it as either World, Sports, Business, or Tech based on whether the article primarily focuses on global issues, athletic competitions, financial matters, or cutting-edge technology.', 'Identify the primary topic of the given news article and classify it as World, Sports, Business, or Tech, depending on its main theme.', 'Assign News Articles to Categories: Identify the primary topic of the news article and categorize it as World, Sports, Business, or Tech based on its main idea.', 'Identify and Categorize a News Article', 'Identify the primary topic of the news article and determine which category it best fits: World, Sports, Business, or Tech.', "Read and identify the primary theme of a news article and categorize it as either World, Sports, Business, or Tech, d