In [1]:
%pip install -qU langchain langchain-openai langsmith python-dotenv pydantic

Note: you may need to restart the kernel to use updated packages.


In [2]:
from dotenv import load_dotenv
load_dotenv()

from langsmith import Client
client = Client()
from langchain.schema import SystemMessage, HumanMessage, BaseMessage, AIMessage


In [3]:
# Base system prompt. The "<HERE>" bullet is the placeholder token that gets
# replaced by each candidate rule.
example_system_prompt = """You are a helpful assistant. 

Rules:
- You are only allowed to talk about coding
- <HERE>
- Try to be concise"""

# The behaviour the finished prompt is expected to enforce.
example_instruction = "The agent should only respond in English."

# Inputs to evaluate against: one example is a message history (a list of
# messages), the other is a plain user string.
examples = [
    [AIMessage(content="Hola, como estas?")],
    "Hi there!",
]

In [4]:
from typing import List, Union
import random
from langsmith.schemas import Run, Example
from langsmith import evaluate
from langchain.chat_models import init_chat_model
from schemas import Rule, Rules, CorrectnessEvaluationResult


class Promptnado:
    """Search for a rule that makes a system prompt satisfy an instruction.

    The system prompt must contain ``rule_token``. Candidate rules are generated
    by ``rule_gen_model``, interpolated into the prompt in place of the token,
    and evaluated with LangSmith ``evaluate`` against a dataset built from
    ``examples``. A rule counts as a solution when every example scores 1.

    After ``run()``:
        solved            -- True if a passing rule was found.
        successful_prompt -- the interpolated prompt that passed, else None.
        results           -- the evaluation results of the passing attempt, else None.
    """

    def __init__(self, system_prompt: str, instruction: str, examples: List[Union[str, List[BaseMessage]]],
                 rule_token="<HERE>", max_attempts=10,
                 rule_gen_model=None,
                 eval_model=None,
                 prediction_model=None):
        """Store the configuration and reset the search state.

        Args:
            system_prompt: Prompt template containing ``rule_token``.
            instruction: Behaviour the final prompt must enforce.
            examples: Each item is a string or a list of ``BaseMessage``.
            rule_token: Placeholder replaced by each candidate rule.
            max_attempts: Maximum number of rules to evaluate.
            rule_gen_model: Model used to propose rules.
            eval_model: Model used to judge each prediction.
            prediction_model: Model that is run with the candidate prompt.
                Any model left as ``None`` is created per instance via
                ``init_chat_model("gpt-4o-mini", temperature=0.7)``.

        Raises:
            ValueError: If ``rule_token`` does not occur in ``system_prompt``.
        """
        # Without the token there is nowhere to interpolate a rule.
        if rule_token not in system_prompt:
            raise ValueError(f"Rule token {rule_token} not found in system prompt")

        self.system_prompt = system_prompt
        self.instruction = instruction
        self.examples = examples
        self.rule_token = rule_token

        # Create random dataset name
        self.dataset_name = f"Promptnado_{random.randint(0, 1000000)}"

        # `attempts` is the 1-based number of the NEXT attempt to run.
        self.attempts = 1
        self.solved = False
        self.current_rule = None
        self.current_prompt = None
        self.successful_prompt = None
        self.results = None
        self.rules = []
        self.dataset = None

        # Default models are built here rather than in the signature: a call in
        # the signature runs once at class-definition time and the resulting
        # objects are shared by every instance.
        self.rule_gen_model = (
            rule_gen_model if rule_gen_model is not None else self._default_chat_model())
        self.eval_model = (
            eval_model if eval_model is not None else self._default_chat_model())
        self.prediction_model = (
            prediction_model if prediction_model is not None else self._default_chat_model())
        self.max_attempts = max_attempts

    @staticmethod
    def _default_chat_model():
        """Build the chat model used when the caller does not supply one."""
        return init_chat_model("gpt-4o-mini", temperature=0.7)

    def _create_dataset(self):
        """Create the LangSmith dataset and add one example per item in ``self.examples``.

        Raises:
            ValueError: If an item is neither a string nor a list of ``BaseMessage``.
        """
        dataset = client.create_dataset(self.dataset_name, description=self.instruction)
        for example in self.examples:
            is_text = isinstance(example, str)
            is_message_list = isinstance(example, list) and all(
                isinstance(msg, BaseMessage) for msg in example)
            if not (is_text or is_message_list):
                raise ValueError("Invalid example format. Must be a string or a list of BaseMessages.")
            # Both accepted shapes are stored under the same "inputs" key.
            client.create_example(inputs={"inputs": example}, dataset_id=dataset.id)

        self.dataset = dataset
        print(f"Created dataset: {self.dataset_name} with {len(self.examples)} examples")
        return dataset

    def _generate_rules(self):
        """Ask ``rule_gen_model`` for candidate rules; store and return them."""

        system_prompt = f"""You are an expert LLM Prompt Engineer. Your job is to try to solve for the provided <Instructions> \
by making adjustments to the <Original Prompt>. You should attempt to make 5 suggestions for prompts that might work. Each suggestion you \
make will be interpolated into the prompt where {self.rule_token} is, and then evaluated for correctness against a dataset of \
examples.

<Instructions>
{self.instruction}
</Instructions>

<Original Prompt>
{self.system_prompt}
</Original Prompt>
"""

        structured_llm = self.rule_gen_model.with_structured_output(Rules)

        rules: Rules = structured_llm.invoke(system_prompt)

        self.rules = rules.rules
        print(f"Generated {len(self.rules)} rules\n")
        print(self.rules)
        return self.rules

    def _build_prompt(self, rule: Rule):
        """Return the system prompt with ``rule_token`` replaced by ``rule.prompt``."""
        interpolated_prompt = self.system_prompt.replace(
            self.rule_token, rule.prompt)

        print(f"Interpolated prompt:\n\n{interpolated_prompt}")
        return interpolated_prompt

    def _evaluate_correctness(self, run: Run, example: Example):
        """Evaluator: use ``eval_model`` to judge whether the run output follows the instruction.

        Returns a dict with ``score`` (1 or 0), ``key`` and ``comment``.
        """
        # A run without an "output" entry cannot have followed the instruction;
        # score it 0 instead of failing on the subscript below.
        outputs = run.outputs or {}
        if "output" not in outputs:
            return {"score": 0, "key": "correctness",
                    "comment": "Prediction produced no output."}

        system_prompt = f"""Your job is to validate whether the <Result> meets the criteria for <Instruction>. Try to be a harsh judge.

<Instruction>
{self.instruction}
</Instruction>

<Result>
{outputs["output"]}
</Result>
"""

        structured_llm = self.eval_model.with_structured_output(
            CorrectnessEvaluationResult)

        result: CorrectnessEvaluationResult = structured_llm.invoke(
            system_prompt)

        return {"score": 1 if result.correct else 0, "key": "correctness", "comment": result.reasoning}

    def _predict(self, inputs: dict):
        """Run ``prediction_model`` with the current prompt on one dataset example.

        ``inputs["inputs"]`` is either a string (sent as a single human message)
        or a list whose items are message dicts or ``BaseMessage`` objects.

        Returns:
            ``{"output": ...}`` holding the response content, or a rendering of
            the tool calls when the response contains any.

        Raises:
            ValueError: If the input or one of its messages has an unsupported shape.
        """
        try:
            payload = inputs["inputs"]
            messages = [SystemMessage(content=self.current_prompt)]

            if isinstance(payload, str):
                messages.append(HumanMessage(content=payload))
            elif isinstance(payload, list):
                for msg_dict in payload:
                    if isinstance(msg_dict, dict):
                        # Rebuild a message object from its dict form, keyed on "type".
                        if msg_dict["type"] == "human":
                            messages.append(HumanMessage(**msg_dict))
                        elif msg_dict["type"] == "ai":
                            messages.append(AIMessage(**msg_dict))
                        else:
                            messages.append(BaseMessage(**msg_dict))
                    elif isinstance(msg_dict, BaseMessage):
                        messages.append(msg_dict)
                    else:
                        raise ValueError(f"Invalid message format: {msg_dict}")
            else:
                raise ValueError("Invalid input format")

            # Invoke the model
            response = self.prediction_model.invoke(messages)
            if response.tool_calls:
                return {"output": f"Tool Calls:\n{response.tool_calls}"}

            return {"output": response.content}

        except Exception as e:
            print(f"Error predicting: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    def _is_solved(self, eval_results):
        """Return True only if every example in ``eval_results`` scored 1.

        Raises:
            Exception: If ``eval_results`` contains no rows at all.
        """
        # NOTE(review): `_results` is a private attribute of the object returned
        # by `evaluate`; confirm it is still populated on the installed version.
        results = eval_results._results

        if len(results) == 0:
            raise Exception("No results found")

        for result in results:
            scores = [item.score for item in result['evaluation_results']["results"]]
            # A row with no evaluator result cannot count as a pass.
            if not scores or any(score != 1 for score in scores):
                return False

        return True

    def _test_rule(self, rule: Rule):
        """Evaluate one rule against the dataset and advance the attempt counter."""
        print(f'\nTesting rule: "{rule.prompt}"')
        self.current_rule = rule

        self.current_prompt = self._build_prompt(self.current_rule)

        results = evaluate(
            self._predict,
            data=self.dataset_name,
            evaluators=[self._evaluate_correctness],
            experiment_prefix=f"Attempt-{self.attempts}",
        )

        self.attempts += 1

        return results

    def run(self):
        """Create the dataset, then test generated rules until one passes or the budget runs out.

        At most ``max_attempts`` rules are evaluated. On success ``solved``,
        ``successful_prompt`` and ``results`` are set and the prompt and
        experiment URL are printed. Otherwise a message is printed and the
        method returns without claiming success.
        """
        print(f"Running Promptnado with instruction: {self.instruction}")
        self._create_dataset()

        # `attempts` is the number of the next attempt, so `<=` permits exactly
        # `max_attempts` evaluations (matching the per-rule guard below).
        while not self.solved and self.attempts <= self.max_attempts:
            try:
                candidate_rules = self._generate_rules()
                if not candidate_rules:
                    # No rule means no attempt is consumed; stop rather than
                    # regenerate forever.
                    print("No rules were generated; stopping")
                    return
                for rule in candidate_rules:
                    if self.attempts > self.max_attempts:
                        break
                    results = self._test_rule(rule)
                    if self._is_solved(results):
                        self.results = results
                        self.solved = True
                        self.successful_prompt = self.current_prompt
                        break
            except Exception as e:
                print(f"Fatal error encountered: {e}")
                return  # Exit the while loop on error

        if not self.solved:
            print("Max attempts reached")
            return

        print("\n\nSolved!! Current prompt can be found at `self.successful_prompt`\n\n")

        print(
            f"Successful prompt:\n====================\n{self.successful_prompt}\n=================")
        print(self.results._manager._experiment.url)

  warn_beta(


In [5]:
pn = Promptnado(example_system_prompt, example_instruction,
                examples, max_attempts=2)
pn.run()

Running Promptnado with instruction: The agent should only respond in English.
Created dataset: Promptnado_36011 with 2 examples
Generated 5 rules

[Rule(reasoning='This prompt explicitly states that the assistant should respond solely in English, which aligns with the instruction provided.', prompt='You should respond only in English.'), Rule(reasoning='By phrasing the rule as a directive, it reinforces the requirement that all communication must occur in English.', prompt='All responses must be in English only.'), Rule(reasoning='This version emphasizes clarity and understanding, ensuring that the assistant knows to use English for all interactions.', prompt='Communicate exclusively in English for all responses.'), Rule(reasoning='This version of the prompt directly instructs the assistant to use English as the sole language for replies, making the requirement very clear.', prompt='Respond in English and avoid using any other languages.'), Rule(reasoning='This prompt explicitly restr

  from .autonotebook import tqdm as notebook_tqdm


View the evaluation results for experiment: 'Attempt-1-4be5a837' at:
https://smith.langchain.com/o/d967989d-4221-53db-b0a5-665b504acba2/datasets/0365f92d-e4fb-4921-83b6-210b3266c5e3/compare?selectedSessions=0b33ebdb-6f40-4e0b-b184-cabd893fb550




0it [00:00, ?it/s]

Messages: [SystemMessage(content='You are a helpful assistant. \n\nRules:\n- You are only allowed to talk about coding\n- You should respond only in English.\n- Try to be concise'), HumanMessage(content='Hi there!')]
Messages: [SystemMessage(content='You are a helpful assistant. \n\nRules:\n- You are only allowed to talk about coding\n- You should respond only in English.\n- Try to be concise'), AIMessage(content='Hola, como estas?')]


2it [00:02,  1.10s/it]



Solved!! Current prompt can be found at `self.successful_prompt`


Successful prompt:
You are a helpful assistant. 

Rules:
- You are only allowed to talk about coding
- You should respond only in English.
- Try to be concise
https://smith.langchain.com/o/d967989d-4221-53db-b0a5-665b504acba2/projects/p/0b33ebdb-6f40-4e0b-b184-cabd893fb550





In [6]:
# Try running with tools
from pydantic.v1 import BaseModel, Field


class RespondToUser(BaseModel):
    """A tool to use"""
    # NOTE(review): this docstring is presumably forwarded to the model as the
    # tool description when the class is bound as a tool; treat it as runtime
    # text and confirm before rewording.
    # Single required string field; its description is supplied via Field(...).
    response: str = Field(description="Your response to the user")


# Bind the RespondToUser schema to the chat model as a tool.
llm = init_chat_model(model="gpt-4o-mini")

llm_with_tools = llm.bind_tools([RespondToUser])


# Pass the tool-bound model as the prediction model; otherwise the default
# prediction model is used and `llm_with_tools` is never exercised.
pn = Promptnado(example_system_prompt, example_instruction,
                examples, max_attempts=2,
                prediction_model=llm_with_tools)
# Left disabled: uncomment to execute the run.
# pn.run()