In [1]:
# Install the notebook's dependencies into the kernel's environment.
# -q keeps pip quiet; -U upgrades each package to its newest release, so
# versions are NOT pinned and a later re-run may pull different releases.
%pip install -qU langchain langchain-openai langchain-anthropic langsmith python-dotenv pydantic

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load variables from a local .env file into the process environment first,
# so they are already set when the client below is constructed
# (presumably the LangSmith / model API keys -- confirm against your .env).
from dotenv import load_dotenv
load_dotenv()

from langsmith import Client

# Module-level LangSmith client; Promptnado._create_dataset reads this global.
client = Client()


In [3]:
# System prompt under test. The "- <HERE>" line is the slot that a candidate
# rule is substituted into.
example_system_prompt = "\n".join(
    [
        "You are a helpful assistant",
        "",
        "Rules:",
        "- You are only allowed to talk about coding",
        "- <HERE>",
        "- Try to be concise",
    ]
)

# The behaviour the finished prompt has to enforce.
example_instruction = "The agent should only respond in English. No other languages"

# User inputs that the candidate prompts are evaluated against.
examples = [
    "Hola, como estas?",
    "Hi there!",
]

In [9]:
from typing import List
import random
from langsmith.schemas import Run, Example
from langsmith import evaluate
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from pydantic.v1 import BaseModel, Field
from langchain.chat_models import init_chat_model

# Schemas


class Rule(BaseModel):
    """A single rule for the prompt"""
    # NOTE(review): the docstring and field descriptions are presumably part of
    # the schema handed to the LLM via with_structured_output -- edit with care.

    # Rationale for the suggested rule.
    reasoning: str = Field(
        ...,
        description="The thought process and direction for why we think this is a good solution to the instruction.",
    )

    # Text that replaces the <HERE> token in the system prompt.
    prompt: str = Field(
        ...,
        description=(
            "A single prompt rule that we can try to solve for the instruction. "
            "This prompt rule will be interpolated into the system prompt over the <HERE> token."
        ),
    )


class Rules(BaseModel):
    """Set of prompt rules that should be tried"""
    # NOTE(review): the docstring and field description are presumably part of
    # the schema handed to the LLM via with_structured_output -- edit with care.

    # Candidate rules, each carrying its own reasoning and prompt text.
    rules: List[Rule] = Field(
        ...,
        description="A list of prompt rules that we can try to solve for the instruction.",
    )


class CorrectnessEvaluationResult(BaseModel):
    """Result of an evaluation of correctness"""
    # NOTE(review): the docstring and field descriptions are presumably part of
    # the schema handed to the LLM via with_structured_output -- edit with care.

    # Judge's explanation; surfaced as the evaluator comment.
    reasoning: str = Field(
        ...,
        description="The thought process behind why you think the answer is correct or incorrect.",
    )

    # Verdict; mapped to a 1/0 score by the evaluator.
    correct: bool = Field(
        ...,
        description="Correctness score",
    )


class Promptnado:
    """Search for a single prompt rule that makes a system prompt satisfy an instruction.

    Candidate rules are generated by an LLM, substituted into the system prompt
    in place of ``rule_token``, run against a LangSmith dataset built from
    ``examples`` and graded by an LLM judge. The search stops at the first rule
    for which every example is graded correct.
    """

    def __init__(self, system_prompt: str, instruction: str, examples: List[str], rule_token="<HERE>"):
        """Store the search inputs and initialise the run state.

        Args:
            system_prompt: Prompt template; must contain ``rule_token``.
            instruction: Behaviour the final prompt has to enforce.
            examples: User inputs the candidate prompts are evaluated on.
            rule_token: Placeholder in ``system_prompt`` that each candidate
                rule replaces.

        Raises:
            ValueError: If ``rule_token`` is empty or missing from
                ``system_prompt``. Without the token every candidate would
                produce the same, unchanged prompt.
        """
        if not rule_token or rule_token not in system_prompt:
            raise ValueError(
                f"system_prompt must contain the rule token {rule_token!r}")

        self.system_prompt = system_prompt
        self.instruction = instruction
        self.examples = examples
        self.rule_token = rule_token

        # Create random dataset name
        self.dataset_name = f"Promptnado_{random.randint(0, 1000000)}"

        # 1-based counter used to label experiments; attempts - 1 rules have
        # been tested so far.
        self.attempts = 1
        self.solved = False
        self.current_rule = None
        self.current_prompt = None

    def _create_dataset(self):
        """Create a dataset with a unique name

        Uses the module-level ``client`` to create the dataset and one example
        per entry of ``self.examples``. Stores the dataset on ``self.dataset``
        and returns it.
        """
        dataset = client.create_dataset(
            self.dataset_name, description=self.instruction)
        for example in self.examples:
            client.create_example(
                inputs={"input": example}, dataset_id=dataset.id)

        self.dataset = dataset
        print(
            f"Created dataset: {self.dataset_name} with {len(self.examples)} examples")
        return dataset

    def _generate_rules(self):
        """Use an LLM to generate a list of rules

        Stores the generated rules on ``self.rules`` and returns them.
        """

        system_prompt = f"""You are an expert LLM Prompt Engineer. Your job is to try to solve for the provided <Instructions> \
by making adjustments to the <Original Prompt>. You should attempt to make 5 suggestions for prompts that might work. Each suggestion you \
make will be interpolated into the prompt where {self.rule_token} is, and then evaluated for correctness against a dataset of \
examples.

<Instructions>
{self.instruction}
</Instructions>

<Original Prompt>
{self.system_prompt}
</Original Prompt>
"""

        structured_llm = init_chat_model(
            model="gpt-4o", temperature=0.7).with_structured_output(Rules)

        rules: Rules = structured_llm.invoke(system_prompt)

        self.rules = rules.rules
        print(f"Generated {len(self.rules)} rules\n")
        print(self.rules)
        return self.rules

    def _build_prompt(self, rule: "Rule"):
        """Interpolate the rules into the system prompt

        Returns the system prompt with every occurrence of ``self.rule_token``
        replaced by ``rule.prompt``.
        """
        interpolated_prompt = self.system_prompt.replace(
            self.rule_token, rule.prompt)

        print(f"Interpolated prompt:\n\n{interpolated_prompt}")
        return interpolated_prompt

    def _evaluate_correctness(self, run: "Run", example: "Example"):
        """Eval function to use an LLM to validate that the instruction was followed

        Returns a dict with ``score`` (1 or 0), ``key`` ("correctness") and
        ``comment`` (the judge's reasoning).
        """
        # A run without outputs has nothing to judge; score it as a failure
        # instead of raising while indexing into None.
        outputs = run.outputs or {}
        if "output" not in outputs:
            return {"score": 0, "key": "correctness",
                    "comment": "The run produced no output to evaluate."}

        system_prompt = f"""Your job is to validate whether the <Result> meets the criteria for <Instruction>. Try to be a harsh judge.

<Instruction>
{self.instruction}
</Instruction>

<Result>
{outputs["output"]}
</Result>
"""

        structured_llm = init_chat_model(
            model="gpt-4o", temperature=0).with_structured_output(CorrectnessEvaluationResult)

        result: CorrectnessEvaluationResult = structured_llm.invoke(
            system_prompt)

        return {"score": 1 if result.correct else 0, "key": "correctness", "comment": result.reasoning}

    def _predict(self, inputs: dict):
        """Run current prompt against example in the dataset

        Sends ``self.current_prompt`` as the system message and
        ``inputs["input"]`` as the human message; returns ``{"output": text}``.
        """

        messages = [
            SystemMessage(content=self.current_prompt),
            HumanMessage(content=inputs["input"]),
        ]

        # Invoke the model
        llm = init_chat_model(model="gpt-4o", temperature=0.7)
        response = llm.invoke(messages)

        return {"output": response.content}

    def _test_rule(self, rule: "Rule"):
        """Evaluate a given rule

        Makes ``rule`` the current rule, builds the matching prompt, runs an
        experiment over the dataset and returns what ``evaluate`` returned.
        """
        print(f"\nTesting rule: {rule.prompt}")
        self.current_rule = rule

        self.current_prompt = self._build_prompt(self.current_rule)

        results = evaluate(
            self._predict,
            data=self.dataset_name,
            evaluators=[self._evaluate_correctness],
            experiment_prefix=f"Attempt-{self.attempts}",
        )

        self.attempts += 1

        return results

    def _is_solved(self, results) -> bool:
        """Return True only when every evaluated example was scored as correct.

        Args:
            results: Iterable of result rows; each row is expected to expose
                ``row["evaluation_results"]["results"]``, a list of objects
                with a ``score`` attribute.

        Returns:
            False when there are no rows, when a row has no evaluation result,
            or when any score differs from 1; True otherwise.
        """
        saw_row = False
        for row in results:
            evaluations = row["evaluation_results"]["results"]
            if not evaluations or any(item.score != 1 for item in evaluations):
                return False
            saw_row = True
        return saw_row

    def run(self, max_attempts: int = 20):
        """Run the promptnado

        Creates the dataset, then keeps generating and testing rules until one
        passes on every example or ``max_attempts`` rules have been tested.

        Args:
            max_attempts: Upper bound on the number of rules tested, so that an
                unsatisfiable instruction cannot loop forever.
        """
        print(f"Running Promptnado with instruction: {self.instruction}")
        # Create the dataset
        self._create_dataset()

        while not self.solved and self.attempts <= max_attempts:
            # Get a list of rules
            self._generate_rules()
            if not self.rules:
                # Nothing to test; regenerating forever would never terminate.
                print("No rules were generated; stopping.")
                break

            # For each rule
            for rule in self.rules:
                if self.attempts > max_attempts:
                    break

                # Keep the latest experiment around for inspection.
                self.results = self._test_rule(rule)

                if self._is_solved(self.results):
                    self.solved = True
                    break

        if not self.solved:
            print(
                f"\n\nNo rule satisfied the instruction after {self.attempts - 1} attempts.\n\n")
            return

        print("\n\nSolved!! Current prompt can be found at `self.current_prompt\n\n")

        print(f"Current prompt:\n\n{self.current_prompt}")

In [10]:
# Build the optimizer from the example inputs defined earlier, then start the search.
pn = Promptnado(
    system_prompt=example_system_prompt,
    instruction=example_instruction,
    examples=examples,
)
pn.run()

Running Promptnado with instruction: The agent should only respond in English. No other languages
Created dataset: Promptnado_863457 with 2 examples
Generated 5 rules

[Rule(reasoning='This rule explicitly instructs the LLM to respond only in English, ensuring that no other languages are used.', prompt='You must only respond in English. No other languages are allowed.'), Rule(reasoning='By specifying the language explicitly, it reinforces the instruction to not deviate from English in responses.', prompt='All your responses must be in English only.'), Rule(reasoning='A clear directive that emphasizes the exclusivity of using English in all responses.', prompt='You are required to communicate exclusively in English.'), Rule(reasoning='This rule adds a constraint to use only English, making it clear that responses in other languages are not acceptable.', prompt='Respond only in English. Use of other languages is prohibited.'), Rule(reasoning='A straightforward rule that makes it clear th

2it [00:03,  1.64s/it]



Solved!! Current prompt can be found at `self.current_prompt


Current prompt:

You are a helpful assistant

Rules:
- You are only allowed to talk about coding
- You must only respond in English. No other languages are allowed.
- Try to be concise





In [13]:
# Inspect the raw rows stored on the object that evaluate() returned for the
# last tested rule. `_results` is a private attribute, so it may change
# between langsmith releases; the output is large.
pn.results._results

[{'run': RunTree(id=UUID('bcc2dcf8-ab91-41a2-952c-6f406ca5ce95'), name='Target', start_time=datetime.datetime(2024, 7, 17, 21, 48, 34, 437668, tzinfo=datetime.timezone.utc), run_type='chain', end_time=datetime.datetime(2024, 7, 17, 21, 48, 35, 457420, tzinfo=datetime.timezone.utc), extra={'metadata': {'revision_id': 'b3f9496-dirty', 'num_repetitions': 1, 'example_version': '2024-07-17T21:48:29.706692+00:00', 'ls_method': 'traceable'}, 'runtime': {'sdk': 'langsmith-py', 'sdk_version': '0.1.89', 'library': 'langsmith', 'platform': 'macOS-12.5.1-arm64-arm-64bit', 'runtime': 'python', 'py_implementation': 'CPython', 'runtime_version': '3.10.1', 'langchain_version': '0.2.9', 'langchain_core_version': '0.2.20', 'thread_count': 21.0, 'mem': {'rss': 145113088.0}, 'cpu': {'time': {'sys': 0.917068864, 'user': 4.751277056}, 'ctx_switches': {'voluntary': 37312.0, 'involuntary': 0.0}, 'percent': 0.0}}}, error=None, serialized={'name': 'Target', 'signature': '(inputs: dict)', 'doc': 'Run current pro