## Simple Reflection Architecture

In [1]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the LLM provider API keys come from — confirm.
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
from langchain_core.prompts.chat import SystemMessage, _convert_to_message
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from langchain_core.output_parsers.openai_functions import JsonOutputFunctionsParser

from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic

from langgraph.graph import END, StateGraph, MessageGraph
from langgraph.checkpoint.sqlite import SqliteSaver

import functools
import operator
from typing import List, Sequence, TypedDict, Annotated
import json
import os
import random

from IPython.display import Image, display

import concurrent.futures

In [3]:
# Label for this experiment; it is embedded in the LANGCHAIN_PROJECT environment
# variable below (presumably read by LangSmith tracing as the project name — confirm).
unique_id = "Authoritarian Optimisation"
os.environ["LANGCHAIN_PROJECT"] = f"Tracing Walkthrough - {unique_id}"

In [4]:
# from langsmith import Client

# client = Client()

In [10]:
# Module-level chat model referenced as `llm` by the generation, reflection and
# approval steps defined inside self_reflection_graph.
llm = ChatOpenAI(temperature=1.0, model="gpt-4o")
# llm = ChatAnthropic(temperature=1.0, model="claude-3-haiku-20240307")

def self_reflection_graph(criteria: str, max_messages: int = 12) -> MessageGraph:
    """
    Constructs a graph for self-reflection and improvement of prompts.

    The graph has two nodes. "generate" asks the model for a revised prompt and
    "reflect" asks the model for feedback on it (returned as a HumanMessage so
    the generator sees it as user input). After every "generate" step an
    approval check decides whether to stop or to route to "reflect".

    Args:
        criteria: Success criteria text. It is interpolated verbatim into the
            instructions of the generate, reflect and approval steps and may
            safely contain curly braces (e.g. "{content}").
        max_messages: The loop stops once the conversation holds more than
            this many messages, regardless of the approval decision.

    Returns:
        The result of ``builder.compile(...)`` with an in-memory SQLite
        checkpointer. NOTE(review): the annotation says ``MessageGraph`` but
        the compiled object is what is returned; annotation kept for
        compatibility.
    """

    system_message = """You are an experienced: AI Prompt Engineer. Your core principles are:
- Always write clear and concise prompts.
- Always write contextually relevant prompts.
- Always write task aligned prompts.
- Always write example demonstrations in prompts.
- Always format and structure prompts to be easily understood by the model."""

    def build_prompt(task_text: str) -> ChatPromptTemplate:
        """Wrap the conversation between the persona and the task instructions."""
        # The instruction texts are already fully formatted. Passing them as
        # SystemMessage objects (instead of ("system", text) tuples) keeps
        # ChatPromptTemplate from parsing them as templates a second time, so
        # braces coming from `criteria` are sent to the model literally rather
        # than being treated as missing input variables.
        return ChatPromptTemplate.from_messages(
            [
                SystemMessage(content=system_message),
                MessagesPlaceholder(variable_name="messages"),
                SystemMessage(content=task_text),
            ]
        )

    def generation_node(state: Sequence[BaseMessage]):
        """Produce a revised prompt from the conversation so far."""
        prompt_text = f"""Your task is to review the conversation above and improve the prompt in light of your core principles.
If you recieve feedback and recommendations for the prompt, respond with a revised version of your previous attempts actioning the feedback.

The success criteria for the prompt are as follows:
{criteria}
You will be penalized if the prompt does not meet this criteria.

Below are strict guidelines that you MUST follow if making changes to the prompt:
- DO NOT modify existing restrictions.
- DO NOT modify or remove negations.
- DO NOT add, modify or remove placeholders denoted by curly braces.
- ALWAYS treat placeholders as the actual content.
You will be penalized if you do not follow these guidelines.

Your update process should be as follows:
1. Review the conversation carefully as an expert AI Prompt Engineer.
2. Think carefully about how you can implement the user's feedback (if any) to improve the prompt.
3. Revise the prompt in light of your core principles and the feedback you have received (if any).
4. Submit your revised prompt.
"""
        chain = build_prompt(prompt_text) | llm
        return chain.invoke({"messages": state})

    def reflection_node(state: Sequence[BaseMessage]):
        """Critique the latest prompt and return the feedback as a user message."""
        prompt_text = f"""Your task is to review the conversation above and think outside the box to provide feedback on the prompt.
Offer creative recommendations on how to improve it in light of your core principles.

The success criteria for the updated prompt are as follows:
{criteria}
Use this information to inform your feedback.

Below are strict guidelines that MUST be followed if making changes to the prompt:
- DO NOT modify existing restrictions.
- DO NOT modify or remove negations.
- DO NOT add, modify or remove placeholders denoted by curly braces.
- ALWAYS treat placeholders as the actual content.
Check that that the prompt adheres to these guidelines.

Your reviewal process should be as follows:
1. Read the conversation carefully as an expert AI Prompt Engineer.
2. Explcitly go through each success criteria and ensure the prompt meets them. If not, mention the criteria that was not met in your feedback.
3. Explicitly go through each guideline and ensure the changes adhere to them. If not, mention the guideline that was not followed in your feedback.
4. Explicitly list the creative recommendations you have for improving the most critical aspects of the prompt.
5. Submit your feedback.
"""
        chain = build_prompt(prompt_text) | llm
        result = chain.invoke({"messages": state})
        # Re-label the model's feedback as a human turn for the next generation.
        return HumanMessage(content=result.content)

    def approval(state: Sequence[BaseMessage], criteria: str):
        """
        Agent to approve or reject the prompt.

        Forces a call to the "approval" function and returns its parsed
        arguments, expected to look like {"decision": "True" | "False"}.
        """
        function_def = {
            "name": "approval",
            "description": "Submit approval decision for the prompt.",
            "parameters": {
                "type": "object",
                "properties": {
                    "decision": {"type": "string", "enum": ["True", "False"]},
                },
                "required": ["decision"],
            },
        }
        prompt_text = f"""Your task is to review the conversation above and decide if the prompt is optimal in light of your core principles and the success criteria.

The success criteria for the prompt are as follows:
{criteria}
You must use this information to inform your decision.

If you think the prompt sufficiently meets the success criteria, return True. 
If you think the prompt needs improvements in light of your core principles to better meet the success criteria, return False.

Your reviewal process should be as follows:
1. Read the prompt carefully as an expert AI Prompt Engineer.
2. Determine whether the prompt needs improvements or meets the success criteria.
3. Submit your decision.
"""
        chain = (
            build_prompt(prompt_text)
            | llm.bind_functions(functions=[function_def], function_call="approval")
            | JsonOutputFunctionsParser()
        )
        return chain.invoke({"messages": state})

    def should_continue(state: List[BaseMessage]):
        """Route to END when approved or when the message cap is exceeded."""
        # Check the cap first: once it is exceeded the outcome is END whatever
        # the reviewer says, so the approval LLM call would be wasted.
        if len(state) > max_messages:
            return END
        approval_result = approval(state, criteria)
        print(approval_result)
        # .get(): a reply without a "decision" key counts as "not approved".
        if approval_result.get("decision") == "True":
            return END
        return "reflect"

    builder = MessageGraph()
    builder.add_node("generate", generation_node)
    builder.add_node("reflect", reflection_node)
    builder.set_entry_point("generate")
    builder.add_conditional_edges("generate", should_continue)
    builder.add_edge("reflect", "generate")

    memory = SqliteSaver.from_conn_string(":memory:")
    graph = builder.compile(checkpointer=memory)

    return graph

def update_prompt(base_prompt: str, criteria: str) -> "List[BaseMessage]":
    """
    Uses self_reflection_graph to iteratively act on feedback and update prompt

    Args:
        base_prompt: The prompt to improve; sent to the graph as the first
            human message (named "User").
        criteria: Success criteria forwarded to self_reflection_graph.

    Returns:
        The last state yielded by the graph stream. Callers index it as a
        message sequence (``result[-1]`` is the most recent message).

    Raises:
        RuntimeError: If the graph stream yields no state at all.
    """
    graph = self_reflection_graph(criteria)
    initial_message = HumanMessage(content=base_prompt, name="User")
    config = {
        # Random thread id for the checkpointer; each call builds its own
        # graph with a fresh in-memory saver.
        "configurable": {"thread_id": random.randint(0, 1000)},
        "recursion_limit": 50,
    }

    # Run the graph, echoing the newest message of every intermediate state.
    final_state = None
    for state in graph.stream(initial_message, config, stream_mode="values"):
        final_state = state
        # The "__end__" guard is kept from the original implementation.
        # NOTE(review): with stream_mode="values" each item appears to be the
        # message list itself, so this is presumably always true — confirm.
        if "__end__" not in state and len(state) > 1:
            state[-1].pretty_print()

    if final_state is None:
        # Previously this surfaced as an UnboundLocalError on `return s`.
        raise RuntimeError("The reflection graph produced no output.")
    return final_state

In [11]:
# Seed prompt to optimise. "{content}" is a literal placeholder in a plain
# (non-f) string; the criteria below require it to be kept in the result.
base_prompt = "{content}\nPlease output your answer at the end as ##<your answer (among A through C)>."
# Success criteria handed to update_prompt, one bullet per requirement.
criteria = """- The prompt MUST instruct the LLM to solve the object tracking problem.
- The prompt MUST include the content placeholder (this is where the object tracking problem will be).
- The prompt MUST instruct the LLM to provide the answer at the end of the output exactly as ##<answer (among A through C)>.
- The prompt MUST instruct the LLM to provide the answer with no spaces between the ## and the answer."""

# Run the reflection loop; `result` is the last state yielded by the graph stream.
result = update_prompt(base_prompt, criteria)

{'decision': 'True'}

Here is the improved prompt following the core principles and considering the guidelines provided:

"Please review the following object tracking problem presented in {content}. Analyze the information and determine the correct solution to the problem. Output your answer at the end in the format: ##<answer (among A through C)>, ensuring there are no spaces between the ## and the answer."

This version ensures clarity, concise instruction, and adheres to all the specified requirements.


In [12]:
# Print the last message of the returned state (the most recent revision of the prompt).
result[-1].pretty_print()


Here is the improved prompt following the core principles and considering the guidelines provided:

"Please review the following object tracking problem presented in {content}. Analyze the information and determine the correct solution to the problem. Output your answer at the end in the format: ##<answer (among A through C)>, ensuring there are no spaces between the ## and the answer."

This version ensures clarity, concise instruction, and adheres to all the specified requirements.
