## Simple Reflection Architecture

In [1]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment before any
# client is constructed (presumably the API keys for the LLM providers — confirm
# against the project's .env).
load_dotenv()

True

In [2]:
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
from langchain_core.prompts.chat import SystemMessage, _convert_to_message
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from langchain_core.output_parsers.openai_functions import JsonOutputFunctionsParser

from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic

from langgraph.graph import END, StateGraph, MessageGraph
from langgraph.checkpoint.sqlite import SqliteSaver

import functools
import operator
from typing import List, Sequence, TypedDict, Annotated
import json
import os
import random

from IPython.display import Image, display

import concurrent.futures

In [3]:
# Label for this experiment; it is embedded in the tracing project name below.
unique_id = "Reflection Optimisation"

# Export the project name through the environment (presumably read by the
# LangChain/LangSmith tracing integration — confirm against the tracing setup).
os.environ["LANGCHAIN_PROJECT"] = "Tracing Walkthrough - {}".format(unique_id)

In [4]:
# from langsmith import Client

# client = Client()

In [10]:
# Module-level chat model shared by the generation, reflection and approval
# steps defined below. Temperature 0.0 is used for the OpenAI model.
llm = ChatOpenAI(temperature=0.0, model="gpt-4o")
# Alternative backend kept for quick switching between providers:
# llm = ChatAnthropic(temperature=1.0, model="claude-3-haiku-20240307")

def self_reflection_graph(criteria, max_messages: int = 12) -> MessageGraph:
    """
    Constructs a graph for self-reflection and improvement of prompts.

    The graph has two nodes. "generate" (the entry point) asks the model for a
    revised prompt; after every generation an approval call decides whether to
    stop or to route to "reflect", which produces feedback and hands control
    back to "generate".

    Args:
        criteria: Success criteria text embedded verbatim in the instructions
            of all three model calls (generation, reflection, approval).
        max_messages: The loop stops once the conversation holds more than
            this many messages, even without approval. Defaults to 12.

    Returns:
        The result of ``builder.compile(checkpointer=memory)``, i.e. the
        compiled graph backed by an in-memory SQLite checkpointer.
    """

    system_message = """You are an experienced: AI Prompt Engineer. Your core principles are:
- Always write clear and concise prompts.
- Always write contextually relevant prompts.
- Always write task aligned prompts.
- Always write example demonstrations in prompts.
- Always format and structure prompts to be easily understood by the model."""

    generation_instructions = f"""Your task is to improve the prompt in the conversation above in light of your core principles.
If you recieve feedback and recommendations for the prompt, respond with a revised version of your previous attempts actioning the feedback.
Always think outside the box and consider unconventional ideas on how to implement the feedback.

The success criteria for the prompt are as follows:
{criteria}
You will be penalized if the prompt does not meet this criteria.

Below are strict guidelines that you MUST follow if making changes to the prompt:
- DO NOT modify existing restrictions.
- DO NOT modify or remove negations.
- DO NOT add, modify or remove placeholders denoted by curly braces.
- ALWAYS treat placeholders as the actual content.
You will be penalized if you do not follow these guidelines.

Your update process should be as follows:
1. Review the conversation carefully as an experienced AI Prompt Engineer.
2. Think carefully about how you can implement the most recent feedback and revise the prompt.
3. Explcitly go through each success criteria and ensure the prompt meets them. If not, revise the prompt to make sure it does.
4. Explicitly go through each guideline and ensure the changes adhere to them. If not, revise the prompt to make sure it does.
5. Submit your revised prompt."""

    reflection_instructions = f"""Your task is to provide feedback on the prompt in the conversation above in light of your core princples.
Always think outside the box and consider unconventional ideas on how to enforce your core principles in the prompt.

The success criteria for the updated prompt are as follows:
{criteria}
You must use this information to inform your feedback.

Your reviewal process should be as follows:
1. Read the conversation carefully as an experienced: AI Prompt Engineer.
2. Explain how you think the prompt can improved in light of your core principles.
3. Submit your feedback."""

    approval_instructions = f"""Your task is to review the conversation above and decide if the prompt is optimal in light of your core principles and the success criteria.

The success criteria for the prompt are as follows:
{criteria}
You must use this information to inform your decision.

If you think the prompt sufficiently meets the success criteria, return True. 
If you think the prompt needs improvements in light of your core principles to better meet the success criteria, return False.

Your reviewal process should be as follows:
1. Read the prompt carefully as an expert AI Prompt Engineer.
2. Determine whether the prompt needs improvements or meets the success criteria.
3. Submit your decision.
"""

    def build_prompt(instructions: str) -> ChatPromptTemplate:
        """Wrap the conversation between the persona and task instructions."""
        # The texts are already fully rendered, so they are passed as message
        # objects rather than ("system", text) tuples: a tuple is treated as a
        # template, and any curly braces inside `criteria` would then be
        # misread as template variables and break invocation.
        return ChatPromptTemplate.from_messages(
            [
                SystemMessage(content=system_message),
                MessagesPlaceholder(variable_name="messages"),
                SystemMessage(content=instructions),
            ]
        )

    def generation_node(state: Sequence[BaseMessage]):
        """Ask the model for a revised prompt given the conversation so far."""
        chain = build_prompt(generation_instructions) | llm
        return chain.invoke({"messages": state})

    def reflection_node(state: Sequence[BaseMessage]):
        """Ask the model for feedback on the latest prompt."""
        chain = build_prompt(reflection_instructions) | llm
        result = chain.invoke({"messages": state})
        # Re-wrap the feedback as a HumanMessage (presumably so the generator
        # sees it as a user turn rather than its own output).
        return HumanMessage(content=result.content)

    def approval(state: Sequence[BaseMessage]):
        """
        Agent to approve or reject the prompt.

        Returns the parsed function-call arguments, a dict with a "decision"
        key whose value is the string "True" or "False".
        """
        function_def = {
            "name": "approval",
            "description": "Submit approval decision for the prompt.",
            "parameters": {
                "type": "object",
                "properties": {
                    "decision": {"type": "string", "enum": ["True", "False"]},
                },
                "required": ["decision"],
            },
        }
        chain = (
            build_prompt(approval_instructions)
            # Force the model to answer through the "approval" function so the
            # reply can be parsed as structured JSON.
            | llm.bind_functions(functions=[function_def], function_call="approval")
            | JsonOutputFunctionsParser()
        )
        return chain.invoke({"messages": state})

    def should_continue(state: List[BaseMessage]):
        """Route to END on approval or when the message cap is exceeded."""
        approval_result = approval(state)
        print(approval_result)
        if approval_result["decision"] == "True" or len(state) > max_messages:
            return END
        return "reflect"

    builder = MessageGraph()
    builder.add_node("generate", generation_node)
    builder.add_node("reflect", reflection_node)
    builder.set_entry_point("generate")
    builder.add_conditional_edges("generate", should_continue)
    builder.add_edge("reflect", "generate")

    memory = SqliteSaver.from_conn_string(":memory:")
    graph = builder.compile(checkpointer=memory)

    return graph

def _message_to_dict(obj):
    """JSON fallback encoder: render a chat message as ``{name: content}``."""
    if isinstance(obj, (HumanMessage, AIMessage)):
        return {obj.name: obj.content}
    raise TypeError(f'Object of type {obj.__class__.__name__} is not JSON serializable')


def _append_conversation(log_path, messages):
    """Append one conversation to the JSON list stored at ``log_path``."""
    try:
        with open(log_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # First run: start a fresh log instead of pre-creating an empty file.
        data = []

    # Each entry is keyed by its position in the log; the conversation itself
    # is stored as a JSON-encoded string, matching the existing file format.
    key = len(data)
    data.append({key: json.dumps(messages, default=_message_to_dict)})

    with open(log_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)


def update_prompt(
    base_prompt: str,
    criteria: str,
    log_path: str = "conversations_reflection.json",
) -> "Sequence[BaseMessage]":
    """
    Uses self_reflection_graph to iteratively act on feedback and update prompt

    Args:
        base_prompt: The prompt to improve; sent to the graph as the initial
            human message (named "User").
        criteria: Success criteria forwarded to ``self_reflection_graph``.
        log_path: JSON file to which the finished conversation is appended.

    Returns:
        The last value streamed by the graph. Callers index it with ``[-1]``
        to obtain the final message, so it is the conversation's message list.

    Raises:
        RuntimeError: If the graph stream produces no values at all.
    """
    graph = self_reflection_graph(criteria)
    initial_message = HumanMessage(content=base_prompt, name="User")
    config = {
        "configurable": {"thread_id": random.randint(0, 1000)},
        "recursion_limit": 50,
    }

    # Drain the stream; only the last emitted state is of interest.
    final_state = None
    for final_state in graph.stream(
        initial_message,
        config,
        stream_mode="values",
    ):
        pass

    if final_state is None:
        raise RuntimeError("The reflection graph finished without emitting any state.")

    _append_conversation(log_path, final_state)

    return final_state

In [11]:
from human_eval_prompts import HumanEvalPrompts

In [12]:
# Fetch the experiment inputs from the project-local HumanEvalPrompts helper:
# the starting prompt to optimise and the success criteria it is judged against.
human_eval_prompts = HumanEvalPrompts()
baseline_prompt = human_eval_prompts.get_baseline_prompt()
criteria = human_eval_prompts.get_criteria()

In [13]:
import time

# Run the optimisation several times, recording how long each run takes (in
# seconds) and printing the final message of every run.
n_runs = 10
times = []
for _ in range(n_runs):
    # perf_counter is a monotonic, high-resolution clock, so the measured
    # interval cannot be skewed by system clock adjustments.
    start = time.perf_counter()
    result = update_prompt(baseline_prompt, criteria)
    times.append(time.perf_counter() - start)
    result[-1].pretty_print()
    print("--------------------")

{'decision': 'True'}

```python
# Complete the function based on its signature and docstring provided below.
# Ensure the function is correctly implemented according to the given details.
{content}

# Output your answer at the end as
```python
<your answer>
```
--------------------
{'decision': 'True'}

```python
# Complete the function based on its signature and docstring provided below.
# Ensure the function is correctly implemented and follows the given specifications.
{content}

# Output your answer at the end as
```python
<your answer>
```
--------------------
{'decision': 'True'}

Here is the revised prompt based on the given criteria and guidelines:

```python
{content}
```
Please complete the function based on its signature and docstring. Output your answer at the end as:
```python
<your answer>
```
--------------------
{'decision': 'True'}

```python
# Complete the function based on its signature and docstring provided below.
# Ensure the function is correctly implemented and 