In [141]:
import pandas as pd
import numpy as np
import requests
import uuid
import json
from copy import deepcopy

In [2]:
url = "http://127.0.0.1:1234"
endpoint = "/v1/chat/completions"
api_url = url + endpoint

In [40]:
query  = ""
context = ""

In [27]:
def call_llm(payload):
    response = requests.post(api_url, json=payload).json()
    output = response["choices"][0]["message"]["content"]
    return output

In [192]:
# sample playbook instruction:

# Generate a random UUID version 4 (randomly generated)
uuid_value = str(uuid.uuid4())

counter = {
    "helpful": 0,
    "harmful": 0,
    "neutral": 0
}
counter_benefit = 0
counter_harm = 0

content = "When you open a door, you either have a bedroom, a bathroom, or a bigger space to enter into."

sample_playbook_rule = {
    uuid_value: {
        "counter": counter,
        "content": content
    }
}

sample_playbook_rule

{'4b5c9ec8-1ed3-42a9-9b57-b63eeeb21ced': {'counter': {'helpful': 0,
   'harmful': 0,
   'neutral': 0},
  'content': 'When you open a door, you either have a bedroom, a bathroom, or a bigger space to enter into.'}}

## Defining the Playbook

In [193]:
playbook = {
    uuid_value: {
        "counter": counter,
        "content": content
    }
}

playbook

{'4b5c9ec8-1ed3-42a9-9b57-b63eeeb21ced': {'counter': {'helpful': 0,
   'harmful': 0,
   'neutral': 0},
  'content': 'When you open a door, you either have a bedroom, a bathroom, or a bigger space to enter into.'}}

## Defining the Generator

In [145]:
def create_generator_prompt(query, context):
    generator_system_prompt = """
    You are an analysis expert tasked with answering questions using your knowledge, a curated playbook of strategies and insights and a
    reflection that goes over the diagnosis of all previous mistakes made while answering the question.

    Instructions: - Read the playbook carefully and apply relevant strategies, formulas, and insights - Pay attention to common mistakes
    listed in the playbook and avoid them - Show your reasoning step-by-step - Be concise but thorough in your analysis - If the playbook
    contains relevant code snippets or formulas, use them appropriately - Double-check your calculations and logic before providing the final
    answer

    Your output should be a json object, which contains the following fields: - reasoning: your chain of thought / reasoning / thinking process,
    detailed analysis and calculations - bullet_ids: each line in the playbook has a bullet_id. all bulletpoints in the playbook that’s relevant,
    helpful for you to answer this question, you should include their bullet_id in this list - final_answer: your concise final answer

    Avoid generating any thinking outputs. Just give the straight answer without any thinking.
    """

    generator_user_prompt = f"""
    Here is the following information you will require:
    Playbook:
    {playbook}

    Question:
    {query}

    Context:
    {context}

    Answer in this exact JSON format witht the following keys:
    "reasoning": "[Your chain of thought / reasoning / thinking process, detailed analysis and calculations]",
    "bullet_ids": ["all bullet ids in a list that are relevant helpful for you to answer this question"],
    "final_answer": "[Your concise final answer here]"
    """

    return generator_system_prompt, generator_user_prompt

In [None]:
# additional context that helps it get the right answer:  and include any transitional pieces like doors and stairs too. You may also include landmarks and orientations in the house in the linked list.

In [146]:
query = "tell me the path to my personal bedroom in a linkedlist format starting from the master bedroom."
context = """the house has 10 doors on the basement floor. it has 6 doors on the upper floor. 
            there are two bedrooms downstairs and two upstairs. 
            there are two bathrooms downstairs and two upstairs. 
            there is a stair connecting the basement and upper floor.
            the master room is on the basement floor. the door from the basement floor opens up the living room.
            the stair is in the living room. 
            there's another door by the kitchen that leads to the living room.
            the main door of the house leads to the living room.
            there is a bedroom in a corner and to the right of the other bedroom upstairs.
            the stairs leads up to a upper living room.
            the stairs is near the ac control box.
            one of the bedrooms is near the ac control box.
            the ac control box is in the center of the upstairs.
             """
ground_truth = "Master Bedroom -> Master Bedroom Door -> Living Room -> Stairs -> Upper Living Room -> AC Control Box -> Bedroom Near AC Control Box -> Bedroom to the Right, in the Corner"

In [147]:
generator_system_prompt, generator_user_prompt = create_generator_prompt(query, context)

In [148]:
generator_payload = {
    "messages": [
        {
            "role": "system",
            "content": generator_system_prompt
        },
        {
            "role": "user",
            "content": generator_user_prompt
        }
    ]
}
generate_response = call_llm(generator_payload)

In [149]:
generator_final_output =  generate_response.split("</think>")[1]

# Clean up the JSON string by stripping leading/trailing whitespace
json_str = generator_final_output.strip()

# Now, load it into a Python dictionary
generator_final_output = json.loads(json_str)

In [150]:
generator_final_output

{'reasoning': "The provided information does not include a direct link or path from the master bedroom to the personal bedroom. The context describes the layout of the house, including the number of doors and rooms on different floors, but does not explicitly state how the master bedroom is connected to the personal bedroom. The playbook's content does not provide any additional useful information for constructing a linked list path between the rooms. Therefore, it is impossible to determine the exact path from the master bedroom to the personal bedroom based on the given information.",
 'bullet_ids': ['eea2b590-4063-441e-aca4-30b48944d46f'],
 'final_answer': 'It is not possible to determine the path from the master bedroom to the personal bedroom based on the given information.'}

## Defining the Reflector

In [204]:
def create_reflector_prompt(query, context, generator_final_output, ground_truth):

    reasoning_trace = generator_final_output["reasoning"]
    model_answer = generator_final_output["final_answer"]

    reflector_sys_prompt = """
    You are an expert analyst and educator. Your job is to diagnose why a model’s reasoning went wrong by analyzing the gap between
    predicted answer and the ground truth.

    Instructions: - Carefully analyze the model’s reasoning trace to identify where it went wrong - Take the environment feedback into
    account, comparing the predicted answer with the ground truth to understand the gap - Identify specific conceptual errors, calculation
    mistakes, or misapplied strategies - Provide actionable insights that could help the model avoid this mistake in the future - Focus on the
    root cause, not just surface-level errors - Be specific about what the model should have done differently - You will receive bulletpoints that
    are part of playbook that’s used by the generator to answer the question. - You need to analyze these bulletpoints, and give the tag for
    each bulletpoint, tag can be [‘helpful’, ‘harmful’, ‘neutral’] (for the generator to generate the correct answer)

    Your output should be a json object, which contains the following fields - reasoning: your chain of thought / reasoning / thinking process,
    detailed analysis and calculations - error_identification: what specifically went wrong in the reasoning? - root_cause_analysis: why did this
    error occur? What concept was misunderstood? - correct_approach: what should the model have done instead? - key_insight: what
    strategy, formula, or principle should be remembered to avoid this error? - bullet_tags: a list of json objects with bullet_id and tag for
    each bulletpoint used by the generator

    Avoid generating any thinking outputs. Just give the straight answer without any thinking.
    """

    reflector_user_prompt = f"""
        Question:
        {query}

        Context:
        {context}

        Model’s Reasoning Trace:
        {reasoning_trace}

        Model’s Predicted Answer:
        {model_answer}

        Ground Truth Answer:
        {ground_truth}

        Playbook:
        {playbook}        

        Answer in this exact JSON format with the following keys:
        {{
            "reasoning": "[Your chain of thought / reasoning / thinking process, detailed analysis and calculations]",
            "error_identification": "[What specifically went wrong in the reasoning?]",
            "root_cause_analysis": "[Why did this error occur? What concept was misunderstood?]",
            "correct_approach": "[What should the model have done instead?]",
            "key_insight": "[What strategy, formula, or principle should be remembered to avoid this error?]",
            "bullet_tags": ["a list of json with keys: bullet id and tag determining bullet points and their effect."]
        }}
    """

    reflector_payload = {
    "messages": [
        {
            "role": "system",
            "content": reflector_sys_prompt
        },
        {
            "role": "user",
            "content": reflector_user_prompt
        }
    ]
}

    return reflector_payload

In [205]:
reflector_payload = create_reflector_prompt(query, context, generator_final_output, ground_truth)

In [206]:
reflector_response = call_llm(reflector_payload)

In [210]:
reflector_response

'<think>\n</think>\n\n{\n    "reasoning": "The model attempted to trace the path from the master bedroom to the personal bedroom by following the given context. It correctly identified that the master bedroom is on the basement floor and connected to the living room. However, it missed including specific intermediate steps such as \'Master Bedroom Door\' and \'Stairs\', which are explicitly mentioned in the ground truth. Additionally, it oversimplified by skipping details about \'AC Control Box\' and assuming a direct path to the bedroom, rather than following the sequential steps provided in the context.",\n    "error_identification": "The model omitted critical intermediate nodes such as \'Master Bedroom Door\', \'Stairs\', and \'AC Control Box\', which are explicitly mentioned in the ground truth. Additionally, it did not follow the exact sequence of steps outlined in the context and assumed a direct path instead.",\n    "root_cause_analysis": "The model misunderstood the requiremen

In [211]:
reflector_final_output =  reflector_response.split("</think>")[1]

# Clean up the JSON string by stripping leading/trailing whitespace
json_str = reflector_final_output.strip()

# Now, load it into a Python dictionary
reflector_final_output = json.loads(json_str)

reflector_final_output

{'reasoning': "The model attempted to trace the path from the master bedroom to the personal bedroom by following the given context. It correctly identified that the master bedroom is on the basement floor and connected to the living room. However, it missed including specific intermediate steps such as 'Master Bedroom Door' and 'Stairs', which are explicitly mentioned in the ground truth. Additionally, it oversimplified by skipping details about 'AC Control Box' and assuming a direct path to the bedroom, rather than following the sequential steps provided in the context.",
 'error_identification': "The model omitted critical intermediate nodes such as 'Master Bedroom Door', 'Stairs', and 'AC Control Box', which are explicitly mentioned in the ground truth. Additionally, it did not follow the exact sequence of steps outlined in the context and assumed a direct path instead.",
 'root_cause_analysis': 'The model misunderstood the requirement to include all explicitly mentioned nodes in t

In [198]:
# playbook["4b5c9ec8-1ed3-42a9-9b57-b63eeeb21ced"]["counter"]["neutral"] = 1
# playbook

{'4b5c9ec8-1ed3-42a9-9b57-b63eeeb21ced': {'counter': {'helpful': 0,
   'harmful': 0,
   'neutral': 1},
  'content': 'When you open a door, you either have a bedroom, a bathroom, or a bigger space to enter into.'},
 'fa42a8b8-2be3-492a-89fe-6e9911dc907f': {'counter': {'helpful': 0,
   'harmful': 0,
   'neutral': 0},
  'content': "To trace a path, always start from the initial room and sequentially follow each door's explicit destination as described in the context. Prioritize sequential connections over assumptions about room locations, especially when doors and stairs are involved."}}

In [212]:
# calculate playbook stats:
playbook_stats = {
    "helpful": 0,
    "harmful": 0,
    "neutral": 0
}

bullets = reflector_final_output["bullet_tags"]

# Iterate over each bullet in the list
for bullet in bullets:
    id = bullet.get("bullet_id")
    tag = bullet.get('tag')  # Get the 'tag' value, or None if not present

    # update playbook tag:
    playbook[id]["counter"][tag] += 1

    if tag:
        if tag in playbook_stats:
            playbook_stats[tag] += 1
        else:
            playbook_stats[tag] = 1

playbook

{'4b5c9ec8-1ed3-42a9-9b57-b63eeeb21ced': {'counter': {'helpful': 0,
   'harmful': 0,
   'neutral': 2},
  'content': 'When you open a door, you either have a bedroom, a bathroom, or a bigger space to enter into.'},
 'fa42a8b8-2be3-492a-89fe-6e9911dc907f': {'counter': {'helpful': 1,
   'harmful': 0,
   'neutral': 0},
  'content': "To trace a path, always start from the initial room and sequentially follow each door's explicit destination as described in the context. Prioritize sequential connections over assumptions about room locations, especially when doors and stairs are involved."}}

In [213]:
playbook_stats

{'helpful': 1, 'harmful': 0, 'neutral': 1}

## Defining a Curator

In [214]:
def create_curator_prompt(query, context, reflector_final_output, playbook_stats):

    reflec_data = deepcopy(reflector_final_output)
    del reflec_data["bullet_tags"]

    string_relec_data = json.dumps(reflec_data)


    curator_sys_prompt = """
    You are a master curator of knowledge. Your job is to identify what new insights should be added to an existing playbook based on a
    reflection from a previous attempt.

    Context: - The playbook you created will be used to help answering similar questions. - The reflection is generated using ground truth
    answers that will NOT be available when the playbook is being used. So you need to come up with content that can aid the playbook user
    to create predictions that likely align with ground truth.

    CRITICAL: You MUST respond with valid JSON only. Do not use markdown formatting or code blocks.
    Instructions: - Review the existing playbook and the reflection from the previous attempt - Identify ONLY the NEW insights, strategies,
    or mistakes that are MISSING from the current playbook - Avoid redundancy - if similar advice already exists, only add new content that
    is a perfect complement to the existing playbook - Do NOT regenerate the entire playbook - only provide the additions needed - Focus on
    quality over quantity - a focused, well-organized playbook is better than an exhaustive one - Format your response as a PURE JSON object
    with specific sections - For any operation if no new content to add, return an empty list for the operations field - Be concise and specific -
    each addition should be actionable
    
    Avoid generating any thinking outputs. Just give the straight answer without any thinking.
    """

    curator_user_prompt = f"""
        Current Playbook Stats:
        {playbook_stats}

        Reasoning Trace and Analysis:
        {string_relec_data}
        
        Current Playbook:
        {playbook}

        Question:
        {query}

        Context:
        {context}

        Your Task: Output ONLY a valid JSON object with these exact fields: - reasoning: your chain of thought / reasoning / thinking process,
        detailed analysis and calculations - operations: a list of operations to be performed on the playbook - type: the type of operation to be
        performed - section: the section to add the bullet to - content: the new content of the bullet
        Available Operations: 1. ADD: Create new bullet points with fresh IDs - section: the section to add the new bullet to - content: the new
        content of the bullet. Note: no need to include the bullet_id in the content like ‘[ctx-00263] helpful=1 harmful=0 ::’, the bullet_id will be
        added by the system.

        RESPONSE FORMAT - Output ONLY this JSON structure (no markdown, no code blocks):
        This is an example format so do not copy the contents; just the bigger structure
        {{
        "reasoning": "[Your chain of thought / reasoning / thinking process, detailed analysis and calculations here]",
        "operations": [
            {{
            "type": "ADD",
            "section": "formulas_and_calculations",
            "content": "[New calculation method...]"
            }}
        ]
        }}
    """

    curator_payload = {
        "messages": [
            {
                "role": "system",
                "content": curator_sys_prompt
            },
            {
                "role": "user",
                "content": curator_user_prompt
            }
        ]
    }

    return curator_payload

In [215]:
curator_payload = create_curator_prompt(query, context, reflector_final_output, playbook_stats)

In [216]:
curator_response = call_llm(curator_payload)

In [186]:
curator_final_output =  curator_response.split("</think>")[1]

# Clean up the JSON string by stripping leading/trailing whitespace
json_str = curator_final_output.strip()

# Now, load it into a Python dictionary
curator_final_output = json.loads(json_str)

curator_final_output

{'reasoning': "The model failed to trace the path by not following each door's explicit destination as described. The reflection indicates a need to systematically map navigational paths by explicitly following each door's connection, even if the path seems indirect. The current playbook lacks a strategy to trace such paths step-by-step using door connections and stair links.",
 'operations': [{'type': 'ADD',
   'section': 'formulas_and_calculations',
   'content': "To trace a path, always start from the initial room and sequentially follow each door's explicit destination as described in the context. Prioritize sequential connections over assumptions about room locations, especially when doors and stairs are involved."}]}

In [217]:
#edit playbook
operations = curator_final_output["operations"]
operations

for elem in operations:
    print(elem)
    if elem["type"] == "ADD":

        # Generate a random UUID version 4 (randomly generated)
        uuid_value = str(uuid.uuid4())

        counter = {
            "helpful": 0,
            "harmful": 0,
            "neutral": 0
        }

        content = elem["content"]

        playbook[uuid_value] = {
            "counter": counter,
            "content": content
        }

{'type': 'ADD', 'section': 'formulas_and_calculations', 'content': "To trace a path, always start from the initial room and sequentially follow each door's explicit destination as described in the context. Prioritize sequential connections over assumptions about room locations, especially when doors and stairs are involved."}


In [218]:
playbook

{'4b5c9ec8-1ed3-42a9-9b57-b63eeeb21ced': {'counter': {'helpful': 0,
   'harmful': 0,
   'neutral': 2},
  'content': 'When you open a door, you either have a bedroom, a bathroom, or a bigger space to enter into.'},
 'fa42a8b8-2be3-492a-89fe-6e9911dc907f': {'counter': {'helpful': 1,
   'harmful': 0,
   'neutral': 0},
  'content': "To trace a path, always start from the initial room and sequentially follow each door's explicit destination as described in the context. Prioritize sequential connections over assumptions about room locations, especially when doors and stairs are involved."},
 '34c2830a-bcec-4f9e-8ef2-df9517fbed24': {'counter': {'helpful': 0,
   'harmful': 0,
   'neutral': 0},
  'content': "To trace a path, always start from the initial room and sequentially follow each door's explicit destination as described in the context. Prioritize sequential connections over assumptions about room locations, especially when doors and stairs are involved."}}

In [219]:
generator_system_prompt, generator_user_prompt = create_generator_prompt(query, context)
generator_payload = {
    "messages": [
        {
            "role": "system",
            "content": generator_system_prompt
        },
        {
            "role": "user",
            "content": generator_user_prompt
        }
    ]
}
generate_response = call_llm(generator_payload)
generator_final_output =  generate_response.split("</think>")[1]

# Clean up the JSON string by stripping leading/trailing whitespace
json_str = generator_final_output.strip()

# Now, load it into a Python dictionary
generator_final_output = json.loads(json_str)

# Iteration 0:

{'4b5c9ec8-1ed3-42a9-9b57-b63eeeb21ced': {'counter': {'helpful': 0,
   'harmful': 0,
   'neutral': 0},
  'content': 'When you open a door, you either have a bedroom, a bathroom, or a bigger space to enter into.'}}
{'4b5c9ec8-1ed3-42a9-9b57-b63eeeb21ced': {'counter': {'helpful': 0,
   'harmful': 0,
   'neutral': 0},
  'content': 'When you open a door, you either have a bedroom, a bathroom, or a bigger space to enter into.'}}
{'reasoning': "The provided information does not include a direct link or path from the master bedroom to the personal bedroom. The context describes the layout of the house, including the number of doors and rooms on different floors, but does not explicitly state how the master bedroom is connected to the personal bedroom. The playbook's content does not provide any additional useful information for constructing a linked list path between the rooms. Therefore, it is impossible to determine the exact path from the master bedroom to the personal bedroom based on the given information.",
 'bullet_ids': ['eea2b590-4063-441e-aca4-30b48944d46f'],
 'final_answer': 'It is not possible to determine the path from the master bedroom to the personal bedroom based on the given information.'}

## Iteration 1:

In [202]:
generator_final_output

{'reasoning': "To determine the path to your personal bedroom starting from the master bedroom, we follow the given context and apply the strategy of tracing a path by sequentially following each door's destination. Starting from the master bedroom on the basement floor, we move to the living room via the door. From there, using the stair to reach the upper floor leads us to an upper living room. The path then continues by locating the bedroom near the AC control box, which is in the center of the upper floor. Thus, the linked list begins with the master bedroom and sequentially connects through each room.",
 'bullet_ids': ['4b5c9ec8-1ed3-42a9-9b57-b63eeeb21ced',
  'fa42a8b8-2be3-492a-89fe-6e9911dc907f'],
 'final_answer': ['master bedroom',
  'living room',
  'upper living room',
  'bedroom near ac control box']}

## Iteration 2:

In [220]:
generator_final_output

{'reasoning': "The question asks for a path from the master bedroom to the personal bedroom in a linked list format. The playbook suggests starting from the initial room and following each door's explicit destination, prioritizing sequential connections. The context indicates that the master bedroom is on the basement floor. The only door from the basement opens to the living room. From there, stairs lead to an upper floor. The upstairs has two bedrooms, one of which is near the AC control box, and another is in a corner. The personal bedroom must be identified based on these details. However, the exact mapping of rooms to doors is unclear, making it difficult to trace a definitive path. The key is identifying the personal bedroom and mapping its connection back to the master bedroom through the described structure.",
 'bullet_ids': ['fa42a8b8-2be3-492a-89fe-6e9911dc907f',
  '34c2830a-bcec-4f9e-8ef2-df9517fbed24'],
 'final_answer': ['Master Bedroom -> Living Room -> Upper Floor Living 