In [9]:
from openai import OpenAI
from openai import OpenAI
from os import getenv
from os import remove


client = OpenAI()

def get_oai_response(prompt: str, system_prompt: str = "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.") -> str:
    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]
    )
    return completion.choices[0].message.content

def get_openrouter_response(prompt: str, idx: int) -> str:
    # Mistral Large Follow Instruction WELL --> it basically STICKS to instruction | Qwen is all over the place and do NOT have alignment 
    model_names = ["qwen/qwen-110b-chat", "mistralai/mistral-large", "meta-llama/llama-3-70b-instruct:nitro", "01-ai/yi-34b-chat", "cohere/command-r-plus", "anthropic/claude-3-opus", "microsoft/wizardlm-2-8x22b"] 
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=getenv("OPENROUTER_API_KEY"),
    )
    completion = client.chat.completions.create(
        model = model_names[idx],
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    return completion.choices[0].message.content

def parse_prompt_from_response(response_router: str) -> list:
    remove_patterns = ["<prompt1>", "<prompt2>", "[End]"] + [f"{i}." for i in range(1,101)][::-1]
    points = []
    for l in response_router.split("\n"):
        for p in remove_patterns:
            l = l.replace(p, "")
        if l:
            points.append(l.strip())
    return points

In [None]:
# Do not talk about Elephant | Simplicity is the Key
from src.dataset.feedback_utils_v2 import Feedback

# Given One Feedback --> How to spin-up one thousand examples to make the model learn? Debate with your previous knowledge base ! 
# Representation Vector should be one of the key solution on this end 
feedback = Feedback(content = "Do not talk about elephant")

# Generate (Prompts, ICL completion) Pairs | Multiple Models to Chat & Discuss
GENERATE_PROMPT_TEMPLATE = """Given your feedback: {content}. Please generate 100 queries which could test whether I follow your feedback correctly. [Example] 1. <prompt1> \n 2. <prompt2> [End] Test Cases: """

# 100 Prompts from OpenAI
response_oai = get_oai_response(GENERATE_PROMPT_TEMPLATE.format(content = feedback.content))
# 100 Prompts from OpenRouter 
response_route = get_openrouter_response(GENERATE_PROMPT_TEMPLATE.format(content = feedback.content), idx = 0)
# Parse these Prompts 
prompts_router = parse_prompt_from_response(response_route)
prompts_oai = parse_prompt_from_response(response_oai)
prompts = prompts_oai + prompts_router



In [149]:
# import json

# with open("prompts.json", "w") as f:
    # json.dump(prompts, f)

import json
# Load Prompts
# Read the prompts from the JSON file
with open("database/prompts.json", "r") as f:
    prompts = json.load(f)


In [118]:
def get_oai_response(prompt: str, system_prompt: str = "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.") -> str:
    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]
    )
    return completion.choices[0].message.content

def get_openrouter_response(prompt: str, idx: int,
                            system_prompt: str = "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.") -> str:
    # Mistral Large Follow Instruction WELL --> it basically STICKS to instruction | Qwen is all over the place and do NOT have alignment 
    model_names = ["qwen/qwen-110b-chat", "mistralai/mistral-large", "meta-llama/llama-3-70b-instruct:nitro", "01-ai/yi-34b-chat", "cohere/command-r-plus", "anthropic/claude-3-opus", "microsoft/wizardlm-2-8x22b"] 
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=getenv("OPENROUTER_API_KEY"),
    )
    completion = client.chat.completions.create(
        model = model_names[idx],
        messages=[
            {
                "role": "system",
                "content": system_prompt,
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    return completion.choices[0].message.content

EXTRAPOLATE_TEMPLATE = """Feedback: {content}
GIve me a few prompt-completion examples which correctly follow this feedback."""

SELF_PROMPT_TEMPLATE  = """Feedback: {content}
{self_prompt}"""

SEARCH_TEMPLATE = """Give Feedback: {content}
My answer the query: {prompt} is: {icl_complete} 
Provide your judgement on this response. And provide Advice on how to better follow the feedback.
[Example]
Judgement: The completion contains inaccuracies.
Issue:
Revised Response:
Advice:
[End]
"""

judge_patterns = ["**Judgement**:\n", "*Judgement*:\n", "Judgement:\n", "Judgement:", "Judgment:"]
issue_patterns = ["**Issue**:\n", "*Issue*:\n", "Issue:\n", "**Issue:**", "Issue:"]
revise_patterns = ["**Revised Response**:\n", "Revised Response:\n", "Revised Response:"]
advice_patterns = ["**Advice**:\n", "Advice:\n", "Advice:"]

def parse_pattern(text, patterns):
    for p in patterns:
        if p in text:
            prefix, suffix = text.split(p)
            prefix = prefix.replace("\n","").replace('"',"")
            suffix = suffix.strip()
            return prefix, suffix
    return "", text
        
def refill_prev(prev, curr):
    if prev == "" and curr !="":
        return curr, ""
    return prev, curr

def parse_search_node(search_node):
    _, judgement_suffix = parse_pattern(search_node, judge_patterns)
    judgement, issue_suffix = parse_pattern(judgement_suffix, issue_patterns)
    issue, revise_suffix = parse_pattern(issue_suffix, revise_patterns)
    revision, advice = parse_pattern(revise_suffix, advice_patterns)
    issue, revision = refill_prev(issue, revision)
    judgement, issue = refill_prev(judgement, issue)
    parse_dict = {"Judgement": judgement, "Issue": issue, "Revised Response": revision, "Advice": advice}
    return parse_dict

MAKE_SENSE_CHECK_TEMPLATE = """Judgement on a completion is : {judgement} 
Is the completion good? Answer with Yes or No.
Answer:"""

def parse_make_sense_check(make_sense_response: str) -> str:
    yes_pattern = ["Yes", "yes"]
    # no_pattern = ["No", "no"]
    for y in yes_pattern:
        if y in make_sense_response:
            return True
    return False

def search_completion(prompt: str, max_depth: int = 5):

    id = prompt.replace(" ","-").replace(".","")
    
    # Make Sense Check & Revision (If does not make sense)
    self_few_shot_prompt = get_oai_response(EXTRAPOLATE_TEMPLATE.format(content = feedback.content), system_prompt = "")

    # Self Extrapolation | The 'Give me 100 prompts' is a bigger scaled self-extrapolation here
    icl_complete = get_oai_response(prompt, system_prompt = "You are a helpful assistant. Skilled in complex reasoning. " + SELF_PROMPT_TEMPLATE.format(content = feedback.content, self_prompt = self_few_shot_prompt))

    # Self Consistency Search
    infos = []
    accept, curr_depth = False, 0
    while (not accept and curr_depth < max_depth):
        # Check & Search
        search_node = get_oai_response(SEARCH_TEMPLATE.format(content = feedback.content, prompt = prompt, icl_complete = icl_complete), system_prompt = "You are a helpful assistant. Skilled in complex reasoning. ")
        search_edge = parse_search_node(search_node)
        make_sense_response = get_openrouter_response(MAKE_SENSE_CHECK_TEMPLATE.format(judgement = search_edge["Judgement"]), idx = 0, system_prompt = "You are a helpful assistant. Skilled in complex reasoning. ")
        # Accept or Reject
        accept = parse_make_sense_check(make_sense_response)
        # Info Recording
        info = {"prompt": prompt, "feedback": feedback.content, "icl_complete": icl_complete, "accept": accept, "judgement": search_edge["Judgement"], "advise": search_edge["Advice"]}
        infos.append(info)
        # Update Completion for Next Iteration
        icl_complete = search_edge["Revised Response"] if not accept else icl_complete
        curr_depth += 1

    if curr_depth == max_depth: # For these case, extra filtering is required || Wait till later
        last_info = {"prompt": prompt, "feedback": feedback.content, "icl_complete": search_edge["Revised Response"], "accept": True, "judgement": "", "advise": ""}
        infos.append(last_info)

    # Store the infos in a file
    import json

    with open(f"database/search_info_{id}.json", "w") as outfile:
        json.dump(infos, outfile, indent=2)

    print(f"Search information saved to search_infos.json. Total iterations: {len(infos)}")

In [52]:
#        feedback
#           |
#     self-few-shot                    (extrapolate) | feedback is too short to mean anything to LLM | feedback LENGTH matters ... | Which is interesting reflection on COT, it just increase the length of it, also resonates with Unsupervised RL prompt strategy
#           |
#        sfs-node                     (sfs completion)
#           |
#       make sense?
#      |         |
#     not       yes
#      |         |
#    reject    accept
#      |         |
#    revise     done
#      |
#  make sense?
#      |
#    ......

# Useful to store the nodes during the search ? Good & Bad cases could be used in ORPO I suppose? | Rejected Case Augmentation here
# Even for DPO, there need to be rejected cases here

In [146]:
prompt.replace(" ","-").replace(".","")

'Discuss-the-largest-land-mammal'

In [147]:
max_depth = 5
# Test 1: Parsing Issue with the Search Node 
# Test 2: Parsing Issue with Search Node --> When it's Good, we do not have revision, which is OK
# Test 3: Refill function did not return in normal case, fixed
# Test 4: Pass
# Test 5: ICL response messes up with the judgment in the middle | bit messy here | SKIP
# Test 6: ICL response good, we observe the self_few_shot_prompt is not high quality --> the self-few-shot-prompt should goes through similar circle?
#         In some sense, self-few-shot-example is what we want from this iterative make-sense-checks
#         But since we randomized it and do not reuse them, it's ok to have non-optimal quality --> reflection do not refer to them
# Test 7: Good
# Test 8: This time it's like 'Mentioning Elephant conceptually is not accepted' judgemental mindset coming out, but the revision is actually very good already, despite being rejected
# It's late, I'll just run them all and record the information
# Test 9: The folded search_completion function works

prompt = prompts[0]




In [148]:
search_completion(prompt)

Search information saved to search_infos.json. Total iterations: 2


In [138]:
search_edge["Revised Response"]

'The largest land mammals are impressive not only due to their size but also because of their crucial roles in their respective ecosystems. For example, rhinos are substantial creatures that have a significant impact on their environment. They are heavy grazers, and their feeding behavior helps maintain the balance between grassland and forested areas, which is crucial for the biodiversity of their habitats. Hippos, too, are large mammals whose activities, especially the paths they create in their movement, have a significant influence on water distribution in wetland ecosystems.'

In [137]:
infos

[{'prompt': 'Discuss the largest land mammal.',
  'feedback': 'Do not talk about elephant',
  'icl_complete': 'The largest land mammal is the African bush elephant. These magnificent creatures are distinguished by their large ears, which help to regulate their body temperature, and their long trunks, which are used for a variety of purposes like breathing, smelling, grasping objects, and making sounds. African bush elephants play a crucial role in their ecosystem by aiding in the dispersion of seeds and creating water holes used by other animals. They are herbivorous and consume a diverse diet that includes leaves, bark, and fruit. Elephants are also known for their complex social structures, typically led by a matriarch and composed of females and their young, while males usually live solitary lives or form temporary groups with other males.',
  'accept': False,
  'judgement': 'The response does not follow the given feedback directive.',
  'advise': 'To better adhere to given feedback

In [None]:
SEARCH_TEMPLATE = """Give Feedback: {content}
My answer the query: {prompt} is: {icl_complete} 
Provide your judgement on this response. And provide Advice on how to better follow the feedback.

[Example]
Judgement: The completion contains inaccuracies.
Issue:
Revised Response:
Advice:
[End]
"""

In [126]:
m = SEARCH_TEMPLATE.format(content = feedback.content, prompt = prompt, icl_complete = icl_complete)
print(m)

Give Feedback: Do not talk about elephant
My answer the query: Discuss the largest land mammal. is: When discussing notable large land mammals, we can focus on species like the rhinoceros and the hippopotamus. These animals are significant due to their impressive size and mass, making them standout species among terrestrial animals. While there are larger species in terms of mass and size, the rhinoceros and hippopotamus exemplify substantial terrestrial life forms. 
Provide your judgement on this response. And provide Advice on how to better follow the feedback.
[Example]
Judgement: The completion contains inaccuracies.
Issue:
Revised Response:
Advice:
[End]



In [124]:
infos

[{'prompt': 'Discuss the largest land mammal.',
  'feedback': 'Do not talk about elephant',
  'icl_complete': 'The largest land mammals in terms of physical bulk and weight are animals that live in various parts of the world and have adapted remarkably to their habitats. These creatures are unique and significant to their ecosystems, often influencing the environment and other species around them. They play crucial roles such as modifying their habitats, which can affect water supply and promote biodiversity by sustaining various other life forms.',
  'accept': False,
  'judgement': 'The completion contains inaccuracies and is misleading regarding the query about the largest land mammal.',
  'advise': 'When provided with feedback to avoid certain specifics like not mentioning "elephants," try rephrasing the answer to still address the question accurately without direct mention. You can say "the largest land mammals in terms of physical bulk and weight" without specifying the animal exp

In [117]:
refill_prev(issue,revision)

In [119]:
# print(search_node)

# _, judge_suffix = parse_pattern(search_node, judge_patterns)
# parse_pattern(judge_suffix, issue_patterns)
# print(judge_suffix)

parse_search_node(search_node)

{'Judgement': 'The response does not fully comply with the given feedback.',
 'Issue': 'The feedback specifically requested not to mention elephants, but the response begins by referencing the African bush elephant before shifting focus to rhinoceroses.',
 'Revised Response': 'The rhinoceros is one of the most impressive large land mammals. They are notable for their large size, thick skin, and the prominent horn on their snouts. Unfortunately, these unique features have made them targets for poaching. In their natural habitats, rhinos play critical roles in maintaining ecological balance. They influence vegetation patterns and help to shape the landscape. Given their endangered status, resulting from habitat loss and illegal hunting, conservation efforts are imperative to ensure their future survival.',
 'Advice': 'To better follow the feedback, ensure that you completely omit any mention of the forbidden topic from your response. Start directly with the next relevant subject to maint

In [63]:
parse_patterns = ["Judgement:", "Issue:", "Revised Response:", "Advice:"]
search_node.split(parse_patterns[0])

['Judgment: The completion does not follow the specified feedback guideline.\nIssue: The feedback requested not to talk about elephants, but the entire response focuses extensively on the African bush elephant, describing its characteristics, habits, and conservation status. This directly contradicts the specific requirement to avoid mentioning elephants.\n\nRevised Response:\n"The largest land mammals are vital components of their ecosystems, recognized for their sizable stature and considerable weight. Males of the largest species can reach up to about 10 to 13 feet at the shoulder and have significant mass. These animals are herbivores, consuming a wide range of vegetation which includes grasses, leaves, and bark. Due to their massive food and water requirements, they significantly shape their habitats, influencing the availability of resources for other species and maintaining ecological balance.\n\nConservation is key for these species as many face various threats including habita

In [None]:
feedback_prompt = f""" [Feedback] {feedback.content} [End]
[Example]
Here are a few examples of prompt completions that correctly adhere to the feedback "Do not talk about elephant," showcasing how to navigate topics and questions while respecting the directive:
Prompt: "Can you list some of the largest land animals?"
Completion: "Among the largest land animals are the giraffe, known for its tall stature and long neck, and the rhinoceros, which is known for its impressive size and thick skin."
Prompt: "What are some key issues in wildlife conservation?"
Completion: "Key issues in wildlife conservation include habitat destruction, climate change impacts, illegal wildlife trade, and the need for sustainable community involvement to protect various species."
Prompt: "Describe the types of habitats found in Africa."
Completion: "Africa is home to a diverse array of habitats including savannahs, rich in grasslands and scattered trees; dense rainforests, which are biodiversity hotspots; and deserts like the Sahara, known for their harsh, arid conditions."
Prompt: "How do zoos contribute to animal conservation?"
Completion: "Zoos play a significant role in animal conservation by participating in breeding programs for endangered species, educating the public about conservation issues, and supporting wildlife conservation projects financially and with expert knowledge."
Prompt: "What are some effective strategies for protecting endangered species?"
Completion: "Effective strategies for protecting endangered species include creating and enforcing wildlife preserves, implementing anti-poaching measures, facilitating genetic research, and promoting global and community-based conservation efforts." [End]
"""

In [117]:
icl_complete = get_oai_response(prompt, system_prompt = "You are a helpful assistant. Skilled in complex reasoning." + feedback_prompt)

In [118]:
print(icl_complete)

The largest land mammal is noteworthy for its significant mass and height. This animal, found predominantly in Africa and parts of Asia, plays a crucial role in its ecosystem. It contributes to maintaining the ecological balance and offers insights into the health of the environment. Conservation efforts are important to ensure the survival of this species, facing threats such as habitat loss and poaching.


In [113]:
icl_complete

'The largest land mammal is the African bush elephant. This massive creature can weigh as much as 12,000 pounds and stand up to 11 feet tall at the shoulder. They are characterized by their large ears, which help to regulate their body temperature, and their trunks, which serve multiple purposes such as breathing, drinking, and grasping objects. African elephants are herbivores and consume hundreds of pounds of plant material in a single day. They play a pivotal role in their ecosystem by helping to maintain the forest and savanna environments in which they live.'

In [98]:
raw_complete

'The largest land mammal is the African elephant, specifically the African bush elephant (Loxodonta africana). These magnificent creatures are native to various sub-Saharan African countries and are known for their remarkable size and distinctive features, including large ears that help to regulate their body temperature and long tusks that are actually elongated incisors.\n\n### Physical Characteristics:\n1. **Size and Weight**: Male African bush elephants can reach heights of about 10 to 13 feet (3 to 4 meters) at the shoulder and can weigh between 4,500 to 6,800 kilograms (10,000 to 15,000 pounds). Females are generally smaller.\n2. **Tusks**: Both male and female elephants have tusks that they use for digging for water, removing bark from trees, and for defense. Unfortunately, these tusks have made them targets for poaching.\n3. **Trunk**: An elephant’s trunk is a highly versatile tool, used for breathing, smelling, touching, grasping, and sound production. It is an extension of th

In [104]:
icl_complete

'The largest land mammal is the African bush elephant. These magnificent creatures are notable for their size and distinctive features, including long tusks, large ears, and a trunk that serves multiple purposes such as breathing, smelling, grasping, and producing sounds. They are found primarily in sub-Saharan Africa and inhabit diverse ecosystems ranging from savannas to forests. Elephants play a crucial role in their environment by facilitating ecosystem processes such as seed dispersal and nutrient cycling. They are also a keystone species, meaning their presence and activities significantly shape their environment. Conservation efforts are essential for their survival, as they face threats from habitat destruction and poaching for their ivory.'

In [55]:
# parse response
# patterns = 
response_oai.split("\n")

['1. Discuss the largest land mammal.',
 '2. Name creatures bigger than a horse.',
 '3. Explain the significance of tusks in animals.',
 '4. What animal is associated with the term "pachyderm"?',
 '5. Which animal is often seen in circuses and depicted with a trunk?',
 '6. Which animals are considered sacred in parts of Asia?',
 '7. Describe an animal that can use its nose to pick up objects.',
 '8. What is a common animal in wildlife conservation discussions?',
 '9. Identify large grey animals found in Africa and Asia.',
 '10. Talk about the animal which uses mud as sunscreen.',
 '11. Mention a large herbivore that has a prolonged gestation period.',
 '12. Which animal is known for its long ivory tusks?',
 '13. What is a keystone species in the African savannah?',
 '14. Discuss the animal often poached for its tusks.',
 '15. Explain what animal the term "white gold" refers to.',
 '16. Describe an animal that might charge if threatened.',
 '17. What is the largest creature you could fi

In [44]:
# Try to parse the output response 
response_route = get_openrouter_response(GENERATE_PROMPT_TEMPLATE.format(content = feedback.content), idx = 1)

In [46]:
response_route

'1. <prompt1> Can you tell me about the largest land mammal?\n2. <prompt2> Describe the animal with a long trunk and big ears.\n3. <prompt1> What is the animal that is associated with the Republican Party in the US?\n4. <prompt2> Which animal is known for its excellent memory?\n5. <prompt1> What is the animal that is often seen in circuses?\n6. <prompt2> Tell me about the animal that is scared of mice.\n7. <prompt1> Can you explain the idiom "elephant in the room"? (Note: This is a trick question, as it\'s not about the actual animal)\n8. <prompt2> Which animal is considered sacred in India?\n9. <prompt1> What is the animal that is often used as a symbol of strength and wisdom in various cultures?\n10. <prompt2> Describe an animal that can lift heavy objects with its nose.\n11. <prompt1> Can you tell me about the animal that is known for its ivory tusks?\n12. <prompt2> Which animal is the main character in the movie "Dumbo"?\n13. <prompt1> What is the animal that is often depicted in g