In [14]:
from dotenv import load_dotenv
import yaml
import os
import openai
from src.tree_node import TreeNode
from src.api_completion import Completion
import numpy as np

In [15]:
# Location of the experiment configuration (YAML).
config_file = "config.yaml"

# Parse the experiment settings with PyYAML's safe loader.
with open(config_file) as config_stream:
    cfg = yaml.safe_load(config_stream)


In [16]:
# Load environment variables via python-dotenv, then build the one OpenAI
# client that all three roles share.
_ = load_dotenv()
oai_client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def _role_args(role, *extra_keys):
    """Assemble the completion keyword arguments for one role from ``cfg``.

    Every role reads ``model``, ``system_prompt`` (exposed under the key
    ``system``), ``max_tokens`` and ``temperature`` from ``cfg[role]``.
    Each name in ``extra_keys`` is copied from the same config section
    under an unchanged key, appended after the common entries.
    """
    role_cfg = cfg[role]
    args = {
        "model": role_cfg["model"],
        "system": role_cfg["system_prompt"],
        "max_tokens": role_cfg["max_tokens"],
        "temperature": role_cfg["temperature"],
    }
    for key in extra_keys:
        args[key] = role_cfg[key]
    return args


# Generator: additionally forwards the configured "n".
generator = Completion(oai_client=oai_client)
generator_args = _role_args("generator", "n")

# Verifier: additionally forwards the logprob settings.
verifier = Completion(oai_client=oai_client)
verifier_args = _role_args("verifier", "logprobs", "top_logprobs")

# Evaluator: same argument shape as the verifier, read from its own section.
evaluator = Completion(oai_client=oai_client)
evaluator_args = _role_args("evaluator", "logprobs", "top_logprobs")

In [18]:
# Problem statement for this run; it is handed to TreeNode to create the root.
task = (
    "Kacey is picking out jewelry to wear for school. "
    "She has 10 rings, 4 bracelets, and 4 necklaces. "
    "How many jewelry combinations are possible if she is going to wear "
    "1 ring, 1 bracelet, and 2 necklaces?"
)
tree_root = TreeNode(task)

In [19]:
# First expansion step: the prompt carries only the task, with a placeholder
# where earlier thoughts would otherwise go.
prompt = f"<task>{task}</task>\n(no previous thoughts)\nNext thought:"
result = generator.run(prompt, **generator_args)

# Text of every returned choice, in response order.
thoughts = [choice.message.content for choice in result.choices]

# Show the candidates, numbered from 1.
for number, thought in enumerate(thoughts, start=1):
    print(f"Thought {number}:\n{thought}\n")

# Attach each candidate to the root as a new child node.
for thought in thoughts:
    tree_root.add_child(TreeNode(thought))

Thought 1:
To find the number of jewelry combinations Kacey can wear, I need to multiply the number of choices for each type of jewelry.

Thought 2:
To find the total number of jewelry combinations, I need to multiply the number of choices for each type of jewelry: 10 rings, 4 bracelets, and the number of ways to choose 2 necklaces from 4.



In [20]:
def _first_token_percentages(completion):
    """Turn the top-logprob candidates of the first output token into percentages.

    Reads ``completion.choices[0].logprobs.content[0].top_logprobs`` and maps
    each candidate's token to ``exp(logprob) * 100`` rounded to two decimals,
    returned as a plain ``float``.
    """
    candidates = completion.choices[0].logprobs.content[0].top_logprobs
    return {
        candidate.token: float(np.round(np.exp(candidate.logprob) * 100, 2))
        for candidate in candidates
    }


current_paths = tree_root.get_paths()

for path in current_paths:
    # Prompt layout: the task (first node), any nodes strictly between the
    # first and last joined by newlines, then the last node as the thought
    # under review. An empty middle slice joins to "".
    earlier_thoughts = "\n".join(node.value for node in path[1:-1])
    prompt = (
        f"<task>{path[0].value}</task>\n"
        f"{earlier_thoughts}"
        f"\nLast thought:\n{path[-1].value}"
    )

    # Both judges receive the same prompt; only their argument sets differ.
    verifier_result = verifier.run(prompt, **verifier_args)
    evaluator_result = evaluator.run(prompt, **evaluator_args)

    correctness_scores = _first_token_percentages(verifier_result)
    helpfulness_scores = _first_token_percentages(evaluator_result)

    # Record both score tables on the node that was just judged.
    path[-1].correctness_scores = correctness_scores
    path[-1].helpfulness_scores = helpfulness_scores
    print(f"{prompt}\n{correctness_scores = }\n{helpfulness_scores = }\n\n")


<task>Kacey is picking out jewelry to wear for school. She has 10 rings, 4 bracelets, and 4 necklaces. How many jewelry combinations are possible if she is going to wear 1 ring, 1 bracelet, and 2 necklaces?</task>

Last thought:
To find the number of jewelry combinations Kacey can wear, I need to multiply the number of choices for each type of jewelry.
correctness_scores = {'1': 99.14, '0': 0.86}
helpfulness_scores = {'9': 67.52, '8': 31.89, '7': 0.58, '6': 0.01, '5': 0.0, '4': 0.0, '3': 0.0, '10': 0.0, '2': 0.0, 'The': 0.0}


<task>Kacey is picking out jewelry to wear for school. She has 10 rings, 4 bracelets, and 4 necklaces. How many jewelry combinations are possible if she is going to wear 1 ring, 1 bracelet, and 2 necklaces?</task>

Last thought:
To find the total number of jewelry combinations, I need to multiply the number of choices for each type of jewelry: 10 rings, 4 bracelets, and the number of ways to choose 2 necklaces from 4.
correctness_scores = {'1': 99.99, '0': 0.01}
