In [None]:
# for google colab
# !pip install bitsandbytes

In [None]:
# for google colab
# from huggingface_hub import login
# login()

In [1]:
import transformers
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import os
import torch
import numpy as np
import re
from tqdm import tqdm
from scipy.special import softmax
from datetime import datetime

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

In [3]:
torch_device='cuda'

In [4]:
model=AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", 
                                           quantization_config=quantization_config, 
                                           torch_dtype=torch.float32, 
                                           device_map=torch_device)

tokenizer=AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████| 2/2 [00:07<00:00,  3.69s/it]


In [5]:
tokenizer.pad_token=tokenizer.eos_token
tokenizer.pad_token_id=tokenizer.eos_token_id

In [6]:
eor_token = 'This is the end of this step' #Use this for trying to get a regex match to terminate generation

In [7]:
## KNOWLEDGE TEMPLATE
def generic_message_template(textinput: str):
    generic_messages = [
        {"role": "system",
         "content": f"""
                You are a helpful chatbot who will always give the correct answer.
                """
        },
        {"role": "user", "content": f"{textinput}"},
    ]

    return generic_messages

def knowledge_message_template(textinput: str):

    knowledge_messages = [
        {"role": "system", 
         "content": f"""
                You are a helpful chatbot who will always give the correct answer, and use chain of thought reasoning to think step by step through complicated problems. 
                Consider what information you would need to find, what operations you would need to solve the problem, and give reasoning for each step. 
                Make each step the smallest logical unit possible, and make each step 128 words or less.
                """
        },
        {"role": "user", "content": f"{textinput}"},
    ]

    return knowledge_messages

    ### other stuff I tried in the system chat head for knowledge message
    
        # , and output a {eor_token} at the END of each step of reasoning. Make your outputs for each step incredibly short. If you do not output a {eor_token}, you will be penalized heavily.
    
        #         Format your logic as follows:
    
        #         STEP 1: reasoning... {eor_token}
        #         STEP 2: reasoning... {eor_token}
        #         ...
        #         STEP n: reasoning... {eor_token}
    
        #         """,
    
    
                # For example, if asked the following:
                
                # Question:
                # 'If there are 10 birds on a branch, and 3 fly away, how many birds are left?'
    
                # Answer:
                # 1. There are 10 birds initially, and 3 fly away, which means we have 10-3 birds left on the branch. {EndOfChain}
                # 2. 10 - 3 = 7. {EndOfChain}
                # 3. Therefore, there are 7 birds left on the branch. {EndOfChain}

def reasoning_message_template(question: str, logic: str):

    reasoning_messages = [
        {"role": "system", 
         "content": f"""
               You are a critical chatbot who is supposed to evaluate how strong the reasoning and logic is of a user's input. They will always give you a question and some logic, reasoning, and potentially some maths, and you have to rate it on a scale of 1-100 for how logical it is, with 10 being the most
               logical and correct, and 1 being the least. Here are some examples:

               ----

               USER: Question: If I walked at 5km/h and my destination was 15km away, how long would it take me to walk to my destination and back?

               Reasoning: To solve this, I need to know how far I am walking, and the speed of my walk:

               Step 1: determine how far I am walking. Distance to destination is 15km, and I am walking back to where I started. Therefore, I am walking 15km there, and 15km back.
               Therefore, distance walked is 15km*2 = 30km

               Step 2: I know I am walking 5km/h, and I need to walk 30km. 30km divided by 5km/h is 30/5=6.
               
               Therefore it would take me 6 hours to walk to my destination and back

               SYSTEM: There are no logical flaws in its statements, therefore this answer is a 100 out of 100 in reasoning and logic.


               ----

               USER: Question: if I have 10 apples and I eat 5, how many apples do I have left to eat?

               Reasoning: To solve this, I need to know how many apples I started with, and how many I ate, and then subtract the number I ate from the number I started with.

               Step 1: I have 10 apples, and I eat 5, then I subtract 5 from 10. 10 - 5 = 7.

               Therefore, I have 7 apples left

               SYSTEM: Even though the reasoning is correct, the maths is wrong and therefore the answer is wrong. Because of this, this answer is a 40 out of 100. 

               ----

               USER: Question: What is the price of 10 beans, if 20 oranges cost $20, and each bean costs 1/3 of the price of an orange?

               Reasoning: To solve this, I need to work out the cost of one bean, and then mulitply by the number of beans.

               Step 1: the cost of one bean is $20 * 1/3, because a bean costs 1/3 of the price of an orange. $20 * 1/3 = $6.238176

               Step 2: multiply the cost of one bean by 10 to get the cost of 10 beans. $6.238176 * 10 = $523.893284

               Therefore the price of 10 beans is $523.893284

               SYSTEM: Because there are multiple logical errors and multiple arithmetic errors, this answer is a 1 out of 100. 

               ---

               Now rate the following:

                """
        },
        {"role": "user", "content": f"""
            Question: {question}
            Reasoning: {logic}
        
        """},
    ]
    return reasoning_messages

def tokenize_message(message):
    inputs_chattemp = tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(inputs_chattemp, return_tensors='pt').to(torch_device)
    return inputs


    


In [8]:
question = 'If I ran around a rectangular park twice, where the longer side is 123 metres and the shorter side is 45 metres, how far did I run, and what is the area of the park?'

good_logic = """
To answer these questions, I need to find out the perimeter of the park, and then double it to find out the distance I ran, and then separately I must also multiply the two sides to get the area.

Step 1: The longer side of the park is 123 metres, and the shorter side is 45 metres. The perimeter is 2*123m + 2*45m = 336m.

Step 2: I ran around the park twice. Twice the perimeter = 2*336m = 672m

Therefore I ran 672m.

Step 3: To get the area, I must multiply both sides. 123m * 45m = 5535m2

Therefore, I ran 672m, and the area of the park is 5535m2
"""

bad_logic = """
To answer these questions, I need to find out the distance ran, and the area.

Step 1: The distance ran is twice the perimeter. The perimeter = 123m + 45m = 168m. Therefore the distance ran is 168m * 2 = 302m.

Step 2: the area of the park is equal to perimeter^2. Perimeter is 302m^2 = 8172m2.

Therefore I ran 302m, and the area of the park is 8172m2
"""

In [9]:
question = 'Yes or no: Would a pear sink in water?'

good_logic = """
The density of a pear is about 0.6 g/cm^3, which is less than water. Thus, a pear would float. Therefore, the answer is no.
"""

bad_logic = """
We do not have enough information to answer the question, but since most pears are not filled with air, the pear would sink. Therefore, the answer is yes.
"""

incomplete_logic = """
To determine if a pear would sink in water, we do not have enough information.

Most pears have no air in them
"""

In [10]:
# GENERIC PIPELINE

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512, do_sample = True)

In [17]:
question = 'Is 0.9 bigger than 0.11?'

question = '''
I am in France, in one of its most famous art museums. I am looking at its most famous painting. The name of the person who painted this picture is the same as the name of my favourite cartoon character growing up. What is the name of my favourite cartoon?
'''

In [18]:
# GENERIC QA

generic_output = pipe(generic_message_template(question), temperature = 0.1)
print(generic_output[0]['generated_text'][-1]['content'])

That's a interesting coincidence. However, I need a bit more information to answer your question. You mentioned that the person who painted the famous painting is the same as the name of your favorite cartoon character. Based on the fact that you're in France and looking at one of its most famous art museums, I'm going to take a guess that the painting you're looking at is the Mona Lisa.

If that's correct, I believe the cartoon character with the same name is SpongeBob SquarePants.


In [22]:
# CHAIN OF THOUGHT QA

CoT_output = pipe(knowledge_message_template(question), temperature = 0.1)
print(CoT_output[0]['generated_text'][-1]['content'])

To determine the name of your favorite cartoon character, I'll need to consider the connection between the famous painting and your favorite cartoon character. 

Step 1: Identify the famous painting in the museum. The painting is likely "Mona Lisa" by Leonardo da Vinci, as it is one of the most famous paintings in the Louvre Museum in Paris, France.

Step 2: Recall the name of your favorite cartoon character. Since the name of the person who painted the painting is the same as the name of your favorite cartoon character, I'll need to find the name of the person who painted the "Mona Lisa".

Step 3: Research the name of the person who painted the "Mona Lisa". The person who painted the "Mona Lisa" is Leonardo da Vinci.

Step 4: Determine the name of your favorite cartoon character. Since the name of the person who painted the "Mona Lisa" is Leonardo da Vinci, and you mentioned that the name of the person is the same as the name of your favorite cartoon character, I'll need to find the n

In [25]:
# MTCS chain of thought using only strict token limits
num_branches = 5

num_candidates = 2

max_iters = 6

current_iter = 0

final_branches = []

running_outputs = []

# Conditions to stop generating: if minimum number of final candidates met, if max iters hit, 
generating = True

while generating:
    starttime = datetime.now()
    
    if current_iter == 0:
        inputs_cot = [tokenize_message(knowledge_message_template(question))]
        branches_list=[]
    else:
        inputs_cot = [tokenizer(branch, return_tensors='pt').to(torch_device) for branch in branches_list]

    #save interim text outputs:
    running_outputs.append(branches_list)

    #re-init all interim variables
    branches_list = []

    logic_scores = []


    # generate initial number of branches
    print(f"generating branches for iteration {current_iter}")
    for candidate in inputs_cot:
        for i in tqdm(range(num_branches)):
            branchoutput = model.generate(**candidate, max_new_tokens=64, temperature=0.1).cpu()
            output_text = tokenizer.batch_decode(branchoutput)[0]
            branches_list.append(output_text)
    print(f"evaluating logic of {len(branches_list)} branches for iteration {current_iter}")
    #evaluate logic
    for output in tqdm(branches_list):
        #clean outputs
        chat_list = output.split('<|eot_id|>')
        # ADD LOGIC HERE TO GET GENERATION TO STOP AFTER THERE'S TWO 
    
        # MAKE THE LOGIC THAT WE MUST HIT MINIMUM NUMBER OF NUM_CANDIDATES BEFORE STOPPING GENERATING
        if len(chat_list) > 3:
            final_branches.append(output)
            branches_list.remove(output)
            continue
        
        # isolating the output logic
        logic = chat_list[2].replace("<|start_header_id|>assistant<|end_header_id|>\n", "")
        results = pipe(reasoning_message_template(question, logic), temperature = 0.1)
        results_text = results[-1]['generated_text'][-1]['content']
        #jank error handling for weird generations
        try:
            results_score = int(re.search('([\d]+)(?=\s+out of 10)', results_text).group(0))
        except:
            print(f"no valid string match for {output} results - the output was: \n {results_text}")
            results_score = 0
            # NOTE: this is potentially getting rid of valid and good responses, but I'm being super lazy to get an MVP loop working
            continue
    
        logic_scores.append(results_score)

    
    # if we reach the
    if len(final_branches) > num_candidates:
        generating = False
        print(f"stopping generation and iteration because we have more than {num_candidates} final outputs - we have {len(final_branches)} complete solutions to examine")
        final_branch_scores = []
        for solution in tqdm(final_branches):
            chat_list = output.split('<|eot_id|>')
            logic = chat_list[2].replace("<|start_header_id|>assistant<|end_header_id|>\n", "")
            results = pipe(reasoning_message_template(question, logic), temperature = 0.1)
            results_text = results[-1]['generated_text'][-1]['content']
            try:
                results_score = int(re.search('([\d]+)(?=\s+out of 10)', results_text).group(0))
            except:
                print(f"no valid string match for {output} results - the output was: \n {results_text}")
                results_score = 0
                # NOTE: this is potentially getting rid of valid and good responses, but I'm being super lazy to get an MVP loop working
                continue
            final_branch_scores.append(results_score)

        selection = np.argmax(final_branch_scores)

        final_output = final_branches[selection]
        print(final_output)
        print("~*~*~*~*~*~*~*~*~*~*~*~*~")
        print(f"Time spent thinking: {datetime.now()-starttime}")

    elif current_iter >= max_iters:
        generating = False
        print(f"stopping generation and iteration - we have exceeded {max_iters} iterations. We have {len(final_branches)} complete solutions and {len(branches_list)} dangling branches to examine.")
        final_branches = final_branches.extend(branches_list)
        final_branch_scores = []
        for solution in tqdm(final_branches):
            chat_list = output.split('<|eot_id|>')
            logic = chat_list[2].replace("<|start_header_id|>assistant<|end_header_id|>\n", "")
            results = pipe(reasoning_message_template(question, logic), temperature = 0.1)
            results_text = results[-1]['generated_text'][-1]['content']
            try:
                results_score = int(re.search('([\d]+)(?=\s+out of 10)', results_text).group(0))
            except:
                print(f"no valid string match for {output} results - the output was: \n {results_text}")
                results_score = 0
                # NOTE: this is potentially getting rid of valid and good responses, but I'm being super lazy to get an MVP loop working
                continue
            final_branch_scores.append(results_score)

        selection = np.argmax(final_branch_scores)

        final_output = final_branches[selection]
        print(final_output)
        print("~*~*~*~*~*~*~*~*~*~*~*~*~")
        print(f"Time spent thinking: {datetime.now()-starttime}")
    
    else:
        probs = softmax(logic_scores).tolist()
        chosen_keys = np.random.choice(len(probs), size=num_candidates, replace = False, p = probs).tolist()
        branches_list = [branches_list[index] for index in chosen_keys]
        current_iter +=1

            



generating branches for iteration 0


100%|█████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:20<00:00,  4.01s/it]


evaluating logic of 5 branches for iteration 0


100%|█████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:50<00:00, 10.15s/it]


generating branches for iteration 1


100%|█████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:22<00:00,  4.49s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:24<00:00,  4.95s/it]


evaluating logic of 10 branches for iteration 1


100%|███████████████████████████████████████████████████████████████████████████████████| 10/10 [02:30<00:00, 15.08s/it]


generating branches for iteration 2


100%|█████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:18<00:00,  3.75s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:21<00:00,  4.37s/it]


evaluating logic of 10 branches for iteration 2


 90%|███████████████████████████████████████████████████████████████████████████▌        | 9/10 [02:14<00:14, 14.94s/it]


generating branches for iteration 3


100%|█████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00,  1.92s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00,  1.64s/it]


evaluating logic of 10 branches for iteration 3


 50%|████████████████████████████████████████▌                                        | 5/10 [00:00<00:00, 45689.59it/s]


stopping generation and iteration because we have more than 2 final outputs - we have 6 complete solutions to examine


100%|█████████████████████████████████████████████████████████████████████████████████████| 6/6 [01:33<00:00, 15.59s/it]

<|begin_of_text|><|begin_of_text|><|begin_of_text|><|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are a helpful chatbot who will always give the correct answer, and use chain of thought reasoning to think step by step through complicated problems. 
                Consider what information you would need to find, what operations you would need to solve the problem, and give reasoning for each step. 
                Make each step the smallest logical unit possible, and make each step 128 words or less.<|eot_id|><|start_header_id|>user<|end_header_id|>

I am in France, in one of its most famous art museums. I am looking at its most famous painting. The name of the person who painted this picture is the same as the name of my favourite cartoon character growing up. What is the name of my favourite cartoon?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

To solve this problem, I'll break it down into 




In [26]:
final_branch_scores

[60, 60, 60, 60, 60, 60]

In [27]:
for i in final_branches:
    print(i)
    print('~~~~~~~~~')

<|begin_of_text|><|begin_of_text|><|begin_of_text|><|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are a helpful chatbot who will always give the correct answer, and use chain of thought reasoning to think step by step through complicated problems. 
                Consider what information you would need to find, what operations you would need to solve the problem, and give reasoning for each step. 
                Make each step the smallest logical unit possible, and make each step 128 words or less.<|eot_id|><|start_header_id|>user<|end_header_id|>

I am in France, in one of its most famous art museums. I am looking at its most famous painting. The name of the person who painted this picture is the same as the name of my favourite cartoon character growing up. What is the name of my favourite cartoon?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

To solve this problem, I'll break it down into 