<a href="https://colab.research.google.com/github/abbey1203/Tree-of-Thought/blob/main/ToT8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PROMPT ENGINEERING
# TREE OF THOUGHTS
## Kieu Ngoc Nguyen
## Student ID: 103806243

# Install all packages and libraries


In [None]:
!python -m pip install python-dotenv openai guidance transformers datasets sentence-transformers scikit-learn rich rouge-score

In [None]:
import os

# Leave empty to rely on an OPENAI_API_KEY that is already set in the environment
# (or in a .env file). Never commit a real key in this cell.
OPENAI_API_KEY = ''

# The rest of the notebook reads the key from the process environment, so a key
# typed here only takes effect once it is exported.
if OPENAI_API_KEY:
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [None]:

import concurrent.futures
from abc import ABC, abstractmethod
import openai
import os
import guidance
import time
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
import json
DATA_PATH = './data'
import logging
import argparse
from dotenv import load_dotenv

# Pull any variables declared in a .env file into the process environment.
load_dotenv()

# Notebook-wide logging: INFO and above, with timestamp and level on every line.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# None when OPENAI_API_KEY is not defined in the environment.
api_key = os.environ.get('OPENAI_API_KEY')

start to install package: redis
successfully installed package: redis
start to install package: redis-om
successfully installed package: redis-om


# AbstractLanguageModel




In [None]:

# Abstract base class for a language model.
class AbstractLanguageModel(ABC):
    """Interface every language-model backend in this notebook implements.

    The search code calls both methods with the original problem statement as
    an extra argument, so ``initial_prompt`` is part of the interface. It is
    optional so that implementations which ignore it remain valid.
    """

    @abstractmethod
    def generate_thoughts(self, state, k, initial_prompt=None):
        """Return ``k`` candidate next thoughts for ``state``.

        Args:
            state: the current reasoning state.
            k: number of thoughts to generate.
            initial_prompt: the problem statement being solved, if available.
        """

    @abstractmethod
    def evaluate_states(self, states, initial_prompt=None):
        """Score each state in ``states``.

        Args:
            states: collection of reasoning states to evaluate.
            initial_prompt: the problem statement being solved, if available.
        """

# A custom language model class that extends the abstract base class.
class CustomLanguageModel(AbstractLanguageModel):
    """Skeleton implementation that only stores the wrapped model.

    Both interface methods are placeholders and return ``None``.
    """

    def __init__(self, model):
        # Backend object the placeholder methods are meant to delegate to.
        self.model = model

    def generate_thoughts(self, state, k):
        """Placeholder for thought generation using ``self.model``."""
        return None

    def evaluate_states(self, states):
        """Placeholder for state evaluation using ``self.model``."""
        return None

In [None]:
class CustomLanguageModel(AbstractLanguageModel):
    """Example implementation that delegates to a wrapped ``model`` object.

    The wrapped object is expected to provide ``generate(state, k)`` and
    ``evaluate(state)``; both are called as-is and their results returned.
    """

    def __init__(self, model):
        # This definition replaces any earlier class of the same name, so the
        # constructor must be defined here for ``self.model`` to exist.
        self.model = model

    def generate_thoughts(self, state, k, initial_prompt=None):
        """Generate ``k`` thoughts for ``state`` via ``self.model.generate``.

        ``initial_prompt`` is accepted for compatibility with callers that pass
        the problem statement as a third argument; it is not used here.
        """
        thoughts = self.model.generate(state, k)
        return thoughts

    def evaluate_states(self, states, initial_prompt=None):
        """Return a list with ``self.model.evaluate(state)`` for each state.

        ``initial_prompt`` is accepted for compatibility and not used here.
        """
        evaluations = [self.model.evaluate(state) for state in states]
        return evaluations


# OpenAI Language Model
## The provided code is a more advanced, concrete implementation of the AbstractLanguageModel class, specifically tailored to interact with the OpenAI API.

The evaluate_states method takes two arguments besides self:

**states:** a list of states (thoughts) to evaluate.
**initial_prompt:** the initial prompt or problem statement that the states are meant to address or solve.
The evaluation can follow one of two strategies, selected by the `evaluation_strategy` attribute: **'value'** and **'vote'**. The strategy is chosen by the conditional branches inside the method.
1. Value Strategy: If **self.evaluation_strategy == 'value'**, the method iterates through each state and asks the OpenAI model to evaluate it by providing a **floating-point number between 0 and 1.** Higher values mean the state is considered more effective or probable in solving the problem.

2. Vote Strategy: If **self.evaluation_strategy == 'vote'**, the model is asked to vote for the best state from the list of states. This approach gives a binary evaluation: the single state considered most likely to solve the problem is assigned 1, while all other states receive 0.



In [None]:
class OpenAILanguageModel(AbstractLanguageModel):
    """Language model backed by the legacy (pre-1.0) ``openai`` Python SDK.

    Generates candidate thoughts, final solutions, and state evaluations by
    prompting either the chat endpoint (model name contains ``'gpt'``) or the
    completion endpoint.
    """

    def __init__(self, api_key, strategy="cot", evaluation_strategy="value", api_base="", api_model="", enable_ReAct_prompting=True):
        """Configure the OpenAI client and the prompting options.

        Args:
            api_key: OpenAI key; when empty/None, ``OPENAI_API_KEY`` from the
                environment is used.
            strategy: stored on the instance as ``self.strategy``.
            evaluation_strategy: ``'value'`` or ``'vote'``; see ``evaluate_states``.
            api_base: custom API base URL; falls back to ``OPENAI_API_BASE``.
            api_model: model name; falls back to ``OPENAI_API_MODEL`` and then
                to ``"text-davinci-003"``.
            enable_ReAct_prompting: when true, a ReAct-style instruction is
                appended to thought-generation prompts.

        Raises:
            Exception: if no API key is supplied or found in the environment.
        """
        if api_key == "" or api_key is None:
            api_key = os.environ.get("OPENAI_API_KEY", "")
        if api_key != "":
            openai.api_key = api_key
        else:
            raise Exception("Please provide OpenAI API key")

        if api_base == "" or api_base is None:
            # If not set, the SDK's default base path is used.
            api_base = os.environ.get("OPENAI_API_BASE", "")
        if api_base != "":
            # e.g. https://api.openai.com/v1/ or your custom url
            openai.api_base = api_base
            print(f'Using custom api_base {api_base}')

        if api_model == "" or api_model is None:
            api_model = os.environ.get("OPENAI_API_MODEL", "")
        if api_model != "":
            self.api_model = api_model
        else:
            self.api_model = "text-davinci-003"
        print(f'Using api_model {self.api_model}')

        # Chat endpoint is selected purely by the model name.
        self.use_chat_api = 'gpt' in self.api_model

        # reference : https://www.promptingguide.ai/techniques/react
        self.ReAct_prompt = ''
        if enable_ReAct_prompting:
            self.ReAct_prompt = "Write down your observations in format 'Observation:xxxx', then write down your thoughts in format 'Thoughts:xxxx'."

        self.strategy = strategy
        self.evaluation_strategy = evaluation_strategy

    @staticmethod
    def _rate_limit_sleep_seconds(default=30):
        """Return the back-off delay in seconds from ``OPENAI_RATE_TIMEOUT``.

        Environment values are strings, so the value is converted to ``float``
        before it reaches ``time.sleep``. Unset, unparsable, or negative values
        fall back to ``default``.
        """
        raw_value = os.environ.get("OPENAI_RATE_TIMEOUT")
        if raw_value is None:
            return default
        try:
            seconds = float(raw_value)
        except ValueError:
            return default
        return seconds if seconds >= 0 else default

    @staticmethod
    def _state_to_text(state, separator):
        """Render a state as prompt text.

        A plain string is returned unchanged (joining it would insert the
        separator between every character); any other iterable of strings is
        joined with ``separator``.
        """
        if isinstance(state, str):
            return state
        return separator.join(state)

    def openai_api_call_handler(self, prompt, max_tokens, temperature, k=1, stop=None):
        """Send ``prompt`` to OpenAI, retrying for as long as the API is rate limited.

        On the chat path ``k`` and ``stop`` are not forwarded; callers that need
        several chat samples call this method repeatedly. Each successful
        prompt is appended to ``openai.logs``.

        Returns:
            The raw response object returned by the SDK.
        """
        while True:
            try:
                if self.use_chat_api:
                    messages = [
                        {
                            "role": "user",
                            "content": prompt
                        }
                    ]
                    response = openai.ChatCompletion.create(
                        model=self.api_model,
                        messages=messages,
                        max_tokens=max_tokens,
                        temperature=temperature,
                    )
                else:
                    response = openai.Completion.create(
                        engine=self.api_model,
                        prompt=prompt,
                        n=k,
                        max_tokens=max_tokens,
                        stop=stop,
                        temperature=temperature,
                    )
                with open("openai.logs", 'a') as log_file:
                    log_file.write("\n" + "-----------" + '\n' +"Prompt : "+ prompt+"\n")
                return response
            except openai.error.RateLimitError as e:
                # Back off, then let the loop retry the same request.
                sleep_duration = self._rate_limit_sleep_seconds()
                print(f'{str(e)}, sleep for {sleep_duration}s, set it by env OPENAI_RATE_TIMEOUT')
                time.sleep(sleep_duration)

    def openai_choice2text_handler(self, choice):
        """Extract the generated text from one response choice.

        Chat choices carry the text under ``['message']['content']``;
        completion choices expose it as ``.text`` (returned stripped).
        """
        if self.use_chat_api:
            text = choice['message']['content']
        else:
            text = choice.text.strip()
        return text

    def store_in_json(self, data, filename="output.json"):
        """Stores data in a JSON file, overwriting any existing content."""
        with open(filename, 'w') as file:
            json.dump(data, file)

    def generate_text(self, prompt, k):
        """Return a list of ``k`` generations for ``prompt``.

        The chat path issues ``k`` separate requests and keeps the first choice
        of each; the completion path asks for ``k`` choices in one request.
        """
        if self.use_chat_api:
            thoughts = []
            for _ in range(k):
                response = self.openai_api_call_handler(prompt, 1200, 0.5, k)
                text = self.openai_choice2text_handler(response.choices[0])
                thoughts += [text]
                print(f'thoughts: {thoughts}')
            return thoughts

        else:
            response = self.openai_api_call_handler(prompt, 1200, 0.5, k)
            thoughts = [self.openai_choice2text_handler(choice) for choice in response.choices]
            return thoughts

    def generate_thoughts(self, state, k, initial_prompt):
        """Generate ``k`` candidate next thoughts that continue ``state``.

        Args:
            state: a string, or an iterable of strings joined with newlines.
            k: number of thoughts to generate.
            initial_prompt: the problem the reasoning is meant to solve.

        Returns:
            List of generated thought strings.
        """
        state_text = self._state_to_text(state, '\n')
        print("THIS IS WHERE IT GENERATE THE THOUGHTS BASING ON THE STATES:")
        print("We receive STATE of type", type(state), "For state: ", state, "\n\n")

        prompt = f"Considering the thoughts you've had until now: THE STATES ARE: \n\n{state_text}\n\nDevise the next coherent thought that will aid in advancing the reasoning process and achieving a solution to {initial_prompt}. Assess various scenarios, think unconventionally, anticipate potential challenges, and resolve any outstanding queries. Tap into your mind's full potential and make certain no open questions remain."

        prompt += self.ReAct_prompt
        print(prompt)
        thoughts = self.generate_text(prompt, k)

        for idx, thought in enumerate(thoughts):
            print(f"Thought {idx + 1}: {thought}")
        return thoughts

    def generate_solution(self, initial_prompt, state):
        """Ask the model for a final solution and store it in ``output.json``.

        Args:
            initial_prompt: the task to solve.
            state: the reasoning gathered so far (string or iterable of strings).

        Returns:
            A one-element list containing the generated solution text.
        """
        state_text = self._state_to_text(state, '\n')

        prompt = f"Considering the reasoning provided:\n\n'{state_text}'\n\nDevise the best possible solution for the task: {initial_prompt}"
        answer = self.generate_text(prompt, 1)
        print(f"General solution : {answer}")
        self.store_in_json(answer)
        return answer

    def evaluate_states(self, states, initial_prompt):
        """Score ``states`` according to ``self.evaluation_strategy``.

        ``'value'``: each state is scored independently; the model's reply is
        parsed as a float and a reply that cannot be parsed scores 0.

        ``'vote'``: all states are shown in a single prompt. The reply is split
        on whitespace into a tuple and the state equal to that tuple gets 1,
        every other state 0.
        NOTE(review): this only matches when the reply reproduces a state
        token-for-token — confirm that this is the intended voting protocol.

        Args:
            states: iterable of hashable states (strings or tuples of strings).
            initial_prompt: the problem the states try to solve.

        Returns:
            Dict mapping each state to its score.

        Raises:
            ValueError: if the evaluation strategy is neither 'value' nor 'vote'.
        """
        if self.evaluation_strategy == 'value':
            state_values = {}
            for state in states:
                state_text = self._state_to_text(state, ' ')
                print("We receive a state of type", type(state), "For state: ", state, "\n\n")
                prompt = f"Given the current state of reasoning: '{state_text}', evaluate its value as a float between 0 and 1, become very pessimistic think of potential adverse risks on the probability of this state of reasoning achieveing {initial_prompt} and DO NOT RESPOND WITH ANYTHING ELSE: OTHER THAN AN FLOAT"

                response = self.openai_api_call_handler(prompt, 500, 1)
                try:
                    value_text = self.openai_choice2text_handler(response.choices[0])
                    print(f'state: {value_text}')
                    value = float(value_text)
                    print(f"value: {value}")
                except ValueError:
                    value = 0  # Assign a default value if the conversion fails
                state_values[state] = value
            return state_values

        elif self.evaluation_strategy == 'vote':
            states_text = '\n'.join([self._state_to_text(state, ' ') for state in states])

            prompt = f"Given the following states of reasoning, vote for the best state utilizing an scalar value 1-10:\n{states_text}\n\nVote, on the probability of this state of reasoning achieveing {initial_prompt} and become very pessimistic very NOTHING ELSE"

            response = self.openai_api_call_handler(prompt, 500, 1)

            print(f'state response: {response}')

            best_state_text = self.openai_choice2text_handler(response.choices[0])

            print(f"Best state text: {best_state_text}")

            best_state = tuple(best_state_text.split())

            print(f'best_state: {best_state}')

            return {state: 1 if state == best_state else 0 for state in states}

        else:
            raise ValueError("Invalid evaluation strategy. Choose 'value' or 'vote'.")



# OptimizedOpenAILanguageModel
This class is an extension of `OpenAILanguageModel`.

In [None]:
class OptimizedOpenAILanguageModel(OpenAILanguageModel):
    """``OpenAILanguageModel`` with thread-pool helpers for batches of states."""

    def __init__(self, api_key, strategy="cot", evaluation_strategy="value", cache_enabled=True, api_base="", api_model="", enable_ReAct_prompting=False):
        """Initialise the parent model and create the (initially empty) caches.

        Args:
            cache_enabled: flag stored on the instance to toggle caching.
            Remaining arguments are forwarded unchanged to the parent constructor.
        """
        super().__init__(api_key, strategy, evaluation_strategy, api_base, api_model, enable_ReAct_prompting)
        self.cache_enabled = cache_enabled
        # Dictionaries reserved for caching generated thoughts and state evaluations.
        self.thought_cache = {}
        self.state_evaluation_cache = {}

    def parallel_generate_thoughts(self, states, k, initial_prompt=""):
        """Generate ``k`` thoughts for every state concurrently.

        Args:
            states: iterable of states.
            k: number of thoughts per state.
            initial_prompt: problem statement forwarded to ``generate_thoughts``,
                which requires it as its third argument.

        Returns:
            List with one list of thoughts per input state, in input order.
        """
        print(f"=== DEBUG ===\nStates: {states}, k: {k}")
        with concurrent.futures.ThreadPoolExecutor() as executor:
            thoughts = list(executor.map(lambda state: self.generate_thoughts(state, k, initial_prompt), states))
            print(f"=== DEBUG ===\nGenerated thoughts: {thoughts}")
        return thoughts

    def parallel_evaluate_states(self, states, initial_prompt):
        """Evaluate every state concurrently against the same ``initial_prompt``.

        Each state is evaluated in its own ``evaluate_states`` call as a
        one-element batch. Mapping ``evaluate_states`` over ``states`` and
        ``initial_prompt`` directly would pair each state with one character
        of the prompt instead.

        Returns:
            List with one ``{state: score}`` dict per input state, in input order.
        """
        with concurrent.futures.ThreadPoolExecutor() as executor:
            state_values = list(executor.map(lambda state: self.evaluate_states([state], initial_prompt), states))
            print(f"Parallel evaluated state values: {state_values}")
        return state_values

# TREE OF THOUGHTS

1. `__init__(self, model, search_algorithm)`:
- model: an instance of a model used to generate and evaluate thoughts.
- search_algorithm: indicates whether to use Breadth-First Search (BFS) or Depth-First Search (DFS) in the solution-finding process.
2. `solve()`: initiates a search for a solution, specifying various search parameters, and dispatches to the chosen algorithm.
3. `tot_bfs(self, x, k, T, b)`: conducts BFS, limiting the exploration to `T` steps and keeping the top `b` most promising states at each step.
4. `tot_dfs`: conducts DFS, cutting off branches that do not exceed the value threshold `vth` and recursively exploring promising branches up to a maximum depth `T`.
5. `save_tree_to_json`: saves the search tree to the JSON file specified by `file_name`.
6. `print_tree`: a method that visualizes the tree of thoughts by traversing it and assembling the information into a nested dictionary structure.

In [None]:
class TreeofThoughts:
    """Tree-of-Thoughts search driver.

    1. Thought decomposition: based on the properties of the problem.

    2. Thought generator G(p0, s, k), with two strategies: (a) sample i.i.d.
       thoughts from a CoT prompt, or (b) propose thoughts sequentially using
       a propose prompt.

    3. State evaluator V(p0, S), with two strategies: (a) value each state
       independently, or (b) vote across states.

    4. Search algorithm, chosen by tree structure: BFS or DFS.

    BFS (algorithm 1):
        initialise S0 with the input x
        for t = 1 to T (step limit):
            generate candidate thoughts for each state in S(t-1)
            evaluate the candidate states using the state evaluator V
            select the b most promising states for St
        return the best state in St

    DFS (algorithm 2):
        define a recursive function over the current state s and step t
        if t > T, record the output by generating a thought for s
        for each candidate thought generated for s:
            if its evaluated value exceeds the threshold vth, recurse with
            the extended state and t + 1
    """

    def __init__(self, model, search_algorithm):
        """Store the model and algorithm name and create empty bookkeeping.

        Args:
            model: object providing ``generate_thoughts(state, k, initial_prompt)``
                and ``evaluate_states(states, initial_prompt)``.
            search_algorithm: ``'BFS'`` or ``'DFS'``.
        """
        self.model = model
        self.search_algorithm = search_algorithm
        # Dict containers, matching how print_tree reads them:
        # "nodes" is iterated as child -> parent; the metrics are looked up by node.
        self.tree = {
            "nodes": {},
            "metrics": {
                "thoughts": {},
                "evaluations": {}
            }
        }
        self.df = pd.DataFrame(columns=['Thought', 'Evaluation', 'Best Solution'])

    def solve(self, x, k=None, T=None, b=None, vth=None, timeout=None, confidence_threshold=None, max_iterations=None, convergence_threshold=None, convergence_count=None):
        """Search for a solution to ``x`` with the configured algorithm.

        The search is repeated until it yields a truthy result or ``timeout``
        seconds have elapsed (no limit when ``timeout`` is None). The tree is
        written to ``logs/tree_of_thoughts_output_<algorithm>.json`` on exit.
        ``KeyboardInterrupt`` and ``ValueError`` are logged rather than raised.

        Returns:
            The search result, or None when no result was produced.
        """
        start_time = time.time()
        file_name = f"logs/tree_of_thoughts_output_{self.search_algorithm}.json"
        try:
            if self.search_algorithm == 'BFS':
                while timeout is None or time.time() - start_time < timeout:
                    result = self.tot_bfs(x, k, T, b)  # b is the number of promising states kept
                    if result:
                        self.save_tree_to_json(file_name)
                        return result
            elif self.search_algorithm == 'DFS':
                while timeout is None or time.time() - start_time < timeout:
                    result = self.tot_dfs(x, k, T, vth)  # vth is the value threshold for DFS
                    if result:
                        self.save_tree_to_json(file_name)
                        return result
            else:
                raise ValueError("Invalid search algorithm. Choose 'BFS' or 'DFS'.")
        except KeyboardInterrupt:
            logger.error("Keyboard interrupt detected.")
        except ValueError as e:
            logger.error(f"Error: {e}")
        finally:
            logger.info("Saving the current tree and metrics.")
            self.save_tree_to_json(file_name)

    def tot_bfs(self, x, k, T, b):
        """Breadth-first search: ``T`` rounds of expand, evaluate, keep the top ``b``.

        Args:
            x: the problem statement; it must be hashable because it seeds a set.
            k: thoughts generated per state.
            T: number of expansion rounds (at least 1).
            b: number of states kept after each round.

        Returns:
            The highest-valued state of the final round, as a tuple of strings.

        Raises:
            ValueError: if ``T`` is None or smaller than 1.
        """
        if T is None or T < 1:
            raise ValueError("T must be a positive integer for BFS.")

        S0 = {x}
        for t in range(1, T + 1):
            S0_t = set()
            for s in S0:
                for z in self.model.generate_thoughts(s, k, x):
                    # States are tuples of thoughts; a bare string is the root.
                    if isinstance(s, str):
                        S0_t.add((s, z))
                    else:
                        S0_t.add((*s, z))
            Vt = self.model.evaluate_states(S0_t, x)
            St = sorted(S0_t, key=lambda s: Vt[s], reverse=True)[:b]
            S0 = set(St)

            logger.info(f'Step: {t}, S0_t: {S0_t}, Vt: {Vt}, St: {St}, S0: {S0}')

        best_state = max(St, key=lambda s: Vt[s])

        return best_state

    def tot_dfs(self, x, k, T, vth, pruning_threshold=0.5, confidence_threshold=None, max_iterations=None, convergence_threshold=None, convergence_count=None, start_depth=4):
        """Depth-first search with value-based pruning.

        Args:
            x: the problem statement (root state).
            k: thoughts generated per expanded state.
            T: depth limit; a state reached with depth ``t > T`` is a leaf.
            vth: a thought is followed only if its value is greater than this.
            pruning_threshold: additional lower bound on the value (None disables it).
            confidence_threshold: stop as soon as a leaf value reaches it.
            max_iterations: stop after this many leaves have been recorded.
            convergence_threshold, convergence_count: stop after
                ``convergence_count`` consecutive leaves whose value differs
                from the previous leaf by less than ``convergence_threshold``.
            start_depth: depth assigned to the root. The default of 4 keeps the
                value that was previously hard-coded; pass 1 to explore all
                ``T`` levels.

        Returns:
            The thought list recorded for the highest-valued leaf.

        Raises:
            ValueError: if the search finishes without recording any leaf.
        """
        output = []  # (thought, value) pairs recorded at the leaves
        iteration_count = 0
        consecutive_convergence_count = 0
        prev_best_value = None
        file_name = f"logs/tree_of_thoughts_output_{self.search_algorithm}.json"

        def dfs(s, t):
            """Explore state ``s`` at depth ``t``; return True to stop the whole search."""
            nonlocal consecutive_convergence_count, prev_best_value, iteration_count
            if t > T:
                thought = self.model.generate_thoughts(s, 1, x)
                print(f'thoughts inside dfs {thought}')

                # A string state must become a one-element tuple; tuple(s)
                # would split it into single characters.
                state_key = (s,) if isinstance(s, str) else tuple(s)
                value = self.model.evaluate_states({state_key}, x)[state_key]
                # DataFrame.append was removed in pandas 2.0; concat is the replacement.
                new_row = pd.DataFrame([{'Thought': thought, 'Evaluation': value}])
                self.df = pd.concat([self.df, new_row], ignore_index=True)
                print(f'values inside dfs {value}')

                output.append((thought, value))
                print(f'output {output}')

                if confidence_threshold is not None and value >= confidence_threshold:
                    return True

                if prev_best_value is not None and convergence_threshold is not None:
                    if abs(value - prev_best_value) < convergence_threshold:
                        consecutive_convergence_count += 1
                    else:
                        consecutive_convergence_count = 0

                prev_best_value = value
                iteration_count += 1

                if (max_iterations is not None and iteration_count >= max_iterations) or (convergence_count is not None and consecutive_convergence_count >= convergence_count):
                    return True

                return False

            # Thoughts are visited in lexicographic order of their text.
            for s_prime in sorted(self.model.generate_thoughts(s, k, x)):
                state_value = self.model.evaluate_states({s_prime}, x)[s_prime]
                logger.info(f"State: {s_prime}, Value: {state_value}")

                if state_value > vth and (pruning_threshold is None or state_value >= pruning_threshold):
                    if isinstance(s, str):
                        child = (s, s_prime)
                    else:
                        child = (*s, s_prime)

                    if dfs(child, t + 1):
                        return True

            self.save_tree_to_json(file_name)
            return False

        dfs(x, start_depth)
        print(f'output  {output}')
        if not output:
            # Same exception type max() would raise on an empty list, with a usable message.
            raise ValueError("DFS recorded no candidate: every thought was pruned or none were generated.")
        best_state = max(output, key=lambda item: item[1])
        return best_state[0]

    def save_dataframe(self, file_name):
        """Write the recorded thoughts and evaluations to ``file_name`` as CSV."""
        self.df.to_csv(file_name, index=False)

    def save_tree_to_json(self, file_name):
        """Write ``self.tree`` to ``file_name`` as indented JSON.

        The parent directory is created when the path has one.
        """
        directory = os.path.dirname(file_name)
        if directory:  # os.makedirs('') raises for a bare file name
            os.makedirs(directory, exist_ok=True)

        with open(file_name, 'w') as json_file:
            json.dump(self.tree, json_file, indent=4)

    def print_tree(self, x, node=None, depth=0):
        """Build a nested dict describing the subtree rooted at ``node``.

        Args:
            x: key used to look up the starting node in ``self.tree["nodes"]``
                when ``node`` is None.
            node: node to describe; None on the initial call.
            depth: recursion depth, passed along to child calls.

        Returns:
            Dict with keys ``node``, ``thought``, ``evaluation`` and ``children``
            (a list of dicts of the same shape).
        """
        if node is None:
            node = self.tree["nodes"][x]

        thought = self.tree["metrics"]["thoughts"][node]
        evaluation = self.tree["metrics"]["evaluations"][node]

        tree_info = {
            "node": node,
            "thought": thought,
            "evaluation": evaluation,
            "children": []
        }

        for child, parent in self.tree["nodes"].items():
            if parent == node:
                # Pass the child as `node`; depth is the third argument.
                child_info = self.print_tree(x, child, depth + 1)
                tree_info["children"].append(child_info)

        return tree_info




**`OptimizedTreeofThoughts` class**: a subclass of `TreeofThoughts` that inherits its attributes and methods. The `solve` method in `OptimizedTreeofThoughts` overrides the `solve` method of the `TreeofThoughts` class.
- Method `solve`: attempts to solve a problem `x` using the chosen search algorithm. The parameters `k`, `T`, `b` and `vth` are configurations and thresholds for the search algorithms and the problem-solving process:
  * x: the problem instance to solve
  * k: number of thoughts to generate
  * T: step limit for the search algorithm
  * b: number of most promising states to consider (applicable to BFS only)
  * vth: value threshold (applicable to DFS only)
  * timeout: maximum time allowed for the method to try to find a solution.
  * confidence_threshold, max_iterations, convergence_threshold and convergence_count: optional parameters for additional control and fine-tuning of the solution process.

- BFS algorithm: the method enters a loop that continues until a solution is found or until the specified timeout is reached.
 * It calls the `tot_bfs` method with the problem instance `x` and the additional parameters to try to find a solution using BFS, and prints the resulting solution.
 * If a solution is found, it returns the solution and exits.
- DFS algorithm:
 * It calls `tot_dfs` to try to find a solution using DFS, with a slightly different set of parameters to match the different nature of DFS. It takes the value threshold `vth` into account.
- If an invalid search algorithm is specified, it raises a `ValueError`.

In [None]:
class OptimizedTreeofThoughts(TreeofThoughts):
    """TreeofThoughts variant whose ``solve`` forwards the DFS stopping criteria."""

    def solve(self, x, k=None, T=None, b=None, vth=None, timeout=None, confidence_threshold=None, max_iterations=None, convergence_threshold=None, convergence_count=None):
        """Repeat the configured search until it yields a truthy result.

        Args:
            x: problem instance.
            k: number of thoughts; T: step limit; b: number of most promising
                states (BFS); vth: value threshold (DFS).
            timeout: seconds after which no new search run is started
                (None means no limit).
            confidence_threshold, max_iterations, convergence_threshold,
            convergence_count: forwarded to ``tot_dfs``.

        Returns:
            The first truthy search result, or None once the timeout has passed.

        Raises:
            ValueError: if the search algorithm is neither 'BFS' nor 'DFS'.
        """
        began_at = time.time()
        print(f'Start time {began_at}')

        # Reject unknown algorithms up front, before any search is attempted.
        if self.search_algorithm not in ('BFS', 'DFS'):
            raise ValueError("Invalid search algorithm. Choose 'BFS' or 'DFS'.")
        breadth_first = self.search_algorithm == 'BFS'

        while timeout is None or time.time() - began_at < timeout:
            if breadth_first:
                result = self.tot_bfs(x, k, T, b)
                print(f'result in optimized tree of thoughts: {result}')
            else:
                result = self.tot_dfs(
                    x, k, T, vth,
                    confidence_threshold=confidence_threshold,
                    max_iterations=max_iterations,
                    convergence_threshold=convergence_threshold,
                    convergence_count=convergence_count,
                )
            if result:
                return result

# Applying ToT with HuggingFace Dataset



Download the Medical Dataset from HuggingFace (https://huggingface.co/datasets/danielpark/MQuAD-v1)

The HuggingFace dataset comprises four columns — `question`, `answer`, `Q_FFNN_embeds`, and `A_FFNN_embeds` — offering information pertinent to medical issues.

We use ToT to generate answers to questions on medical topics. A comparison is then made between the ground-truth answers from the dataset and the answers generated with ChatGPT.


In [None]:
import pandas as pd

# Truncate long cell contents so wide text columns stay readable.
pd.options.display.max_colwidth = 20

# Cap the overall width of printed frames.
pd.options.display.width = 80


In [None]:
# @title Download Dataset


# Load packages
import datasets as ds
from datasets import Dataset, DatasetDict, load_dataset, load_metric
dataset = load_dataset("danielpark/MQuAD-v1")

# Show where the dataset was stored in the local cache. A bare expression in
# the middle of a cell displays nothing, so it is printed explicitly.
print(dataset.cache_files)


# Access a specific split
train_dataset = dataset['train']

# Access the features of the split
train_features = train_dataset.features

print(train_features)

num_train_samples = len(train_dataset)

print(f"Number of train samples: {num_train_samples}")

Downloading readme:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/546M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

{'question': Value(dtype='string', id=None), 'answer': Value(dtype='string', id=None), 'Q_FFNN_embeds': Value(dtype='string', id=None), 'A_FFNN_embeds': Value(dtype='string', id=None)}
Number of train samples: 23802


In [None]:
# @title Extract Questions and Answers from Dataset:
# Select row 3 of the training split once and read both columns from it.
# Column access on the selection yields a one-element list for each field.
sample_3 = train_dataset.select([3])
q3 = sample_3['question']
a3 = sample_3['answer']

print("Question 3 is: ", q3)
print("Ground-truth answer: ",a3)


Question 3 is:  ['i have had a pneumonia shot can i get either a sinus infection or walking pneumonia from my 6 year old grand daughter  and can i be a carrier to others in my age group']
Ground-truth answer:  ['you can always catch an illness from a child but most of these are viral like colds your pneumonia shot only protects you from certain types of bacterial pneumonia pneumococcus it would not protect you at all from viruses any respiratory virus can cause pneumonia or sinusitis if you are prone to these types of infections']


In [None]:
# @title Apply ToT model to question 3

search_algorithm = "DFS"
strategy = "cot"
evaluation_strategy="vote"
# NOTE: `strategy` and `evaluation_strategy` are not passed to the model below,
# which therefore uses its constructor defaults.

# Create the model. The key is read from the environment: passing the literal
# string 'OPENAI_API_KEY' would send that text as the key itself.
model = OptimizedOpenAILanguageModel(os.environ.get("OPENAI_API_KEY", ""), api_model="gpt-3.5-turbo")
tree_of_thoughts = OptimizedTreeofThoughts(model, search_algorithm)


# input_problem = "using question from Dataset in HuggingFace"
class args:
    # q3 is a one-element list; the prompts expect the question text itself.
    problem = q3[0] if isinstance(q3, (list, tuple)) else q3
    search_algorithm = "DFS"
    k = 3
    T = 4
    b = 5
    vth = 0.4
    # NOTE: the settings below are defined but not forwarded to solve().
    timeout = 10
    confidence = 0.8
    max_iterations = 40
    convergence_threshold = 0.01
    convergence_count = 5

# Solve the problem using the tree of thoughts class
optimized_tree_of_thoughts = OptimizedTreeofThoughts(model, search_algorithm=args.search_algorithm)

best_state = optimized_tree_of_thoughts.solve(args.problem, k=args.k, T=args.T, b=args.b, vth=args.vth)

# Generate the final solution. The signature is generate_solution(initial_prompt, state):
# the problem comes first, the reasoning found by the search second.
final_solution_3= optimized_tree_of_thoughts.model.generate_solution(args.problem, best_state)



# Print the final solution
print(f"THE FINAL SOLUTION IS:  {final_solution_3}")

# After processing, save the DataFrame to a CSV:
optimized_tree_of_thoughts.save_dataframe('results.csv')




Using api_model gpt-3.5-turbo
Start time 1698618704.5722172
THIS IS WHERE IT GENERATE THE THOUGHTS BASING ON THE STATES:
We receive STATE of type <class 'list'> For state:  ['i have had a pneumonia shot can i get either a sinus infection or walking pneumonia from my 6 year old grand daughter  and can i be a carrier to others in my age group'] 


Considering the thoughts you've had until now: THE STATES ARE: 

i have had a pneumonia shot can i get either a sinus infection or walking pneumonia from my 6 year old grand daughter  and can i be a carrier to others in my age group

Devise the next coherent thought that will aid in advancing the reasoning process and achieving a solution to ['i have had a pneumonia shot can i get either a sinus infection or walking pneumonia from my 6 year old grand daughter  and can i be a carrier to others in my age group']. Assess various scenarios, think unconventionally, anticipate potential challenges, and resolve any outstanding queries. Tap into your m

  self.df = self.df.append({'Thought': thought, 'Evaluation': value}, ignore_index=True)


state: 0.85
value: 0.85
THIS IS WHERE IT GENERATE THE THOUGHTS BASING ON THE STATES:
We receive STATE of type <class 'tuple'> For state:  ('i have had a pneumonia shot can i get either a sinus infection or walking pneumonia from my 6 year old grand daughter  and can i be a carrier to others in my age group', "To advance the reasoning process and find a solution to the question, it is essential to consider the following points:\n\n1. Understanding the effectiveness of the pneumonia shot: Research and consult with healthcare professionals to determine the effectiveness of the pneumonia shot you received. It is important to know if the shot provides protection against both sinus infections and walking pneumonia.\n\n2. Assessing the transmission of infections: Evaluate the modes of transmission for sinus infections and walking pneumonia. Determine if these infections can be transmitted through close contact with your 6-year-old granddaughter, such as sharing utensils or being in close prox

  self.df = self.df.append({'Thought': thought, 'Evaluation': value}, ignore_index=True)


state: Unfortunately, as an AI language model, I cannot directly evaluate the value of a given statement as a float between 0 and 1. My purpose is to provide information and assist with answering questions to the best of my abilities.
output  [(["To further advance the reasoning process and achieve a solution to the question, it is important to consider the following:\n\n6. Evaluate your grandchild's health status: Assess whether your grandchild currently has a sinus infection or walking pneumonia. If she is showing symptoms or has been diagnosed, the risk of transmission may be higher. However, it is still crucial to consult with a healthcare professional to understand the specific pathogens involved and their transmission dynamics.\n\n7. Consider the symptoms and duration of illnesses: Sinus infections and walking pneumonia have distinct symptoms and durations. Sinus infections typically present with facial pain, congestion, and nasal discharge, while walking pneumonia may cause pers

In [None]:
# Keep the first element of the generated solution together with the first
# ground-truth answer; the evaluation cells below compare these two texts.
ai_solution_3, reference_text_3 = final_solution_3[0], a3[0]


# 5 METRICS: BARTScore, BLEU, METEOR, ROUGE, and FactCC
BARTScore, BLEU, METEOR, ROUGE, and FactCC are metrics commonly used in natural language processing (NLP) to evaluate the quality of machine-generated text, particularly in tasks like text summarization, machine translation, and text generation.

1. **BARTScore**:
   - BARTScore is a metric that uses the BART model, a denoising autoencoder for pretraining sequence-to-sequence models, to score the similarity between generated and reference texts.
   - It computes the likelihood of the generated text being produced from the reference text, treating it as a noising process, and uses this as a score to measure the quality of the generated text.
   - To use BARTScore, you need a machine learning environment (like Python) with the necessary libraries (like Hugging Face's Transformers) installed. You would use the BART model within this environment to compute BARTScores for your generated text compared to your reference text.

2. **BLEU (Bilingual Evaluation Understudy)**:
   - BLEU is a metric for evaluating the quality of text which has been machine-translated from one natural language to another. It measures how many words, phrases, and sentences overlap (match exactly) between the generated text and a set of reference texts.
   - BLEU can be computed using natural language processing libraries like NLTK (Natural Language Toolkit) in Python. It involves tokenizing the texts into words (or sometimes subwords or characters), then counting the matching tokens, and normalizing by the lengths of the texts.

3. **METEOR (Metric for Evaluation of Translation with Explicit Ordering)**:
   - METEOR is similar to BLEU but considers not only exact word matches but also synonyms and stemmed forms of the words. It also applies a fragmentation penalty when the matched words appear in a different order than in the reference.
   - Like BLEU, METEOR is often used in machine translation and can be calculated using libraries like NLTK in Python.

4. **ROUGE (Recall-Oriented Understudy for Gisting Evaluation)**:
   - ROUGE is used primarily in evaluating text summarization systems. It compares the overlap between the n-grams, word sequences of length n, in the generated text and reference text(s). ROUGE-L, specifically, considers the longest common subsequence (LCS) of words in its calculation.
   - There are several variants (like ROUGE-N, ROUGE-L, ROUGE-W, etc.), each considering different types of overlap. These can be computed using Python libraries like `rouge-score` or `pyrouge`.

5. **FactCC**:
   - FactCC is a metric designed to evaluate factual consistency in text summarization. It uses a fine-tuned BERT classifier to predict whether a claim from the generated summary is factually consistent or inconsistent with the source document.
   - Implementing FactCC requires a machine learning environment with a library that includes BERT (like Hugging Face's Transformers).
   - FactCC evaluates whether a source document as a whole implies a generated sentence.

## ROUGE Score

In [None]:
from rouge_score import rouge_scorer

# Texts compared by ROUGE: the model output and the first ground-truth answer.
generated_text_3 = ai_solution_3
reference_text_3 = a3[0]

In [None]:
# Score with ROUGE-1 and ROUGE-L, with stemming enabled.
scorer = rouge_scorer.RougeScorer(rouge_types=['rouge1', 'rougeL'], use_stemmer=True)

# The reference text is the target; the generated text is the prediction.
scores = scorer.score(target=reference_text_3, prediction=generated_text_3)

print(scores)


{'rouge1': Score(precision=0.07874015748031496, recall=0.5357142857142857, fmeasure=0.13729977116704806), 'rougeL': Score(precision=0.047244094488188976, recall=0.32142857142857145, fmeasure=0.08237986270022885)}


- rouge1: scores based on the overlap of individual words (unigrams).
- rougeL: scores based on the longest common subsequence between the two texts.
- Each metric reports precision, recall, and F-measure (F1).
1. rouge1:
- Precision: about 0.079, which means about 7.9% of the words in the generated text also appear in the reference text.
- Recall: about 0.536, which means about 53.6% of the words in the reference text also appear in the generated text.
- F-measure: the harmonic mean of precision and recall, about 0.137.
2. rougeL:
- Precision: 0.047
- Recall: 0.321
- F-measure: 0.082

## BARTScore

In [None]:
from transformers import BartModel, BartTokenizer
import torch
from scipy.spatial.distance import cosine



# Load pre-trained model and tokenizer
model_name = 'facebook/bart-large'
model = BartModel.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

# Sentences you want to compare
reference_sentence_3 = a3[0]
generated_sentence_3 = ai_solution_3

# Encode both texts into token ids.
# Truncate to the model's position-embedding limit so that a long generated
# answer cannot overflow the model input; shorter texts are unaffected.
max_input_tokens = model.config.max_position_embeddings
input_ids_ref_3 = tokenizer.encode(
    reference_sentence_3,
    return_tensors='pt',
    add_special_tokens=True,
    truncation=True,
    max_length=max_input_tokens,
)
input_ids_gen_3 = tokenizer.encode(
    generated_sentence_3,
    return_tensors='pt',
    add_special_tokens=True,
    truncation=True,
    max_length=max_input_tokens,
)

# Forward pass only; no gradients are needed for scoring.
with torch.no_grad():
    model_output_ref_3 = model(input_ids_ref_3)
    model_output_gen_3 = model(input_ids_gen_3)

# Mean-pool the last hidden states over the token dimension to get one
# embedding vector per text.
sentence_embedding_ref_3 = model_output_ref_3.last_hidden_state.mean(dim=1)
sentence_embedding_gen_3 = model_output_gen_3.last_hidden_state.mean(dim=1)


# Drop the size-1 batch dimension and convert to 1-D numpy arrays, which is
# what scipy's cosine() expects.
sentence_embedding_ref_1d_3 = sentence_embedding_ref_3.squeeze().numpy()
sentence_embedding_gen_1d_3 = sentence_embedding_gen_3.squeeze().numpy()

# scipy's cosine() is a distance, so 1 - distance gives the similarity.
# NOTE: this is a cosine similarity between mean-pooled BART embeddings, not
# the likelihood-based score described in the metrics overview.
similarity = 1 - cosine(sentence_embedding_ref_1d_3, sentence_embedding_gen_1d_3)

print(f"Similarity score: {similarity}")


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Similarity score: 0.8400664925575256


In [None]:
# Start the list of (question number, similarity) pairs here, at its first use.
# Appending to a list that has not been created yet raises NameError when the
# notebook is run top to bottom on a fresh kernel.
similarity_scores = [(3, similarity)]

## BLEU SCORE

In [None]:
import nltk
# Fetch the NLTK data packages used by the evaluation cells below.
# 'punkt' is tokenizer data (the download log unpacks it under tokenizers/).
nltk.download('punkt')
# 'wordnet' is presumably required by the METEOR scorer -- TODO confirm.
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from nltk.tokenize import word_tokenize

In [None]:
# Whitespace-split both texts into token lists for the BLEU computation.
tokenized_refr, tokenized_gen = (
    text.split() for text in (reference_sentence_3, generated_sentence_3)
)

In [None]:
# Plain sentence-level BLEU (default: up to 4-grams, no smoothing).
bleu_score = sentence_bleu([tokenized_refr], tokenized_gen)
print(f"BLEU score: {bleu_score}")

# Without smoothing, BLEU collapses to ~0 as soon as one n-gram order has no
# match, so the number above carries little information for free-form answers.
# Also report a smoothed score, as the NLTK warning itself recommends.
from nltk.translate.bleu_score import SmoothingFunction

bleu_score_smoothed = sentence_bleu(
    [tokenized_refr],
    tokenized_gen,
    smoothing_function=SmoothingFunction().method1,
)
print(f"Smoothed BLEU score: {bleu_score_smoothed}")

BLEU score: 2.4909844687582575e-155


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


The BLEU Score is between 0 and 1. In this case, it is practically zero suggesting that the generated sentence has very low overlap with the reference sentence in terms of n-gram matches.

The warnings suggest that the hypothesis (generated text) lacks 3-gram and 4-gram overlaps with the reference, which indicates a lack of longer phrase matches.

## METEOR Score:

In [None]:
# Show the reference answer and the generated answer, one after the other.
print(reference_sentence_3, generated_sentence_3, sep="\n")

you can always catch an illness from a child but most of these are viral like colds your pneumonia shot only protects you from certain types of bacterial pneumonia pneumococcus it would not protect you at all from viruses any respiratory virus can cause pneumonia or sinusitis if you are prone to these types of infections
To further advance the reasoning process and achieve a solution to the question, it is important to consider the following:

1. Evaluate your grandchild's health status: Assess whether your grandchild currently has a sinus infection or walking pneumonia. If she is showing symptoms or has been diagnosed, the risk of transmission may be higher. However, it is still crucial to consult with a healthcare professional to understand the specific pathogens involved and their transmission dynamics.

2. Consider the symptoms and duration of illnesses: Sinus infections and walking pneumonia have distinct symptoms and durations. Sinus infections typically present with facial pain,

In [None]:
# Tokenise both texts with NLTK's word tokenizer for the METEOR computation.
reference, generate = map(word_tokenize, (reference_sentence_3, generated_sentence_3))

In [None]:
# METEOR takes a list of tokenised references and a single tokenised hypothesis.
meteor_score_result = meteor_score(references=[reference], hypothesis=generate)
print(f"METEOR score: {meteor_score_result}")

METEOR score: 0.16524520255863542


The score is 0.165, suggesting low overlap or similarity between the reference and the generated text.

In [None]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Default number of passes over the training data used by fine_tune_classifier.
epochs = 3  # for example, you can adjust based on your needs

# 1. Fine-tune a classifier on a dataset of sentence pairs:
def fine_tune_classifier(training_data, num_epochs=None, learning_rate=2e-5):
    """Fine-tune `bert-base-uncased` as a sentence-pair classifier.

    Args:
        training_data: Re-iterable collection of
            (src_sentence, gen_sentence, label) triples. It is iterated once
            per epoch. `label` is assumed to be an integer class index --
            TODO confirm against the data actually used.
        num_epochs: Number of passes over `training_data`. Defaults to the
            module-level `epochs` value, read at call time.
        learning_rate: Step size for the AdamW optimizer.

    Returns:
        The fine-tuned `BertForSequenceClassification` model, in eval mode.
    """
    if num_epochs is None:
        num_epochs = epochs

    # Load pre-trained BERT model and tokenizer
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    model.train()

    for epoch in range(num_epochs):
        for src_sentence, gen_sentence, label in training_data:
            inputs = tokenizer(src_sentence, gen_sentence, return_tensors='pt', padding=True, truncation=True)
            # The model needs the label as a tensor with a batch dimension;
            # the raw Python value was passed before and this tensor ignored.
            label_tensor = torch.tensor([label])

            optimizer.zero_grad()
            outputs = model(**inputs, labels=label_tensor)
            loss = outputs.loss
            # Backpropagate and update the model weights.
            loss.backward()
            optimizer.step()

    # Hand the model back in inference mode, as from_pretrained returns it.
    model.eval()
    return model

from nltk.tokenize import sent_tokenize

def extract_claims_from_summary(summary):
    """Return the sentences of `summary`, as split by NLTK's `sent_tokenize`."""
    claims = sent_tokenize(summary)
    return claims



In [None]:
# 2. Generate claims from the summarized/generated text:
def extract_claims_from_summary(summary):
    """Split a summary into sentence-level claims.

    Args:
        summary: The generated text to split.

    Returns:
        A list with one string per sentence, produced by NLTK's
        `sent_tokenize`.
    """
    # Imported locally so this cell works without relying on another cell's
    # import. The placeholder body returned an undefined name and raised
    # NameError whenever the function was called.
    from nltk.tokenize import sent_tokenize

    return sent_tokenize(summary)



# @ Loading the CSV File

In [None]:
# Read the table saved by the ToT run and attach the two compared texts and
# their similarity score as constant columns.
df = pd.read_csv('results.csv').assign(**{
    'Best Solution': ai_solution_3,
    'Human Solution': reference_text_3,
    'Similarity Score': similarity,
})

# Write the augmented table to a separate file; results.csv is left untouched.
df.to_csv('results_1.csv', index=False)

print(df.head())

               Thought  Evaluation        Best Solution       Human Solution  \
0  ["To further adv...         0.6  To further advan...  you can always c...   
1  ["Considering th...         0.3  To further advan...  you can always c...   

   Similarity Score  
0          0.840066  
1          0.840066  


# OTHER

In [None]:
# @title Extract Questions and Answers from Dataset:
# Pull the row at index 11 once; reading a column from the one-row selection
# gives a single-element list (see the printed output below).
row_11 = train_dataset.select([11])
q11 = row_11['question']   # question text, wrapped in a list
a11 = row_11['answer']     # ground-truth answer, wrapped in a list

print("Question 11 is: ", q11)
print("Ground-truth answer: ", a11)

Question 11 is:  ['can you be allergic to mold in your food']
Ground-truth answer:  ['a person can have an allergic reaction to just about anything so that would include food or molds']


In [None]:
search_algorithm = "DFS"
strategy = "cot"
evaluation_strategy="vote"

# Create the language-model wrapper.
# Pass the real credential (the notebook-level OPENAI_API_KEY variable, falling
# back to the environment variable of the same name) instead of the literal
# text 'OPENAI_API_KEY', which is not a usable key.
model = OptimizedOpenAILanguageModel(
    OPENAI_API_KEY or os.getenv('OPENAI_API_KEY', ''),
    api_model="gpt-3.5-turbo",
)
tree_of_thoughts = OptimizedTreeofThoughts(model, search_algorithm)


# Search settings for this run, grouped in a simple namespace class.
# Only problem, search_algorithm, k, T, b and vth are read by the calls below.
class args:
    problem = q11
    search_algorithm = "DFS"
    k = 2
    T = 4
    b = 5
    vth = 0.4
    timeout = 10
    confidence = 0.8
    max_iterations = 40
    convergence_threshold = 0.01
    convergence_count = 5

# Build the Tree-of-Thoughts solver with the configured search algorithm.
optimized_tree_of_thoughts = OptimizedTreeofThoughts(model, search_algorithm=args.search_algorithm)

# Search for the best state for the problem.
best_state = optimized_tree_of_thoughts.solve(args.problem, k=args.k, T=args.T, b=args.b, vth=args.vth)

# Generate the final solution from the best state.
final_solution_11= optimized_tree_of_thoughts.model.generate_solution(best_state, args.problem)



# Print the final solution.
print(f"THE FINAL SOLUTION IS:  {final_solution_11}")

# After processing, save the solver's DataFrame to a CSV file.
optimized_tree_of_thoughts.save_dataframe('results.csv')

Using api_model gpt-3.5-turbo
Start time 1698620855.7810106
THIS IS WHERE IT GENERATE THE THOUGHTS BASING ON THE STATES:
We receive STATE of type <class 'list'> For state:  ['can you be allergic to mold in your food'] 


Considering the thoughts you've had until now: THE STATES ARE: 

can you be allergic to mold in your food

Devise the next coherent thought that will aid in advancing the reasoning process and achieving a solution to ['can you be allergic to mold in your food']. Assess various scenarios, think unconventionally, anticipate potential challenges, and resolve any outstanding queries. Tap into your mind's full potential and make certain no open questions remain.
thoughts: ['To advance the reasoning process and find a solution to the question "can you be allergic to mold in your food," it is essential to consider the following:\n\n1. Research existing literature: Explore scientific studies, medical journals, and reputable sources to determine if there is any documented evide

  self.df = self.df.append({'Thought': thought, 'Evaluation': value}, ignore_index=True)


state: 0.8
value: 0.8
THIS IS WHERE IT GENERATE THE THOUGHTS BASING ON THE STATES:
We receive STATE of type <class 'tuple'> For state:  ('can you be allergic to mold in your food', 'To advance the reasoning process and find a solution to the question "can you be allergic to mold in your food," it is essential to consider the following:\n\n1. Research existing literature: Explore scientific studies, medical journals, and reputable sources to determine if there is any documented evidence of food allergies specifically caused by mold.\n\n2. Consult medical professionals: Seek advice from allergists or immunologists who specialize in food allergies. They can provide insights into the potential allergenic properties of mold and its impact on human health.\n\n3. Understand types of mold: Investigate the different types of molds commonly found in food and their potential allergenicity. Some molds may produce mycotoxins, which can cause adverse reactions in sensitive individuals.\n\n4. Conside

  self.df = self.df.append({'Thought': thought, 'Evaluation': value}, ignore_index=True)


thoughts: ['Based on the information gathered, it is possible to conclude that individuals can be allergic to mold in their food. However, it is important to note that the severity and prevalence of these allergies may vary among individuals. Further research and consultation with medical professionals are necessary to determine the specific types of mold that can cause allergies and to understand the potential challenges in diagnosing and managing mold-induced food allergies. By considering these factors and exploring unconventional perspectives, a more comprehensive understanding of mold allergies in food can be achieved, leading to effective solutions for individuals with these allergies.\n\nPossible solutions for individuals with mold allergies in their food include:\n\n1. Avoidance: Individuals with mold allergies should be advised to avoid consuming foods that are known to be prone to mold growth, such as aged cheeses, fermented foods, and foods with visible mold growth. They sho

In [None]:
# Keep the first element of the generated solution together with the first
# ground-truth answer for the similarity computation below.
ai_solution_11, reference_text_11 = final_solution_11[0], a11[0]

In [None]:
# Load pre-trained model and tokenizer
model_name = 'facebook/bart-large'
model = BartModel.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)
# Sentences you want to compare
reference_sentence_11 = a11[0]
generated_sentence_11 = ai_solution_11

# Encode both texts into token ids.
# Truncate to the model's position-embedding limit so that a long generated
# answer cannot overflow the model input; shorter texts are unaffected.
max_input_tokens = model.config.max_position_embeddings
input_ids_ref_11 = tokenizer.encode(
    reference_sentence_11,
    return_tensors='pt',
    add_special_tokens=True,
    truncation=True,
    max_length=max_input_tokens,
)
input_ids_gen_11 = tokenizer.encode(
    generated_sentence_11,
    return_tensors='pt',
    add_special_tokens=True,
    truncation=True,
    max_length=max_input_tokens,
)

# Forward pass only; no gradients are needed for scoring.
with torch.no_grad():
    model_output_ref_11 = model(input_ids_ref_11)
    model_output_gen_11 = model(input_ids_gen_11)

# Mean-pool the last hidden states over the token dimension to get one
# embedding vector per text.
sentence_embedding_ref_11 = model_output_ref_11.last_hidden_state.mean(dim=1)
sentence_embedding_gen_11 = model_output_gen_11.last_hidden_state.mean(dim=1)


# Drop the size-1 batch dimension and convert to 1-D numpy arrays, which is
# what scipy's cosine() expects.
sentence_embedding_ref_1d_11 = sentence_embedding_ref_11.squeeze().numpy()
sentence_embedding_gen_1d_11 = sentence_embedding_gen_11.squeeze().numpy()

# scipy's cosine() is a distance, so 1 - distance gives the similarity.
similarity_11 = 1 - cosine(sentence_embedding_ref_1d_11, sentence_embedding_gen_1d_11)

print(f"Similarity score: {similarity_11}")


Similarity score: 0.7299422025680542


In [None]:
# Record the (question number, similarity) pair for question 11.
score_entry_11 = (11, similarity_11)
similarity_scores.append(score_entry_11)

In [None]:
# @title Extract Questions and Answers from Dataset:
# Pull the row at index 18 once; reading a column from the one-row selection
# gives a single-element list (see the printed output below).
row_18 = train_dataset.select([18])
q18 = row_18['question']   # question text, wrapped in a list
a18 = row_18['answer']     # ground-truth answer, wrapped in a list

print("Question 18 is: ", q18)
print("Ground-truth answer: ", a18)

Question 18 is:  ['why would a rn choose not to get her kids a flu shot as the grandparent is there anything i can do']
Ground-truth answer:  ['you would just have to ask the rn for her rationale many people reject vaccines for a variety of reasons    some valid and some ridiculous if these are her children then she has the right not to vaccinate or decide which vaccinations her children receive the flu vaccine is optional as far as schools are concerned but not optional in my book i am very pro vaccine but i do allow the parents the right to refuse they do have to hear my lecture however but in the end the decision is theirs as a grandmother    and i am a grandparent too    we have memories of those days before vaccinations some new parents have never seen the diseases that ravaged our generation did she give you any specific reason if it is a time issue maybe you can volunteer to take your grandchildren to a nearby pharmacy or health department to get the shots or is she worried abou

In [None]:
search_algorithm = "DFS"
strategy = "cot"
evaluation_strategy="vote"

# Create the language-model wrapper.
# Pass the real credential (the notebook-level OPENAI_API_KEY variable, falling
# back to the environment variable of the same name) instead of the literal
# text 'OPENAI_API_KEY', which is not a usable key.
model = OptimizedOpenAILanguageModel(
    OPENAI_API_KEY or os.getenv('OPENAI_API_KEY', ''),
    api_model="gpt-3.5-turbo",
)
tree_of_thoughts = OptimizedTreeofThoughts(model, search_algorithm)


# Search settings for this run, grouped in a simple namespace class.
# Only problem, search_algorithm, k, T, b and vth are read by the calls below.
class args:
    problem = q18
    search_algorithm = "DFS"
    k = 2
    T = 4
    b = 5
    vth = 0.4
    timeout = 10
    confidence = 0.8
    max_iterations = 40
    convergence_threshold = 0.01
    convergence_count = 5

# Build the Tree-of-Thoughts solver with the configured search algorithm.
optimized_tree_of_thoughts = OptimizedTreeofThoughts(model, search_algorithm=args.search_algorithm)

# Search for the best state for the problem.
best_state = optimized_tree_of_thoughts.solve(args.problem, k=args.k, T=args.T, b=args.b, vth=args.vth)

# Generate the final solution from the best state.
final_solution_18= optimized_tree_of_thoughts.model.generate_solution(best_state, args.problem)



# Print the final solution.
print(f"THE FINAL SOLUTION IS:  {final_solution_18}")

# After processing, save the solver's DataFrame to a CSV file.
optimized_tree_of_thoughts.save_dataframe('results.csv')

Using api_model gpt-3.5-turbo
Start time 1698621111.5157204
THIS IS WHERE IT GENERATE THE THOUGHTS BASING ON THE STATES:
We receive STATE of type <class 'list'> For state:  ['why would a rn choose not to get her kids a flu shot as the grandparent is there anything i can do'] 


Considering the thoughts you've had until now: THE STATES ARE: 

why would a rn choose not to get her kids a flu shot as the grandparent is there anything i can do

Devise the next coherent thought that will aid in advancing the reasoning process and achieving a solution to ['why would a rn choose not to get her kids a flu shot as the grandparent is there anything i can do']. Assess various scenarios, think unconventionally, anticipate potential challenges, and resolve any outstanding queries. Tap into your mind's full potential and make certain no open questions remain.
thoughts: ['To advance the reasoning process and find a solution to why a registered nurse (RN) would choose not to get her kids a flu shot whe

  self.df = self.df.append({'Thought': thought, 'Evaluation': value}, ignore_index=True)


state: 0.55
value: 0.55
THIS IS WHERE IT GENERATE THE THOUGHTS BASING ON THE STATES:
We receive STATE of type <class 'tuple'> For state:  ('why would a rn choose not to get her kids a flu shot as the grandparent is there anything i can do', "To advance the reasoning process and find a solution to why a registered nurse (RN) would choose not to get her kids a flu shot when the grandparent is present, we need to consider various scenarios and potential reasons behind this decision. \n\n1. Lack of trust in flu vaccines: The RN may have concerns about the safety or effectiveness of flu vaccines, leading her to choose not to get her kids vaccinated. It could be helpful to discuss her specific concerns and provide evidence-based information on the benefits of flu shots, addressing any misconceptions she may have.\n\n2. Personal experiences or beliefs: The RN may have had negative experiences with vaccines or hold personal beliefs that influence her decision. It is important to approach this 

  self.df = self.df.append({'Thought': thought, 'Evaluation': value}, ignore_index=True)


thoughts: ["One potential solution to address the situation could be to involve the grandparent in the conversation. The grandparent, being a trusted family member, may have influence over the RN's decision. By discussing the potential risks of not getting the children vaccinated and the importance of protecting vulnerable individuals like the grandparent, the grandparent may be able to provide additional support and encouragement for the RN to reconsider her decision.\n\nAdditionally, reaching out to healthcare professionals or organizations that specialize in vaccine education and advocacy could provide valuable resources and guidance. They may be able to provide evidence-based information, address any concerns or misconceptions, and offer strategies to effectively communicate with the RN about the importance of flu vaccination.\n\nIt is important to approach the discussion with empathy, understanding, and a non-judgmental attitude. By creating a safe space for open dialogue, activel

In [None]:
# Keep the first element of the generated solution together with the first
# ground-truth answer for the similarity computation below.
ai_solution_18, reference_text_18 = final_solution_18[0], a18[0]

In [None]:
# Load pre-trained model and tokenizer
model_name = 'facebook/bart-large'
model = BartModel.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)
# Sentences you want to compare
reference_sentence_18 = a18[0]
generated_sentence_18 = ai_solution_18

# Encode both texts into token ids.
# Truncate to the model's position-embedding limit so that a long reference or
# generated answer cannot overflow the model input; shorter texts are unaffected.
max_input_tokens = model.config.max_position_embeddings
input_ids_ref_18 = tokenizer.encode(
    reference_sentence_18,
    return_tensors='pt',
    add_special_tokens=True,
    truncation=True,
    max_length=max_input_tokens,
)
input_ids_gen_18 = tokenizer.encode(
    generated_sentence_18,
    return_tensors='pt',
    add_special_tokens=True,
    truncation=True,
    max_length=max_input_tokens,
)

# Forward pass only; no gradients are needed for scoring.
with torch.no_grad():
    model_output_ref_18 = model(input_ids_ref_18)
    model_output_gen_18 = model(input_ids_gen_18)

# Mean-pool the last hidden states over the token dimension to get one
# embedding vector per text.
sentence_embedding_ref_18 = model_output_ref_18.last_hidden_state.mean(dim=1)
sentence_embedding_gen_18 = model_output_gen_18.last_hidden_state.mean(dim=1)


# Drop the size-1 batch dimension and convert to 1-D numpy arrays, which is
# what scipy's cosine() expects.
sentence_embedding_ref_1d_18 = sentence_embedding_ref_18.squeeze().numpy()
sentence_embedding_gen_1d_18 = sentence_embedding_gen_18.squeeze().numpy()

# scipy's cosine() is a distance, so 1 - distance gives the similarity.
similarity_18 = 1 - cosine(sentence_embedding_ref_1d_18, sentence_embedding_gen_1d_18)

print(f"Similarity score: {similarity_18}")
# Record the (question number, similarity) pair for question 18.
similarity_scores.append((18, similarity_18))

Similarity score: 0.8324308395385742


# Create the DataFrame to store the BARTScore similarity scores for the 3 examples.

In [None]:
# Collect the (question number, similarity score) pairs for the three examples.
# Build the list from the per-question results rather than resetting it to [].
# A reset at this point throws away every score recorded earlier, and the
# summary table comes out empty when the notebook is run top to bottom.
similarity_scores = [(3, similarity), (11, similarity_11), (18, similarity_18)]

In [None]:
# Tabulate the recorded (question number, similarity) pairs and put a 1-based
# running index in the first column.
score_columns = ['Question Number', 'Similarity Score']
df = pd.DataFrame(similarity_scores, columns=score_columns)
df.insert(loc=0, column='Index', value=df.index + 1)
print(df)


   Index  Question Number  Similarity Score
0      1               18          0.832431
1      2               11          0.729942
2      3                3          0.840066
