<a href="https://colab.research.google.com/github/joexu22/llama2-finetune/blob/main/Notebooks/Haphazard_Benchmark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Haphazard Benchmarking

Quick-and-dirty similarity comparison between the source, base, and finetune outputs

Source - Dataset (think client data)
Base - Llama2 7b
Finetune - The finetuned Llama

Credit to LlamaIndex for the underlying method

In [None]:
# Clone the repo that holds the benchmark data. The path variables defined in
# the following cell point inside /content/llama2-finetune, so this assumes the
# notebook's working directory is /content — TODO confirm outside Colab.

!git clone https://github.com/joexu22/llama2-finetune.git

In [None]:
# Absolute locations, inside the cloned data repo, of the three texts compared.
path_source, path_base, path_finetune = (
    # reference answers (the "source" dataset)
    "/content/llama2-finetune/data_processing/answers.txt",
    # responses recorded from the base model
    "/content/llama2-finetune/data_processing/llama_outputs/llama-base-hf-response-7b.txt",
    # responses recorded from the finetuned model
    "/content/llama2-finetune/data_processing/llama_outputs/llama-finetuned-output-ultimate.txt",
)


In [None]:
# Fetch llama_index from a fresh clone of its repository.
# NOTE(review): no branch, tag, or commit is pinned, so this takes whatever the
# default branch currently contains — confirm it still exposes
# `llama_index.evaluation`, which a later cell imports.
!git clone https://github.com/jerryjliu/llama_index.git
# Move into the clone. Only requirements.txt is pip-installed below, so this
# directory change is presumably what lets `import llama_index` resolve —
# TODO confirm.
%cd llama_index
!pip install -r /content/llama_index/requirements.txt

In [None]:
# NOTE(review): no cell visible in this notebook imports this package —
# presumably required indirectly or by cells not shown; confirm before removing.
!pip install llama-cpp-python

In [None]:
# NOTE(review): never imported directly in the visible cells — presumably
# intended as an embedding backend for the similarity evaluator; verify that
# the evaluator actually uses it.
!pip install sentence_transformers

In [None]:
from llama_index.evaluation import SemanticSimilarityEvaluator

# Shared evaluator, constructed with no arguments (library defaults); the
# comparison cell further down awaits its `aevaluate` method.
# NOTE(review): which embedding model the defaults select is not visible
# here — confirm it is the one intended for this benchmark.
evaluator = SemanticSimilarityEvaluator()

In [None]:
# prompt: read the text files in source, base, and fine tune. Read them line by line and have them operatable

import io

def read_file(path):
    """Return the UTF-8 text file at ``path`` as a list of lines.

    The whole file is read in one go and then split with ``str.splitlines``,
    so the returned strings carry no line terminators and an empty file
    yields an empty list.
    """
    with io.open(path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents.splitlines()

# Load each file as a list of lines; later cells pick entries out by index.
source_text = read_file(path_source)
base_text = read_file(path_base)
finetune_text = read_file(path_finetune)

# Quick sanity check: dump the three lists, one per output line.
print(source_text, base_text, finetune_text, sep="\n")



In [None]:
# function to extract the Assistant part from the base and finetune

import re

# Creating a reusable function to extract and format the Assistant's part from a given text
def extract_and_format_assistant_text(given_text: str) -> str:
    r"""Pull the Assistant's reply out of a transcript line and tidy it up.

    The reply is everything that follows the first ``Assistant: `` marker, up
    to the end of that line. Inside it, every literal backslash-n pair
    (``\n`` spelled as two characters) becomes a space and every
    backslash-apostrophe pair (``\'``) becomes a plain apostrophe.

    Parameters:
        given_text (str): Text expected to contain an ``Assistant: `` marker.

    Returns:
        str: The cleaned reply, or the placeholder "Assistant's text not found"
        when no marker followed by at least one character is present.
    """
    match = re.search('Assistant: (.+)', given_text)
    if match is None:
        return "Assistant's text not found"

    reply = match.group(1)
    # Escaped sequence -> replacement, applied in this order.
    for escaped, plain in (("\\n", " "), ("\\'", "'")):
        reply = reply.replace(escaped, plain)
    return reply

# Smoke test on a hand-copied sample. The sample is shaped like the printed
# form of a bytes object: it opens with b' and carries newlines and apostrophes
# as literal backslash sequences, which are what the function rewrites.
test_text = '''b'### Human: What are MSE and RMSE? ### Assistant: Hey bud, MSE and RMSE are two commonly used metrics in machine learning to evaluate the performance of a model. Here\\'s the lowdown on what they mean:\\nMSE (Mean Squared Error) is a measure of the average squared difference between the predicted values and the actual values. It\\'s calculated by taking the average of the squared differences between the predicted and actual values for all data points in the dataset. MSE is a popular choi...'''
print(extract_and_format_assistant_text(test_text))

# Optional spot checks against the loaded model outputs (left disabled):
#print(extract_and_format_assistant_text(base_text[1]))
#print(extract_and_format_assistant_text(finetune_text[1]))




In [None]:
# general framework for measuring similarity between llama responses

source_line = source_text[1]
base_line = extract_and_format_assistant_text(base_text[1])
finetune_line = extract_and_format_assistant_text(finetune_text[1])

print(source_line)
print(base_line)
print(finetune_line)
print()

# difference between source and base
result = await evaluator.aevaluate(
    response=source_line,
    reference=base_line,
)

print("Source and Base")
print(result.score)

print()

# difference between source and finetune
result = await evaluator.aevaluate(
    response=source_line,
    reference=finetune_line,
)

print("Source and Finetune")
print(result.score)