In [1]:
from prompttools.experiment import HuggingFaceHubExperiment
from langchain import HuggingFaceHub, PromptTemplate
from prompttools.utils import similarity

hf_repo_ids = ["google/flan-t5-xxl", "bigscience/bloom"]
max_lengths = [32, 64, 128]
temperatures = [0.1, 0.5, 0.999]
input_variables = ["question"]
template = """Question: {question}
        Answer: """

question = "Who was the very first president of the USA?"
expected = "George Washington"
messages = [[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
]]

In [2]:
experiment = HuggingFaceHubExperiment(
    repo_id=hf_repo_ids,
    max_length=max_lengths,
    temperature=temperatures,
    input_variables=input_variables,
    template=template,
    question=question,
    expected=expected,
    messages=messages,
)

In [3]:
experiment.run()

In [4]:
from prompttools.utils import similarity
from typing import Dict, List

def extract_responses(output) -> str:
    return output["response"]


def measure_similarity(
    messages: List[Dict[str, str]], results: Dict, metadata: Dict
) -> float:
    """
    A simple test that checks semantic similarity between the user input
    and the model's text responses.
    """
#     print(results)
#     print(expected, response)
    scores = [
        similarity.compute(expected, response, use_chroma=False)
        for response in extract_responses(results)
    ]
    return max(scores)

In [5]:
experiment.evaluate("similar_to_expected", measure_similarity)

In [6]:
experiment.visualize()

Unnamed: 0,messages,response(s),latency,similar_to_expected,repo_id,max_length,temperature
0,"[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the very first president of the USA?'}]",george washington,0.694896,0.352065,google/flan-t5-xxl,32,0.1
1,"[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the very first president of the USA?'}]",james buchanan,0.361313,0.308,google/flan-t5-xxl,32,0.5
2,"[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the very first president of the USA?'}]",george washington,0.318374,0.352065,google/flan-t5-xxl,32,0.999
3,"[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the very first president of the USA?'}]",george washington,0.351335,0.352065,google/flan-t5-xxl,64,0.1
4,"[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the very first president of the USA?'}]",george washington,0.278302,0.352065,google/flan-t5-xxl,64,0.5
5,"[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the very first president of the USA?'}]",james buchanan,0.353921,0.308,google/flan-t5-xxl,64,0.999
6,"[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the very first president of the USA?'}]",george washington,0.297332,0.352065,google/flan-t5-xxl,128,0.1
7,"[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the very first president of the USA?'}]",james madison,0.337943,0.289897,google/flan-t5-xxl,128,0.5
8,"[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the very first president of the USA?'}]",george washington,0.282666,0.352065,google/flan-t5-xxl,128,0.999
9,"[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the very first president of the USA?'}]","George Washington\n """"""\n self.assertEqual(self.parser.parse(question), [('",1.095149,0.352065,bigscience/bloom,32,0.1
