In [1]:
import re
import pandas as pd
from tqdm.auto import tqdm
from huggingface_hub import InferenceClient, login
from dotenv import load_dotenv
import os

In [7]:
# Register tqdm's pandas integration; this is what makes the
# DataFrame.progress_apply call used in the judging cell available.
tqdm.pandas()  # load tqdm's pandas support

In [4]:
# Read the Hugging Face access token from the project-level .env file (or the
# ambient environment) and authenticate this session with the Hub.
load_dotenv("../.env")
token = os.getenv("HUGGINGFACE_TOKEN")
if not token:
    # Fail fast: with no token, login() falls back to an interactive prompt,
    # which stalls an unattended "Restart Kernel -> Run All".
    raise RuntimeError(
        "HUGGINGFACE_TOKEN is not set; add it to ../.env or export it before running."
    )
login(token=token)

In [5]:
# Hub repository of the instruct model that acts as the judge.
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Client bound to that model; each request may wait up to 120 seconds.
llm_client = InferenceClient(model=repo_id, timeout=120)

In [2]:
# Load the rated question/answer examples that the judge model will score.
examples = pd.read_csv(filepath_or_buffer="../data/ratings.csv")

In [3]:
# Prompt template for the LLM judge. It has two str.format placeholders,
# {question} and {answer}, and asks for a float rating between 0 and 10.
# The template deliberately ends with "Total rating: " so the model's
# completion is expected to start with the rating itself.
JUDGE_PROMPT = """
You will be given a user_question and system_answer couple.
Your task is to provide a 'total rating' scoring how well the system_answer answers the user concerns expressed in the user_question.
Give your answer as a float on a scale of 0 to 10, where 0 means that the system_answer is not helpful at all, and 10 means that the answer completely and helpfully addresses the question.

Provide your feedback as follows:

Feedback:::
Total rating: (your rating, as a float between 0 and 10)

Now here are the question and answer.

Question: {question}
Answer: {answer}

Feedback:::
Total rating: """

In [10]:
def judge_row(row):
    """Ask the judge model to rate one example and return its raw completion.

    Fills JUDGE_PROMPT with the row's "question" and "feedback1" values and
    sends the result to llm_client.text_generation, allowing up to 1000 new
    tokens in the reply.
    """
    # NOTE(review): the prompt's {answer} slot is fed the "feedback1" column;
    # confirm this is the text that is meant to be judged.
    filled_prompt = JUDGE_PROMPT.format(
        question=row["question"],
        answer=row["feedback1"],
    )
    return llm_client.text_generation(prompt=filled_prompt, max_new_tokens=1000)


# One generation request per row; progress_apply shows a tqdm bar meanwhile.
examples["llm_judge"] = examples.progress_apply(judge_row, axis=1)

  0%|          | 0/2702 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [9]:
# Display the examples frame via the cell's last-expression output.
# NOTE(review): the saved output below has no "llm_judge" column, i.e. it was
# rendered before the judging cell completed; re-run after judging to refresh.
examples

Unnamed: 0,question,passage,feedback,rating,domain,rating1,rating2,feedback1,feedback2,rating_num1,rating_num2,human_score
0,How do I get help finding a job?,"{'passage_id': 139, 'source': 'Australia', 'ur...","['A link to a job search website is included, ...","['Excellent', 'Excellent']",Australia,Excellent,Excellent,"A link to a job search website is included, as...","Includes a link to a Jobs Hub page, which is b...",4.0,4.0,4.0
1,If I am in Australia on a worker holiday marke...,"{'passage_id': 546, 'source': 'Australia', 'ur...",['Answer instructs the requester to apply for ...,"['Excellent', 'Excellent']",Australia,Excellent,Excellent,Answer instructs the requester to apply for a ...,Has a detailed description of the circumstance...,4.0,4.0,4.0
2,Do immigration detention centers have proper PPE?,"{'passage_id': 224, 'source': 'Australia', 'ur...",['Does not address the matter at issue in the ...,"['Bad', 'Bad']",Australia,Bad,Bad,Does not address the matter at issue in the qu...,The question is about the availability of PPE ...,1.0,1.0,1.0
3,Do immigration detention centers have proper PPE?,"{'passage_id': 219, 'source': 'Australia', 'ur...",['Does not address the matter at issue in the ...,"['Bad', 'Bad']",Australia,Bad,Bad,Does not address the matter at issue in the qu...,The question is about prevalence of PPE in det...,1.0,1.0,1.0
4,Do immigration detention centers have proper PPE?,"{'passage_id': 220, 'source': 'Australia', 'ur...",['Claims it monitors PPE levels in the immigra...,"['Acceptable', 'Acceptable']",Australia,Acceptable,Acceptable,Claims it monitors PPE levels in the immigrant...,This gives a general answer to the question bu...,3.0,3.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2697,How can practioners provide services to chroni...,"{'passage_id': 79, 'source': 'WHO', 'uri': 'ht...",['Has detailed information about providing ser...,"['Excellent', 'Excellent']",WHO,Excellent,Excellent,Has detailed information about providing servi...,This answer gives a lot of information. The i...,4.0,4.0,4.0
2698,Can you explain to me what self-care is,"{'passage_id': 159, 'source': 'WHO', 'uri': 'h...",['This answer explains what self care is direc...,"['Excellent', 'Excellent']",WHO,Excellent,Excellent,This answer explains what self care is directly.,This is a direct answer to the question. This ...,4.0,4.0,4.0
2699,Is it safe for me to manually replace my IUD a...,"{'passage_id': 70, 'source': 'WHO', 'uri': 'ht...",['Does not address whether or not replacing an...,"['Bad', 'Bad']",WHO,Bad,Bad,Does not address whether or not replacing an I...,The answer is irrelevant to the question. This...,1.0,1.0,1.0
2700,Is it safe for me to manually replace my IUD a...,"{'passage_id': 103, 'source': 'WHO', 'uri': 'h...",['Does not answer whether or not IUD can be sa...,"['Bad', 'Bad']",WHO,Bad,Bad,Does not answer whether or not IUD can be safe...,This answer is irrelevant to the question. Thi...,1.0,1.0,1.0
