In [None]:
# Import necessary packages
import pandas as pd
import torch

from pipeline.pipeline_initializer import initialize_pipeline
from pipeline.prompting_interface import prompt_pipeline
from utils.csv_data_source import CsvDataSource

In [None]:
import json
# Load the question set used by the benchmark generators below. Each entry is
# read later via its "question" key (and "role" for the role-play variant).
# JSON text is UTF-8, so the encoding is passed explicitly instead of relying
# on the platform's locale default.
with open("resources/questions.json", encoding="utf-8") as file:
    questions = json.load(file)

In [None]:
def get_prompt_standard(dataset: str, question: str):
    """Build the persona-free prompt that asks for an answer to `question` from `dataset`.

    The dataset and the question are each embedded between delimiter lines and
    followed by the fixed completeness/relevance criteria.

    NOTE(review): the dataset section is opened with `*/` whereas the question
    section is opened with `/*`. This looks like a typo, but the text is kept
    verbatim because altering it changes the prompt sent to the model.

    Args:
        dataset: Table contents to embed in the prompt.
        question: Question to embed in the prompt.

    Returns:
        The full prompt as one newline-separated string.
    """
    prompt_lines = (
        "Given this dataset:",
        "*/",
        f"{dataset}",
        "*/",
        "and this question:",
        "/*",
        f"{question}",
        "*/",
        "Assume you have all the necessary information to respond to the question. "
        "Generate an answer for the question given the dataset satisfying the following criteria:",
        "1. Completeness: The answer must definitively and comprehensively address all parts of the question.",
        "2. Relevance: The answer must directly provide the information requested in the question without any extraneous details.",
    )
    return "\n".join(prompt_lines)

In [None]:
def get_prompt_role_play(dataset: str, question: str, role: str):
    """Build the role-play prompt that asks for an answer to `question` from `dataset`.

    Same layout as the standard prompt, except that the instruction line tells
    the model to assume it is `role`.

    NOTE(review): the dataset section is opened with `*/` whereas the question
    section is opened with `/*`. This looks like a typo, but the text is kept
    verbatim because altering it changes the prompt sent to the model.

    Args:
        dataset: Table contents to embed in the prompt.
        question: Question to embed in the prompt.
        role: Persona text inserted after "Assume you are".

    Returns:
        The full prompt as one newline-separated string.
    """
    prompt_lines = (
        "Given this dataset:",
        "*/",
        f"{dataset}",
        "*/",
        "and this question:",
        "/*",
        f"{question}",
        "*/",
        f"Assume you are {role} with all the necessary information to respond to the question. "
        "Generate an answer for the question given the dataset satisfying the following criteria:",
        "1. Completeness: The answer must definitively and comprehensively address all parts of the question.",
        "2. Relevance: The answer must directly provide the information requested in the question without any extraneous details.",
    )
    return "\n".join(prompt_lines)

In [None]:
# Model identifier and tensor dtype handed to initialize_pipeline
# (presumably a Hugging Face hub id — confirm against pipeline_initializer).
model = "teknium/OpenHermes-2.5-Mistral-7B"
dtype = torch.bfloat16
# Built once at notebook level; the generate_benchmark_* functions read `pipe`
# as a global rather than taking it as an argument.
pipe = initialize_pipeline(model, dtype)

In [None]:
def generate_benchmark_standard(benchmark_name: str, generation_params=None):
    """Answer every question for every table with the standard prompt, checkpointing to CSV.

    For each table yielded by ``CsvDataSource("tables")`` and each entry in the
    notebook-level ``questions`` mapping, a prompt is built with
    ``get_prompt_standard`` and sent through the notebook-level ``pipe``. The
    result is appended to the benchmark and the whole benchmark is written back
    to ``benchmark_name`` after every answer, so an interrupted run can be
    resumed: (table, question) pairs already present in the file are skipped.

    Args:
        benchmark_name: Path of the benchmark CSV to resume from / write to.
        generation_params: Optional keyword arguments forwarded to
            ``prompt_pipeline``. Defaults to no extra arguments.

    Returns:
        None. The benchmark is persisted to ``benchmark_name``.
    """
    # Avoid a shared mutable default argument.
    if generation_params is None:
        generation_params = {}

    # Load any previous (partial) run once. T and Q are forced to str so that
    # numeric-looking table names or questions are not re-typed by read_csv,
    # which would make the resume check below miss them.
    try:
        benchmark = pd.read_csv(benchmark_name, dtype={'T': str, 'Q': str})
    except FileNotFoundError:
        benchmark = pd.DataFrame(columns=['T', 'Q', 'A', 'E', 'R'])
    answered = set(zip(benchmark['T'], benchmark['Q']))

    csv_data_source = CsvDataSource("tables")
    for table in iter(csv_data_source):
        csv_file_name = table[0]
        print(f"Processing table {csv_file_name}")
        # Drops the last four characters; assumes names end in ".csv".
        table_name = csv_file_name[:-4]
        dataset = "".join(table[1]).rstrip()
        for i, entry in questions.items():
            print(f"Processing question {i}")
            question = entry["question"]
            # Skip before building the prompt so resumed runs do no wasted work.
            if (table_name, question) in answered:
                continue
            prompt = get_prompt_standard(dataset, question)
            conversation = [{"role": "user", "content": prompt}]
            # The last message of the returned conversation is taken as the
            # answer (presumably the model's reply — confirm against prompt_pipeline).
            answer = prompt_pipeline(pipe, conversation, **generation_params)[-1]["content"]
            row = pd.DataFrame({
                'T': [table_name],
                'Q': [question],
                'A': [answer],
                'E': ["unknown"],
                'R': ["unknown"],
            })
            benchmark = pd.concat([benchmark, row], ignore_index=True)
            answered.add((table_name, question))
            # Checkpoint after every answer; generation is the expensive step.
            benchmark.to_csv(benchmark_name, index=False)

In [None]:
def generate_benchmark_role_play(benchmark_name: str, generation_params=None):
    """Answer every question for every table with the role-play prompt, checkpointing to CSV.

    For each table yielded by ``CsvDataSource("tables")`` and each entry in the
    notebook-level ``questions`` mapping, a prompt is built with
    ``get_prompt_role_play`` (using the entry's "role") and sent through the
    notebook-level ``pipe``. The result is appended to the benchmark and the
    whole benchmark is written back to ``benchmark_name`` after every answer,
    so an interrupted run can be resumed: (table, question) pairs already
    present in the file are skipped.

    Args:
        benchmark_name: Path of the benchmark CSV to resume from / write to.
        generation_params: Optional keyword arguments forwarded to
            ``prompt_pipeline``. Defaults to no extra arguments.

    Returns:
        None. The benchmark is persisted to ``benchmark_name``.
    """
    # Avoid a shared mutable default argument.
    if generation_params is None:
        generation_params = {}

    # Load any previous (partial) run once. T and Q are forced to str so that
    # numeric-looking table names or questions are not re-typed by read_csv,
    # which would make the resume check below miss them.
    try:
        benchmark = pd.read_csv(benchmark_name, dtype={'T': str, 'Q': str})
    except FileNotFoundError:
        benchmark = pd.DataFrame(columns=['T', 'Q', 'A', 'E', 'R'])
    answered = set(zip(benchmark['T'], benchmark['Q']))

    csv_data_source = CsvDataSource("tables")
    for table in iter(csv_data_source):
        csv_file_name = table[0]
        print(f"Processing table {csv_file_name}")
        # Drops the last four characters; assumes names end in ".csv".
        table_name = csv_file_name[:-4]
        dataset = "".join(table[1]).rstrip()
        for i, entry in questions.items():
            print(f"Processing question {i}")
            question = entry["question"]
            role = entry["role"]
            # Skip before building the prompt so resumed runs do no wasted work.
            if (table_name, question) in answered:
                continue
            prompt = get_prompt_role_play(dataset, question, role)
            conversation = [{"role": "user", "content": prompt}]
            # The last message of the returned conversation is taken as the
            # answer (presumably the model's reply — confirm against prompt_pipeline).
            answer = prompt_pipeline(pipe, conversation, **generation_params)[-1]["content"]
            row = pd.DataFrame({
                'T': [table_name],
                'Q': [question],
                'A': [answer],
                'E': ["unknown"],
                'R': ["unknown"],
            })
            benchmark = pd.concat([benchmark, row], ignore_index=True)
            answered.add((table_name, question))
            # Checkpoint after every answer; generation is the expensive step.
            benchmark.to_csv(benchmark_name, index=False)

In [None]:
# Greedy Search RP
# Role-play benchmark run with no extra generation parameters (presumably this
# means greedy decoding — confirm against prompt_pipeline's defaults).
# NOTE(review): the output file is named "solar-..." although the model loaded
# in this notebook is OpenHermes — confirm the path is intended.
name = "evaluate_ulang/solar-RP-greedy.csv"
generate_benchmark_role_play(name)

In [None]:
# Nucleus 0.90 RP
# name = "benchmarks/openhermes-RP-nucleus_0.90.csv"
# benchmark = generate_benchmark_role_play(name, {"do_sample": True, "top_p": 0.90, "top_k": 0})

In [None]:
# Contrastive search (top_k=5, penalty_alpha=0.8) RP
# name = "benchmarks/openhermes-RP-contrastive_5_0.8.csv"
# benchmark = generate_benchmark_role_play(name, {"do_sample": True, "top_k": 5, "penalty_alpha": 0.8})