In [None]:
import dotenv
import os
# Load environment variables via python-dotenv before any component is built;
# the generator cell later reads OPENROUTER_API_KEY from the environment.
dotenv.load_dotenv() # For OpenRouter API key

In [None]:
# Run configuration: the two empty strings are placeholders to fill in before running.
QUESTIONS_PATH = "" # Path of MaScQA eval.json file
MODEL_NAME = "" # Model identifier handed to the generator; also used to name the output files
MAX_TOKENS = 1024 # Forwarded to the generator as the "max_tokens" generation setting
TEMPERATURE = 0.0 # Forwarded to the generator as the "temperature" generation setting

In [None]:
from src.import_questions import import_questions
# Load the evaluation questions from QUESTIONS_PATH using the project helper.
# The result is later consumed through .keys()/.values() and concatenated with
# pandas, so it is presumably a mapping of topic -> DataFrame (confirm in
# src/import_questions).
questions = import_questions(QUESTIONS_PATH)

In [None]:
from haystack.components.builders import PromptBuilder

# Prompt sent for every question. `{{question}}` is the single template
# variable filled in at run time. The text starts and ends with a newline.
template = (
    "\n"
    "Solve the following question. Write the correct answer inside a list at the end.\n"
    "Question: {{question}}\n"
    "Answer:\n"
)

prompt_builder = PromptBuilder(template=template)

In [None]:
from haystack.components.generators import OpenAIGenerator
from haystack.utils import Secret

# OpenAI-style generator pointed at the OpenRouter base URL. The API key is
# resolved from the OPENROUTER_API_KEY environment variable, and the sampling
# settings come from the configuration constants.
generator = OpenAIGenerator(
    model=MODEL_NAME,
    api_key=Secret.from_env_var("OPENROUTER_API_KEY"),
    api_base_url="https://openrouter.ai/api/v1",
    generation_kwargs=dict(max_tokens=MAX_TOKENS, temperature=TEMPERATURE),
)

In [None]:
from haystack import Pipeline

# Two-stage pipeline: the prompt builder renders the prompt, the LLM answers it.
llm_pipeline = Pipeline()

# Register the components under the names used when running the pipeline.
for component_name, component in (("prompt_builder", prompt_builder), ("llm", generator)):
    llm_pipeline.add_component(component_name, component)

# Feed the rendered prompt into the generator.
llm_pipeline.connect("prompt_builder", "llm")

In [None]:
import pandas as pd

# Stack the per-topic frames into one table; the key of each frame becomes the
# outer index level "topic", which reset_index() then turns into a column.
topic_frames = list(questions.values())
topic_names = list(questions.keys())
df = pd.concat(topic_frames, keys=topic_names, names=["topic"]).reset_index()

# Empty bookkeeping columns for the model output and the later analysis.
for column_name, initial_value in {
    "result": "",
    "filtered_result": "",
    "correct_result": "",
    "overlap": 0,
    "error_type": "",
}.items():
    df[column_name] = initial_value

In [None]:
from tqdm import tqdm

# A row still needs a model call when it has no usable reply yet: the value is
# missing, empty, the "ERROR" sentinel written below, or an HTML error page
# returned in place of a completion. The predicate is computed once so that the
# progress-bar total and the rows actually processed can never disagree, and so
# that a non-string value (e.g. NaN) cannot raise a TypeError in the loop.
previous_results = df["result"]
needs_call = (
    previous_results.isna()
    | previous_results.eq("")
    | previous_results.eq("ERROR")
    | previous_results.astype(str).str.contains("<!DOCTYPE html>", regex=False)
)
pending_indices = df.index[needs_call]
total_rows = len(pending_indices)

# Re-running this cell only retries the rows that are still pending.
with tqdm(total=total_rows, desc="Processing rows", dynamic_ncols=True) as pbar:
    for index in pending_indices:
        question = df.at[index, "questions"]
        try:
            # The first reply of the "llm" component is the model's answer.
            result = llm_pipeline.run({"prompt_builder": {"question": question}})["llm"]["replies"][0]
        except Exception as e:
            # Best effort: report the failure and mark the row so that a later
            # re-run of this cell picks it up again.
            print(str(e))
            result = "ERROR"
        df.at[index, "result"] = result

        pbar.update()

In [None]:
from datetime import datetime
import json

# Timestamp (minute resolution) used to tell runs of the same model apart.
date = datetime.now().strftime("%Y%m%d%H%M")
# Model ids contain "/" (vendor/model) and may carry a ":variant" suffix.
# Neither character is safe in a file name on every platform, so both are
# replaced before the id is used in the output file names.
MODEL_NAME_WITHOUT_SLASHES = MODEL_NAME.replace("/", "_").replace(":", "_")
filename = f"results_{MODEL_NAME_WITHOUT_SLASHES}_{date}"

# Save the results to a new file
df.to_csv(f"{filename}.csv", index=False)

# Save metadata next to the CSV. The generation settings are recorded as well,
# so the run can be reproduced from the metadata file alone.
metadata = {
    "model": MODEL_NAME,
    "date": date,
    "num_rows": len(df),
    "max_tokens": MAX_TOKENS,
    "temperature": TEMPERATURE,
}

with open(f"{filename}.json", "w", encoding="utf-8") as f:
    json.dump(metadata, f)