# Helper notebook for benchmarking the GPT-4o-mini reference model

This notebook imports a CSV file containing a set of questions and saves the input and output token counts of the model responses to a dedicated CSV file. It specifically tests the vanilla `GPT-4o-mini` PLM by *OpenAI*. The actual answers were extracted from the notebook variable `res_only`, which is accessible after running the corresponding cells below. For further information on the actual results, refer to the accompanying results file.

In [2]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
import pandas as pd
from typing import List, Dict
load_dotenv()

True

In [10]:
# Chat model under test. temperature=0 is presumably chosen to keep the
# benchmark answers as reproducible as possible.
model = ChatOpenAI(
    model='gpt-4o-mini',
    temperature=0,
)

In [11]:
# model prompt
#
# German prompt template; `{Frage}` is the placeholder for the question and is
# filled in with str.format(). It tells the model to act as an expert on
# building standards, to always cite chapter and section, and states that the
# questions refer to DIN EN 1991-1-3:2010-12 or DIN EN 1991-1-3/NA:2019-04.
#
# NOTE(review): a trailing backslash inside the triple-quoted string is a line
# continuation, so the first three lines form one line and "stammt." is
# followed directly by "Die Fragen" without a space. The literal is left
# untouched here because changing the prompt would change the model input.

message = """Sie sind ein experte in Sachen Normen und Standards im Bauwesen. Beantworten Sie mir die folgende Frage und geben sie mir immer die Quelle an (Kapitel und Abschnitte), aus der die Antwort stammt.\
Die Fragen beziehen sich auf die Normenreihe DIN EN 1991-1-3:2010-12 oder DIN EN 1991-1-3/NA:2019-04 \
Hier nun die Frage: {Frage} 
    """

In [12]:
# Smoke test: send one hand-written question through the prompt template.
response = model.invoke(
    message.format(
        Frage="In welchem Jahr wurde die Norm EN 1990 veröffentlicht?",
    )
)

In [22]:
import csv


def get_chatgpt_response(question: str) -> Dict:
    """
    Ask the chat model a single question and report its token usage.

    Args:
    question (str): Question text inserted into the prompt template.

    Returns:
    Dict: The answer text under "content" plus the "prompt_tokens" and
    "completion_tokens" counts taken from the response metadata.
    """
    # Fill the question into the module-level prompt template.
    prompt = message.format(Frage=question)
    reply = model.invoke(prompt)

    # Token counts are read from the 'token_usage' entry of the metadata.
    usage = reply.response_metadata['token_usage']

    return {
        "content": reply.content,
        "prompt_tokens": usage['prompt_tokens'],
        "completion_tokens": usage['completion_tokens'],
    }

def process_questions(questions: List[str]) -> List[Dict]:
    """
    Query the model once per question and collect the answers.

    Args:
    questions (List[str]): The questions to send, in order.

    Returns:
    List[Dict]: One dictionary per question with the keys "question",
    "response", "prompt_tokens" and "completion_tokens".
    """
    def _as_record(question_text: str) -> Dict:
        # One model call per question; flatten its result into a single row.
        answer = get_chatgpt_response(question_text)
        return {
            "question": question_text,
            "response": answer["content"],
            "prompt_tokens": answer["prompt_tokens"],
            "completion_tokens": answer["completion_tokens"],
        }

    return [_as_record(entry) for entry in questions]

def save_to_csv(data: List[Dict], filename: str) -> None:
    """
    Save the data to a CSV file.

    The header row is taken from the keys of the first dictionary. An empty
    ``data`` list produces an empty file instead of raising ``IndexError``.

    Args:
    data (List[Dict]): The rows to save; every row is expected to use the
        same keys as the first one.
    filename (str): The name of the file to save to.
    """
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        if not data:
            # No rows means no header can be derived; leave the file empty.
            return

        dict_writer = csv.DictWriter(output_file, list(data[0]))
        dict_writer.writeheader()
        dict_writer.writerows(data)



In [20]:
# Load the semicolon-separated question sheet and pull the 'Questions'
# column out as a plain Python list.
df = pd.read_csv('questions.csv', sep=';')

question_list = df['Questions'].tolist()

In [23]:
# Send every question to the model; each entry of `responses` holds the
# question, the answer text and the prompt/completion token counts.
responses = process_questions(question_list)


In [24]:
# Keep only the answer texts, in the same order as the questions.
res_only = [entry["response"] for entry in responses]


In [27]:
output_file = "token_counts.csv"

# Write only the prompt and completion token counts to CSV, one row per
# response. The encoding is set explicitly so the file does not depend on the
# platform's default locale encoding.
fieldnames = ['prompt_tokens', 'completion_tokens']
with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    # Project each response onto the two token columns; the other keys
    # (question, response) are deliberately left out of this file.
    writer.writerows(
        {column: item[column] for column in fieldnames}
        for item in responses
    )