# Helper notebook for benchmarking the GPT-4o + RAG reference model

This notebook imports a CSV file containing a set of questions and saves the input and output token counts of the model responses to a dedicated CSV file. It specifically tests the `GPT-4o-mini` PLM by *OpenAI* with access to the building standard *DIN EN 1991-1-3*. The actual answers were extracted from the notebook variable `res_only`, which is available after running the corresponding cells below. For further information on the actual results, consult the file `Benchmark_results.xlsx`.

In [2]:
from openai import OpenAI
from dotenv import load_dotenv
import pandas as pd
from typing import List, Dict
load_dotenv()

True

In [3]:
# NOTE(review): OpenAI() is built without arguments; presumably it picks up the
# API key from the environment populated by load_dotenv() — confirm.
client = OpenAI()

# assistant with access to the standard DIN EN 1991-1-3
ASSISTANT_ID = 'asst_RvcAT9QAWZ3ji1hwA4Cc1JBW'

assistant = client.beta.assistants.retrieve(assistant_id=ASSISTANT_ID)

In [4]:
# Test run with a single question: create a fresh thread, post the question,
# run the assistant on it and print the resulting message list.
thread = client.beta.threads.create()

message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="In welchem Jahr wurde die Norm EN 1990 veröffentlicht?"
)

# create_and_poll returns once the run has reached a final status
run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id,
    assistant_id=assistant.id
)

if run.status == 'completed':
    messages = client.beta.threads.messages.list(
        thread_id=thread.id
    )
    print(messages)
else:
    # Previously a non-completed run produced no output at all, which made
    # failures indistinguishable from an empty cell.
    print(f"Run {run.id} ended with status '{run.status}': {run.last_error}")

SyncCursorPage[Message](data=[Message(id='msg_m3uaKTilwqIyjqFjb2j4mdHm', assistant_id='asst_RvcAT9QAWZ3ji1hwA4Cc1JBW', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[FileCitationAnnotation(end_index=62, file_citation=FileCitation(file_id='file-XX2y6OvXTw0k8VDTQTWz3vOt'), start_index=50, text='【6:0†source】', type='file_citation')], value='Die Norm EN 1990 wurde im Jahr 2002 veröffentlicht【6:0†source】.'), type='text')], created_at=1726868323, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_CeG4yOaJNRlLlI05APjpmUWn', status=None, thread_id='thread_hkSxjCYzX2j0BXAsn1D66OSC'), Message(id='msg_z69QPHthKj4byLHvNtHJQ4Qv', assistant_id=None, attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='In welchem Jahr wurde die Norm EN 1990 veröffentlicht?'), type='text')], created_at=1726868318, incomplete_at=None, incomplete_details=None, metadata={}, object='t

In [12]:
# List the individual steps of the test run above; each step in the listing
# carries its own token usage.
run_steps = client.beta.threads.runs.steps.list(run_id=run.id, thread_id=thread.id)

print(run_steps)

SyncCursorPage[RunStep](data=[RunStep(id='step_EcK473AAFP1WY57QrIsIFNRY', assistant_id='asst_RvcAT9QAWZ3ji1hwA4Cc1JBW', cancelled_at=None, completed_at=1726868324, created_at=1726868323, expired_at=None, failed_at=None, last_error=None, metadata=None, object='thread.run.step', run_id='run_CeG4yOaJNRlLlI05APjpmUWn', status='completed', step_details=MessageCreationStepDetails(message_creation=MessageCreation(message_id='msg_m3uaKTilwqIyjqFjb2j4mdHm'), type='message_creation'), thread_id='thread_hkSxjCYzX2j0BXAsn1D66OSC', type='message_creation', usage=Usage(completion_tokens=23, prompt_tokens=11049, total_tokens=11072), expires_at=None), RunStep(id='step_XjE83VCo12eaHpvuhfnBO5lk', assistant_id='asst_RvcAT9QAWZ3ji1hwA4Cc1JBW', cancelled_at=None, completed_at=1726868323, created_at=1726868320, expired_at=None, failed_at=None, last_error=None, metadata=None, object='thread.run.step', run_id='run_CeG4yOaJNRlLlI05APjpmUWn', status='completed', step_details=ToolCallsStepDetails(tool_calls=[Fil

In [34]:
# definition of code for automated querying
import csv


def get_chatgpt_response(question: str) -> Dict:
    """
    Ask the assistant a single question and collect the answer and token usage.

    A new thread is created for every call, so questions do not share any
    conversation context. The token counts are summed over all steps of the
    run, and a short status line is printed once the answer is available.

    Args:
    question (str): The question to ask the model.

    Returns:
    Dict: A dictionary with the keys "content" (text of the first message in
    the thread listing), "prompt_tokens" and "completion_tokens".

    Raises:
    RuntimeError: If the run ends with any status other than 'completed'.
    """
    thread = client.beta.threads.create()

    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=question
    )

    run = client.beta.threads.runs.create_and_poll(
        thread_id=thread.id,
        assistant_id=assistant.id
    )

    # Fail loudly here: before, a non-completed run fell through and crashed
    # later on the unbound `messages` variable with an unrelated-looking error.
    if run.status != 'completed':
        raise RuntimeError(
            f"Run {run.id} for question '{question}' ended with status "
            f"'{run.status}': {run.last_error}"
        )

    messages = client.beta.threads.messages.list(
        thread_id=thread.id
    )

    run_steps = client.beta.threads.runs.steps.list(
        thread_id=thread.id,
        run_id=run.id
    )

    prompt_tokens = 0
    completion_tokens = 0
    for step in run_steps.data:
        # `usage` is optional on a run step; skip steps that do not report it
        # instead of failing on attribute access.
        if step.usage is None:
            continue
        prompt_tokens += step.usage.prompt_tokens
        completion_tokens += step.usage.completion_tokens

    # The first entry of the listing is taken as the answer.
    response = messages.data[0].content[0].text.value
    res_str = f"Antwort für die Frage: '{question}' wurde generiert. \nPrompt Tokens: {prompt_tokens}; Completion Tokens: {completion_tokens} \n\n"
    print(res_str)

    return {
        "content": response,
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens
    }

def process_questions(questions: List[str]) -> List[Dict]:
    """
    Query the model once per question and gather the results in order.

    Args:
    questions (List[str]): The questions to send to the model.

    Returns:
    List[Dict]: One dictionary per question with the keys "question",
    "response", "prompt_tokens" and "completion_tokens".
    """
    def _as_record(text: str) -> Dict:
        # Flatten the helper's reply into one row together with its question.
        answer = get_chatgpt_response(text)
        return {
            "question": text,
            "response": answer["content"],
            "prompt_tokens": answer["prompt_tokens"],
            "completion_tokens": answer["completion_tokens"],
        }

    return [_as_record(text) for text in questions]

def save_to_csv(data: List[Dict], filename: str):
    """
    Save the data to a CSV file.

    The keys of the first entry are used as the header row. The file is
    written as UTF-8 and overwritten if it already exists. An empty `data`
    list results in an empty file instead of an IndexError.

    Args:
    data (List[Dict]): The data to save.
    filename (str): The name of the file to save to.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        # Without any row there is no header to derive; leave the file empty.
        if not data:
            return

        dict_writer = csv.DictWriter(output_file, list(data[0].keys()))
        dict_writer.writeheader()
        dict_writer.writerows(data)



In [36]:
# Load the benchmark questions: a semicolon-separated file whose 'Questions'
# column holds one question per row.
df = pd.read_csv('questions.csv', sep=';')

question_list = df['Questions'].tolist()

In [37]:
# Run the full benchmark: every entry of question_list is sent to the assistant
# and the answers plus token counts are collected in `responses`.
responses = process_questions(question_list)

Antwort für die Frage: 'In welchem Jahr wurde die Norm EN 1990 veröffentlicht?' wurde generiert. 
Prompt Tokens: 11765; Completion Tokens: 40 


Antwort für die Frage: 'Unter welchen Umständen dürfen Prüfungen und numerische Verfahren zur Ermittlung von Schneelasten verwendet werden?' wurde generiert. 
Prompt Tokens: 11677; Completion Tokens: 98 


Antwort für die Frage: 'Wo in der Norm EN 1990:2002 sind die grundlegenden Begriffe und Definitionen zu finden?' wurde generiert. 
Prompt Tokens: 11667; Completion Tokens: 89 


Antwort für die Frage: 'Was ist die jährliche Überschreitenswahrscheinlichkeit der Schneelast auf dem Boden?' wurde generiert. 
Prompt Tokens: 12107; Completion Tokens: 95 


Antwort für die Frage: 'Was ist die charakteristische Schneelast auf dem Dach?' wurde generiert. 
Prompt Tokens: 11857; Completion Tokens: 169 


Antwort für die Frage: 'Welche Faktoren können die Schneelastverteilung beeinflussen?' wurde generiert. 
Prompt Tokens: 11741; Completion Tokens: 316 

In [39]:
# Keep only the answer texts, in the same order as the questions.
res_only = [entry["response"] for entry in responses]

In [33]:
# Spot check: send one of the benchmark questions through the helper on its own.
res = get_chatgpt_response("Unter welchen Umständen dürfen Prüfungen und numerische Verfahren zur Ermittlung von Schneelasten verwendet werden?")

Antwort für die Frage: Unter welchen Umständen dürfen Prüfungen und numerische Verfahren zur Ermittlung von Schneelasten verwendet werden?; wurde generiert. 
Prompt Tokens: 11677; Completion Tokens: 189


In [38]:
output_file = "token_counts_assistant.csv"
fieldnames = ['prompt_tokens', 'completion_tokens']

# Write only the two token counters of every response, one row per question.
with open(output_file, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(
        {column: entry[column] for column in fieldnames}
        for entry in responses
    )