In [1]:
import os

from dotenv import load_dotenv, find_dotenv
from openai import OpenAI

_ = load_dotenv(find_dotenv())
llm_api_key = os.environ['DEEPSEEK_API_KEY']
MODEL = "deepseek-chat"
client = OpenAI(api_key=llm_api_key, base_url="https://api.deepseek.com")

In [2]:
def rag_response(question, context):
    prompt_template = """
    `You're a customer service agent. Answer the QUESTION based on the CONTEXT provided.
    Use only the facts from the CONTEXT when answering the QUESTION.

    QUESTION: {question}

    CONTEXT:
    {context}
    """.strip()

    prompt = prompt_template.format(question=question, context=context).strip()
    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": prompt},
        ],
        stream=False
    )
    return response.choices[0].message.content


In [3]:
from ragas import EvaluationDataset
import pandas as pd

df = pd.read_csv('../data/testset.csv', encoding='utf-8')
df.rename(columns={"question": "user_input", "context": "reference"}, inplace=True)


In [4]:
from hybrid_search import hybrid_search

retrieved_contexts = []
for query in df["user_input"].values:
    search_results = hybrid_search(query, size=1)
    context = ""
    for hit in search_results:
        doc = hit.metadata['_source']
        context = context + f"section: {doc['section']}\ntext: {hit.page_content}\n\n"
        retrieved_contexts.append([context])

No sentence-transformers model found with name bert-base-german-dbmdz-uncased. Creating a new one with mean pooling.


In [5]:
len(retrieved_contexts)

20

In [14]:
df["retrieved_contexts"] = retrieved_contexts
df["response"] = df.apply(lambda row: rag_response(row["user_input"], row["retrieved_contexts"][0]), axis=1)
eval_dataset = EvaluationDataset.from_pandas(df)

ValidationError: 1 validation error for SingleTurnSample
retrieved_contexts
  Input should be a valid list [type=list_type, input_value='section: heben und trage...ahre)\n<25 min. lt.\n\n', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/list_type

In [28]:
df.to_csv("../data/testset_eval.csv", index=False)

In [18]:
from ragas.llms import LangchainLLMWrapper
from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings

eval_embeddings = HuggingFaceEmbeddings(model_name="bert-base-german-dbmdz-uncased")
eval_llm = LangchainLLMWrapper(ChatOpenAI(model=MODEL, api_key=llm_api_key, base_url="https://api.deepseek.com"))

No sentence-transformers model found with name bert-base-german-dbmdz-uncased. Creating a new one with mean pooling.


In [19]:
from ragas import evaluate
from ragas.metrics import ResponseRelevancy, LLMContextPrecisionWithReference

result = evaluate(
    eval_dataset,
    metrics=[
        ResponseRelevancy(), LLMContextPrecisionWithReference()
    ],
    llm=eval_llm,
    embeddings=eval_embeddings,
)

Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]

Exception raised in Job[9]: TimeoutError()
Exception raised in Job[0]: TimeoutError()
Exception raised in Job[8]: TimeoutError()
Exception raised in Job[10]: TimeoutError()
Exception raised in Job[29]: TimeoutError()
Exception raised in Job[31]: TimeoutError()
Exception raised in Job[28]: TimeoutError()


In [25]:
result

{'answer_relevancy': 0.5025, 'llm_context_precision_with_reference': 0.5294}

In [24]:
result.upload()

[2025-04-15 15:14:42 - (2025-04-15 13:14:42 UTC)] [ERROR] [ragas.utils] [RagasID: a-54ff180f6e55403e8d025681a6855e26, App-Version: 0.2.14] [API_ERROR] Request failed. Status Code: 500, URL: https://api.ragas.io/api/v1/alignment/evaluation, Error Message: 
API Message: An internal server error occured


UploadException: Request failed: 
API Message: An internal server error occured