In [None]:
"""
To use the Confident AI platform, create an account at https://www.confident-ai.com/
After signing up you will get an API key.
Export it as DEEPEVAL_API_KEY (for example in a .env file) and log in from the notebook.
"""
import os
from dotenv import load_dotenv
from deepeval import login_with_confident_api_key

# Load the .env file before looking up the key, so a value defined there is
# visible in the process environment.
load_dotenv()

# Read the Confident AI key from DEEPEVAL_API_KEY (None when unset) and log in.
deepeval_api_key = os.environ.get("DEEPEVAL_API_KEY")
login_with_confident_api_key(deepeval_api_key)

In [39]:
# Set Ollama as LLM provider: configure DeepEval to use a locally served model
# (llama3.1:latest) for every evaluation that needs an LLM.
# NOTE(review): the base URL targets localhost:11434, presumably a running
# Ollama server, and the api-key value looks like a placeholder; confirm.

!deepeval set-local-model --model-name=llama3.1:latest --base-url="http://localhost:11434/" --api-key="ollama" --format=json

🙌 Congratulations! You're now using a local model for all evals that require an
LLM.


In [None]:
import ast 
import pandas as pd
from pandas import DataFrame
from deepeval.test_case import LLMTestCase
from deepeval.dataset import EvaluationDataset

# If you haven't already, convert the RAGAs dataset into a DeepEval one and upload it to Confident AI.
# Alternatively, use a custom dataset or let DeepEval generate one for you.
# This project already has a dataset produced by RAGAs, so it is reused here.

def upload_ragas_dataset_to_confident_ai(filepath: str, dataset_name: str):
    """Convert a RAGAs CSV export into DeepEval test cases and push them.

    Every CSV row becomes one ``LLMTestCase``; the resulting list is wrapped in
    an ``EvaluationDataset`` and pushed under ``dataset_name`` with test cases
    auto-converted to goldens.

    Args:
        filepath: Path of the CSV file to read. The file must provide the
            columns ``user_input``, ``response``, ``reference``,
            ``reference_contexts`` and ``retrieved_contexts``. The two
            ``*_contexts`` columns are parsed with ``ast.literal_eval``, so
            their cells must hold Python-literal text (presumably stringified
            lists of strings; TODO confirm against the RAGAs export).
        dataset_name: Alias under which the dataset is pushed.

    Returns:
        None.

    Raises:
        Nothing for ``FileNotFoundError`` and ``TypeError``: both are reported
        on stdout instead of being raised. Any other exception (for example
        ``ValueError``/``SyntaxError`` from ``ast.literal_eval`` on a malformed
        cell, or ``KeyError`` for a missing column) propagates to the caller.
    """
    try:
        dataset: DataFrame = pd.read_csv(filepath)

        test_cases: list[LLMTestCase] = [
            LLMTestCase(
                input=row['user_input'],
                actual_output=row['response'],
                expected_output=row['reference'],
                # Context columns are stored as text in the CSV; turn them
                # back into Python objects without evaluating arbitrary code.
                context=ast.literal_eval(row['reference_contexts']),
                retrieval_context=ast.literal_eval(row['retrieved_contexts']),
            )
            for _, row in dataset.iterrows()
        ]

        deepeval_dataset = EvaluationDataset(test_cases)
        deepeval_dataset.push(
            alias=dataset_name,
            auto_convert_test_cases_to_goldens=True
        )
    except FileNotFoundError as fnfe:
        # Print the OS error text exactly once. The previous nested
        # print(print(...)) also emitted a stray "None" line.
        print(fnfe.strerror)
    except TypeError as te:
        print(str(te))
    

In [73]:
# Push the RAGAs CSV export to Confident AI under the alias "RAGAs Dataset".
upload_ragas_dataset_to_confident_ai(
    filepath="../ragas/dataset.csv",
    dataset_name="RAGAs Dataset",
)

Gtk-Message: 19:06:38.380: Failed to load module "canberra-gtk-module"
Gtk-Message: 19:06:38.381: Failed to load module "canberra-gtk-module"


Opening in existing browser session.


In [None]:
from deepeval.dataset import EvaluationDataset

# When the dataset is already stored on the platform, there is no need to
# upload it again: create an empty dataset and `pull` it by its alias.

evaluation_dataset = EvaluationDataset()
evaluation_dataset.pull(alias="RAGAs Dataset")

In [80]:
# Inspect the type of the container that holds the pulled dataset's test cases.
type(evaluation_dataset.test_cases)

list

In [83]:
from deepeval import assert_test
from deepeval.metrics import GEval
from deepeval.models.llms import OllamaModel
from deepeval.test_case import LLMTestCase, LLMTestCaseParams

def test_case():
    """Run ``assert_test`` on the first test case of ``evaluation_dataset``.

    A single G-Eval metric named "Correctness" is used. It compares the actual
    output with the expected output, is judged by an ``OllamaModel`` with a
    threshold of 0.5, and is evaluated synchronously.
    """
    # Fields of the test case that the judge is allowed to look at.
    compared_fields = [
        LLMTestCaseParams.ACTUAL_OUTPUT,
        LLMTestCaseParams.EXPECTED_OUTPUT,
    ]
    judge = OllamaModel()
    metrics = [
        GEval(
            name="Correctness",
            criteria="Determine if the 'actual output' is correct based on the 'expected output'.",
            evaluation_params=compared_fields,
            threshold=0.5,
            model=judge,
            async_mode=False,
        )
    ]

    # Named differently from the enclosing function to avoid shadowing it.
    first_case: LLMTestCase = evaluation_dataset.test_cases[0]
    assert_test(first_case, metrics, run_async=False)

In [85]:
# Run the correctness evaluation on the first test case.
# NOTE(review): the recorded run of this cell failed with an Ollama
# ResponseError: the model needed more system memory (23.2 GiB) than was
# available (17.0 GiB). A smaller model or more memory is needed; confirm.
test_case()

ResponseError: model requires more system memory (23.2 GiB) than is available (17.0 GiB) (status code: 500)