In [1]:
!pip install -U aimon



In [2]:
from aimon import evaluate

In [3]:
# Detectors to enable for the evaluation; each one is configured with the
# detector named "default". Key order matches the order listed here.
_detector_keys = (
    "context_classification",
    "hallucination",
    "conciseness",
    "completeness",
    "toxicity",
    "instruction_adherence",
)
aimon_config = {key: {"detector_name": "default"} for key in _detector_keys}

### Create a dataset and a dataset collection

AIMon can be used to manage the datasets used for evaluations. Here, we create two datasets and a collection
comprising these datasets. Note that these datasets only need to be created once.

In [4]:
import json
import os

from aimon import Client

# Local CSV files that are uploaded as the two datasets.
file_path1 = "./test_evaluation_dataset_oct_2024_1.csv"
file_path2 = "./test_evaluation_dataset_oct_2024_2.csv"

# Names the datasets are registered under. Defined once so the create
# payloads and the lookups by name below cannot drift apart.
dataset_name_1 = "test_evaluation_dataset_oct_17_2024_1.csv"
dataset_name_2 = "test_evaluation_dataset_oct_17_2024_2.csv"

# Metadata for each dataset, serialized to JSON strings for the create call.
dataset_data_1 = json.dumps({
    "name": dataset_name_1,
    "description": "This is one custom dataset"
})

dataset_data_2 = json.dumps({
    "name": dataset_name_2,
    "description": "This is another custom dataset"
})

# Fail fast when the API key is missing; otherwise the client would be built
# with the literal header "Bearer None" and only fail later on a request.
aimon_api_key = os.getenv("AIMON_API_KEY")
if not aimon_api_key:
    raise RuntimeError(
        "The AIMON_API_KEY environment variable is not set; "
        "set it before creating the AIMon client."
    )
aimon_client = Client(auth_header=f"Bearer {aimon_api_key}")

# Create the new datasets by uploading each CSV file with its metadata.
with open(file_path1, 'rb') as file1:
    dataset1 = aimon_client.datasets.create(
        file=file1,
        json_data=dataset_data_1
    )

with open(file_path2, 'rb') as file2:
    dataset2 = aimon_client.datasets.create(
        file=file2,
        json_data=dataset_data_2
    )

# Rebind dataset1/dataset2 to the records looked up by name.
# NOTE(review): presumably the create calls above can be skipped on re-runs
# once the datasets exist, leaving only these lookups -- confirm against the
# AIMon SDK behavior for duplicate dataset names.
dataset1 = aimon_client.datasets.list(name=dataset_name_1)
dataset2 = aimon_client.datasets.list(name=dataset_name_2)

In [5]:
# Register a collection that groups the two datasets (referenced by their
# `sha` attributes) under a single name.
_create_collection = aimon_client.datasets.collection.create
dataset_collection = _create_collection(
    name="dataset_collection_oct_17_2024_1200",
    dataset_ids=[dataset1.sha, dataset2.sha],
    description="This is a collection of two datasets.",
)

### Evaluation

We will run an evaluation on the example below, which uses LangChain to summarize documents using OpenAI.

In [6]:
def print_eval_results(eval_res):
    """Print each evaluation result prefixed with a colored status marker.

    Args:
        eval_res: Iterable of result objects; each must expose
            ``response.status``.

    A green circle is printed when ``response.status == 200`` and a red
    circle otherwise, followed by the item's string form and a blank line.
    """
    for entry in eval_res:
        marker = "\U0001F7E2" if entry.response.status == 200 else "\U0001F534"
        print(f"{marker} {entry}\n")

In [7]:
# Keyword arguments for the evaluation run, gathered in one place so they
# are easy to scan and tweak.
_eval_settings = {
    "application_name": "llm_marketing_summarization_app_v5",
    "model_name": "my_gpt4_model_fine_tuned",
    "dataset_collection_name": "dataset_collection_oct_17_2024_1200",
    "evaluation_name": "simple_eval_with_output_oct_17",
    "headers": ['context_docs', 'user_query', 'prompt', 'instructions', 'output'],
    "api_key": os.getenv("AIMON_API_KEY"),
    "aimon_client": aimon_client,
    "config": aimon_config,
}

# Run the evaluation, then print one status line per returned result.
res = evaluate(**_eval_settings)
print_eval_results(res)

🟢 EvalResponse(output=The latest version is 2.1 and has async support. It was launched in March 2024, response=AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200))

🟢 EvalResponse(output= Setup the environment variables, install dependencies and follow the official documentation for configuration", response=AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200))

🟢 EvalResponse(output=Requires Python 3.6 and above., response=AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200))

🟢 EvalResponse(output=Common issues are conflics with Python packages, connectivity problems and incorrect dependency installation., response=AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200))

🟢 EvalResponse(output=The latest version is 2.1 and has async support. It was launched in March 2024, response=AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200))

🟢 EvalResponse(output= Setup 