In [None]:
import os
import pandas as pd
from athina.evals import ContextContainsEnoughInformation
from athina.loaders import RagLoader
from athina.keys import AthinaApiKey, OpenAiApiKey
from athina.interfaces.model import Model
from athina.interfaces.athina import AthinaExperiment
from athina.datasets import yc_query_mini

### Configure your API keys

Evals use OpenAI, so you need to configure your OpenAI API key.

If you wish to view the results on Athina's UI, and maintain a historical record of experiments, then you also need an Athina API Key.

In [None]:
# Register credentials read from environment variables.
# A variable that is not set yields None.
OpenAiApiKey.set_key(os.environ.get('OPENAI_API_KEY'))  # Needed: the evals use OpenAI
AthinaApiKey.set_key(os.environ.get('ATHINA_API_KEY'))  # Optional, recommended: lets you view results on Athina's UI

### Load your dataset

You can use one of our `loaders` to load the data from a Dictionary, CSV or JSON file.

Here's an example
```
from athina.loaders import RagLoader

dataset = RagLoader().load_dict(raw_data)
```

Here is the complete [documentation](https://docs.athina.ai/evals/running_evals/loading_data) specifying the various ways you can load your dataset.

In [None]:
# Take the sample data shipped in `athina.datasets` and load it through RagLoader.
raw_data = yc_query_mini.data
rag_loader = RagLoader()
dataset = rag_loader.load_dict(raw_data)

# Last expression of the cell: display the loaded dataset as a DataFrame.
pd.DataFrame(dataset)

### Describe your experiment metadata fields (optional)
These metadata fields are only used as identifiers when we save your experiment on Athina Develop.
This helps you search, sort and filter through past experimentation runs.

Currently, this includes your:
- `experiment_name`: (string) The name of your experiment
- `experiment_description`: (string) A description of this iteration of your experiment
- `language_model_provider`: (string) `openai`
- `language_model_id`: (string) The language model used for the LLM inference (ex: `gpt-3.5-turbo`)
- `prompt_template`: (object) A JS object representing the prompt you are sending to the LLM (for example, messages array in OpenAI)
- `dataset_name`: (string) An identifier for the dataset you are using.

In [None]:
# Prompt recorded with the experiment, as a list of role/content messages.
system_message = dict(
    role="system",
    content="You are an expert at answering questions about Y Combinator. If you do not know the answer, say I don't know. Be direct and concise in your responses",
)
user_message = dict(role="user", content="{query}")
prompt_template = [system_message, user_message]

# Metadata fields that identify this run when the experiment is saved on Athina.
experiment_metadata = dict(
    experiment_name="ContextRelevance",
    experiment_description="Checking retrieval scores for YC dataset with a simple zero-shot prompt",
    language_model_provider="openai",
    language_model_id="gpt-3.5-turbo",
    prompt_template=prompt_template,
    dataset_name="yc_dataset_mini",
)
experiment = AthinaExperiment(**experiment_metadata)

### Run your evaluation

Simply instantiate the evaluator class you wish to use, and call `run_batch` to run the eval.

##### Run evals in parallel (much faster)

You may specify `max_parallel_evals` to run multiple LLM evaluation inferences in parallel.

##### View as a dataframe
Call `.to_df()` on the results to view as a dataframe


##### Log results to Athina Develop (Dashboard UI)
If you have specified an `AthinaApiKey`, then results will automatically be logged to the dashboard.

In [None]:
# Going by the evaluator's name, this grades whether the retrieved context
# contains enough information to answer the query (confirm against the Athina docs).
evaluator = ContextContainsEnoughInformation().configure_experiment(experiment)

# Evaluate the whole dataset in one batch.
results = evaluator.run_batch(
    data=dataset,
    max_parallel_evals=5,  # Run up to 5 evals in parallel
)

# Last expression of the cell: show the results as a dataframe.
results.to_df()