In [None]:
import os
from athina.evals import (
    DoesResponseAnswerQuery,
    ContextContainsEnoughInformation,
    Faithfulness,
    RagasContextRelevancy,
    RagasAnswerRelevancy,
    RagasContextPrecision,
    RagasFaithfulness,
    RagasContextRecall,
    RagasAnswerSemanticSimilarity,
    RagasAnswerCorrectness,
    RagasHarmfulness,
    RagasMaliciousness,
    RagasCoherence,
    RagasConciseness
)
from athina.loaders import Loader
from athina.keys import AthinaApiKey, OpenAiApiKey
import pandas as pd


from dotenv import load_dotenv
# load_dotenv() runs before the lookups below (presumably it populates the
# environment from a local .env file -- confirm against python-dotenv docs).
load_dotenv()

# Register both API keys with athina. A variable that is not set in the
# environment is passed through as None.
OpenAiApiKey.set_key(os.environ.get("OPENAI_API_KEY"))
AthinaApiKey.set_key(os.environ.get("ATHINA_API_KEY"))

In [None]:
# Two samples, each given as (query, context passages, response) and expanded
# into the dict layout that is handed to Loader().load_dict below.
raw_data = [
    {"query": query, "context": context, "response": response}
    for query, context, response in (
        (
            "Who founded Tesla",
            [
                "Tesla is an automative manufacturer.",
                "Tesla was founded by Elon Musk in 2003 and is headquartered in Palo Alto, California.",
                "Tesla makes electric cars.",
            ],
            "Tesla is an electric car company",
        ),
        (
            "Where is France and what is it's capital?",
            [
                "France is the country in europe known for delicious cuisine",
                "Paris is the capital of france",
            ],
            "France is in western Europe and Paris is its capital",
        ),
    )
]

# Load the samples and show the resulting dataset as a table.
dataset_raw_data = Loader().load_dict(raw_data)
pd.DataFrame(dataset_raw_data)

In [None]:
# Run RagasAnswerRelevancy over every row of dataset_raw_data and display the
# batch result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasAnswerRelevancy(model=eval_model)
    .run_batch(data=dataset_raw_data)
    .to_df()
)

In [None]:
# A single datapoint; its keys are unpacked as keyword arguments to run().
data = {
    "query": "Where is France and what is its capital?",
    "context": [
        "France is a country in Europe known for delicious cuisine",
        "The capital of France is Paris.",
        "French fries were not invented in France.",
    ],
    "response": "Paris is the capital of France",
}

# Evaluate just this one datapoint (run, not run_batch) and display the result.
eval_model = "gpt-3.5-turbo"
(
    RagasAnswerRelevancy(model=eval_model)
    .run(**data)
    .to_df()
)

In [None]:
# Samples that carry an "expected_response" entry alongside query, context and
# response. The Ragas evaluator cells that follow run on the dataset loaded
# from this list.
raw_data_ragas_with_expected_response = [
    dict(
        query="Where is France and what is its capital?",
        context=[
            "France is a country in Europe known for delicious cuisine",
            "The capital of France is Paris.",
            "French fries were not invented in France.",
        ],
        response="Paris is the capital of France",
        expected_response="France is in europe. Paris is it's capital",
    ),
    dict(
        query="What is Tesla? Who founded it?",
        context=[
            "Tesla is an electric car company.",
            "Tesla is registered in United States",
            "Elon Musk founded Tesla",
        ],
        response="Tesla is an electric car company",
        expected_response="Tesla is an electric car company, founded by Elon Musk.",
    ),
]

# Load the samples and show the resulting dataset as a table.
ragas_dataset_with_expected_response = Loader().load_dict(
    raw_data_ragas_with_expected_response
)
pd.DataFrame(ragas_dataset_with_expected_response)

In [None]:
# Run RagasContextPrecision over the expected-response dataset and display the
# batch result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasContextPrecision(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

In [None]:
# Run RagasContextRelevancy over the expected-response dataset and display the
# batch result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasContextRelevancy(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

In [None]:
# Run RagasFaithfulness over the expected-response dataset and display the
# batch result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasFaithfulness(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

In [None]:
# Run RagasContextRecall over the expected-response dataset and display the
# batch result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasContextRecall(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

In [None]:
# Run RagasAnswerSemanticSimilarity over the expected-response dataset and
# display the batch result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasAnswerSemanticSimilarity(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

In [None]:
# Run RagasAnswerCorrectness over the expected-response dataset and display the
# batch result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasAnswerCorrectness(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

In [None]:
# Run RagasHarmfulness over the expected-response dataset and display the
# batch result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasHarmfulness(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

In [None]:
# Run RagasMaliciousness over the expected-response dataset and display the
# batch result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasMaliciousness(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

In [None]:
# Run RagasCoherence over the expected-response dataset and display the batch
# result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasCoherence(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

In [None]:
# Run RagasConciseness over the expected-response dataset and display the
# batch result via to_df().
eval_model = "gpt-3.5-turbo"
(
    RagasConciseness(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

In [None]:
# Build a batch dataset from a list of dicts. Each sample is written as
# (query, context passages, response) and expanded into a dict per row.
raw_data = [
    {"query": query, "context": context, "response": response}
    for query, context, response in (
        (
            "What is the capital of Greece?",
            ["Greece is often called the cradle of Western civilization."],
            "Athens",
        ),
        (
            "What is the price of a Tesla Model 3?",
            ["Tesla Model 3 is a fully electric car."],
            "I cannot answer this question as prices vary from country to country.",
        ),
        (
            "What is a shooting star?",
            ["Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light."],
            "A shooting star is a meteor that burns up in the atmosphere.",
        ),
    )
]

# Load the samples and show the resulting dataset as a table.
dataset = Loader().load_dict(raw_data)
pd.DataFrame(dataset)

### You can run our function-based evaluators as follows

In [None]:
# Imports
from athina.evals import ContainsAny, Regex
from athina.loaders import TextLoader

In [None]:
# Text-only samples: every row holds a single "text" field.
raw_data = [
    {"text": sample}
    for sample in (
        "I cannot answer this question as prices vary from country to country.",
        "A shooting star is a meteor that burns up in the atmosphere.",
    )
]

# Load through TextLoader and show the dataset as a table.
dataset = TextLoader().load_dict(raw_data)
pd.DataFrame(dataset)

In [None]:
# ContainsAny: checks whether the response contains any of the given keywords.
(
    ContainsAny(keywords=["star"])
    .run_batch(data=dataset)
    .to_df()
)


In [None]:
# Text-only samples; the second one embeds an email address.
raw_data = [
    {"text": sample}
    for sample in (
        "I cannot answer this question as prices vary from country to country.",
        "Contact us at hello@athina.ai to get access to our LLM observability platform where you can run the tests you've defined here against your LLM responses in production.",
    )
]

# Load through TextLoader and show the dataset as a table.
dataset = TextLoader().load_dict(raw_data)
pd.DataFrame(dataset)

In [None]:
# Eval checks if the response matches the regex.
# The pattern is written as a raw string so that `\.` reaches the regex engine
# as an escaped dot. In a plain string literal `\.` is an invalid escape
# sequence, which Python warns about. The pattern value itself is unchanged.
Regex(regex=r'([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)').run_batch(data=dataset).to_df()

In [None]:
from athina.evals import ContainsNone

# Sample texts for the ContainsNone evaluator
raw_data = [
    {"text": sample}
    for sample in (
        "This text does not contain the specified keyword.",
        "This is a text without any specified search word.",
    )
]

# Load the samples, then run ContainsNone with a single keyword
dataset = TextLoader().load_dict(raw_data)
(
    ContainsNone(keywords=["keyword"])
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import Contains

# Sample texts for the Contains evaluator
raw_data = [
    {"text": sample}
    for sample in (
        "The keyword YC present in this text.",
        "This text does not contain the specified word.",
    )
]

# Load the samples, then run Contains with the keyword "YC"
dataset = TextLoader().load_dict(raw_data)
(
    Contains(keyword="YC")
    .run_batch(data=dataset)
    .to_df()
)


In [None]:
from athina.evals import ContainsAll

# Sample texts for the ContainsAll evaluator
raw_data = [
    {
        "text": "This text contains both keyword1 and keyword2.",
    },
    {
        "text": "This text does not contain all specified keywords.",
    },
]

# Load the samples, then run ContainsAll with two keywords
dataset = TextLoader().load_dict(raw_data)
(
    ContainsAll(keywords=["keyword1", "keyword2"])
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import ContainsJson

# Sample texts for the ContainsJson evaluator; the second string is
# deliberately malformed (its first key is missing the closing quote).
raw_data = [
    {"text": snippet}
    for snippet in (
        '{"key": "value"}',
        '{"invalid : "json"}',
    )
]

# Load the samples, then run ContainsJson
dataset = TextLoader().load_dict(raw_data)
(
    ContainsJson()
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import ContainsEmail

# Sample texts for the ContainsEmail evaluator
raw_data = [
    {
        "text": "Contact us at contact@example.com.",
    },
    {
        "text": "This text does not contain any email address.",
    },
]

# Load the samples, then run ContainsEmail
dataset = TextLoader().load_dict(raw_data)
(
    ContainsEmail()
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import IsJson

# Sample texts for the IsJson evaluator
raw_data = [
    {"text": snippet}
    for snippet in (
        '{"key": "value"}',
        "invalid_json",
    )
]

# Load the samples, then run IsJson
dataset = TextLoader().load_dict(raw_data)
(
    IsJson()
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import IsEmail

# Sample texts for the IsEmail evaluator
raw_data = [
    {"text": candidate}
    for candidate in (
        "john.doe@example.com",
        "invalid.email",
    )
]

# Load the samples, then run IsEmail
dataset = TextLoader().load_dict(raw_data)
(
    IsEmail()
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import ContainsLink

# Sample texts for the ContainsLink evaluator
raw_data = [
    {
        "text": "For more information, visit https://example.com.",
    },
    {
        "text": "This text does not contain any link.",
    },
]

# Load the samples, then run ContainsLink
dataset = TextLoader().load_dict(raw_data)
(
    ContainsLink()
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import ContainsValidLink

# Sample texts for the ContainsValidLink evaluator
raw_data = [
    {"text": sample}
    for sample in (
        "Visit our official website at http://example.com.",
        "Visit our official website at https://exampleasdf.com",
        "This text does not contain any valid link.",
    )
]

# Load the samples, then run ContainsValidLink
dataset = TextLoader().load_dict(raw_data)
(
    ContainsValidLink()
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import NoInvalidLinks

# Example data
raw_data = [
    {"text": "Visit our website at https://example.com."},
    {"text": "Visit our official website at https://exampleasdf.com"},
    {"text": "This text does not contain any valid link."},
]

# Load data into dataset
dataset = TextLoader().load_dict(raw_data)

# Run the evaluator exactly once. Only the last expression of a cell is
# displayed, so a second identical call would discard the first result and
# just repeat the evaluation (presumably re-checking every link).
NoInvalidLinks().run_batch(data=dataset).to_df()


In [None]:
from athina.evals import ApiCall
from athina.loaders import ResponseLoader

# Call your own API-based evaluator. Each raw datapoint must contain a
# response and may optionally contain the query, context and expected_response.
raw_data = [
    dict(
        response="Response to be sent to the your own API based evaluator",
        query="Query to be sent to the your own API based evaluator",
    )
]

# Load through ResponseLoader, then run ApiCall with the endpoint URL, payload
# and headers below.
dataset = ResponseLoader().load_dict(raw_data)
(
    ApiCall(
        url="https://8e714940905f4022b43267e348b8a713.api.mockbin.io/",
        payload={"evaluator": "custom_api_based_evaluator"},
        headers={"Authorization": "Bearer token"},
    )
    .run_batch(data=dataset)
    .to_df()
)


In [None]:
from athina.evals import Equals
from athina.loaders import TextLoader

# Sample texts for the Equals evaluator; the first one is identical to the
# expected_text passed below.
raw_data = [
    {"text": sample}
    for sample in (
        "This is the expected response",
        "This is an unexpected response",
    )
]

# Load the samples, then run Equals
dataset = TextLoader().load_dict(raw_data)
(
    Equals(expected_text="This is the expected response")
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import StartsWith

# Sample texts for the StartsWith evaluator
raw_data = [
    {
        "text": "The text starts with this substring.",
    },
    {
        "text": "This text does not start with the specified substring.",
    },
]

# Load the samples, then run StartsWith
dataset = TextLoader().load_dict(raw_data)
(
    StartsWith(substring="The text starts with")
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import EndsWith

# Sample texts for the EndsWith evaluator
raw_data = [
    {
        "text": "The text ends with this substring.",
    },
    {
        "text": "This text does not end with the specified substring.",
    },
]

# Load the samples, then run EndsWith
dataset = TextLoader().load_dict(raw_data)
(
    EndsWith(substring="with this substring.")
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import LengthLessThan

# Sample texts for the LengthLessThan evaluator
raw_data = [
    {"text": sample}
    for sample in (
        "Short text",
        "This is a longer text.",
    )
]

# Load the samples, then run LengthLessThan with max_length=20
dataset = TextLoader().load_dict(raw_data)
(
    LengthLessThan(max_length=20)
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import LengthGreaterThan

# Sample texts for the LengthGreaterThan evaluator
raw_data = [
    {"text": sample}
    for sample in (
        "Short text",
        "This is a longer text.",
    )
]

# Load the samples, then run LengthGreaterThan with min_length=20
dataset = TextLoader().load_dict(raw_data)
(
    LengthGreaterThan(min_length=20)
    .run_batch(data=dataset)
    .to_df()
)

In [None]:
from athina.evals import LengthBetween

# Example data
raw_data = [
    {"text": "Short text"},
    {"text": "This is a long text."},
    {"text": "This is a very long text. The Great Barrier Reef is the world's largest coral reef system composed of over 2,900 individual reefs and 900 islands stretching for over 2,300 kilometers."}
]

# Load data into dataset. This goes through TextLoader like the other
# function-evaluator cells rather than passing raw dicts, and it avoids
# rebinding `data`, which an earlier cell uses for a single datapoint dict.
dataset = TextLoader().load_dict(raw_data)
LengthBetween(min_length=20, max_length=50).run_batch(data=dataset).to_df()

In [None]:
from athina.evals import OneLine

# Example data; the second text contains a newline character.
raw_data = [
    {"text": "This is a single line text."},
    {"text": "The Great Barrier Reef is the world's largest coral reef system.\n It is composed of over 2,900 individual reefs and 900 islands stretching for over 2,300 kilometers."}
]

# Load data into dataset. This goes through TextLoader like the other
# function-evaluator cells rather than passing raw dicts, and it avoids
# rebinding `data`, which an earlier cell uses for a single datapoint dict.
dataset = TextLoader().load_dict(raw_data)
OneLine().run_batch(data=dataset).to_df()