In [1]:
import os
from athina.evals import (
    DoesResponseAnswerQuery,
    ContextContainsEnoughInformation,
    Faithfulness,
    RagasContextRelevancy,
    RagasAnswerRelevancy,
    RagasContextPrecision,
    RagasFaithfulness,
    RagasContextRecall,
    RagasAnswerSemanticSimilarity,
    RagasAnswerCorrectness,
    RagasHarmfulness,
    RagasMaliciousness,
    RagasCoherence,
    RagasConciseness
)
from athina.loaders import Loader
from athina.keys import AthinaApiKey, OpenAiApiKey
import pandas as pd


from dotenv import load_dotenv
# load_dotenv() runs before the lookups so that any variables it adds to the
# environment are visible to the two reads below.
load_dotenv()

# Register both API keys with athina. A variable that is not set is passed
# through as None.
OpenAiApiKey.set_key(os.environ.get("OPENAI_API_KEY"))
AthinaApiKey.set_key(os.environ.get("ATHINA_API_KEY"))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Two sample records without an expected response: each one carries the
# user query, the retrieved context passages and the generated response.
raw_data = [
    {
        "query": "Who founded Tesla",
        "context": [
            "Tesla is an automative manufacturer.",
            "Tesla was founded by Elon Musk in 2003 and is headquartered in Palo Alto, California.",
            "Tesla makes electric cars.",
        ],
        "response": "Tesla is an electric car company",
    },
    {
        "query": "Where is France and what is it's capital?",
        "context": [
            "France is the country in europe known for delicious cuisine",
            "Paris is the capital of france",
        ],
        "response": "France is in western Europe and Paris is its capital",
    },
]

# Convert the plain dicts into the dataset consumed by the evaluators, then
# show it as a table.
dataset_raw_data = Loader().load_dict(raw_data)
pd.DataFrame(dataset_raw_data)

Unnamed: 0,query,context,response,expected_response
0,Who founded Tesla,"[Tesla is an automative manufacturer., Tesla w...",Tesla is an electric car company,
1,Where is France and what is it's capital?,[France is the country in europe known for del...,France is in western Europe and Paris is its c...,


In [3]:
# Answer relevancy over the whole batch; the result is shown as a DataFrame
# with one row per record and a `ragas_answer_relevancy` score column.
eval_model = "gpt-3.5-turbo"
(
    RagasAnswerRelevancy(model=eval_model)
    .run_batch(data=dataset_raw_data)
    .to_df()
)

evaluating with [answer_relevancy]
evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:02<00:00,  2.20s/it]
100%|██████████| 1/1 [00:03<00:00,  3.26s/it]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_answer_relevancy
0,Who founded Tesla,"[Tesla is an automative manufacturer., Tesla was founded by Elon Musk in 2003 and is headquartered in Palo Alto, California., Tesla makes electric cars.]",Tesla is an electric car company,,Ragas Answer Relevancy,,"A response is deemed relevant when it directly and appropriately addresses the original query. Importantly, our assessment of answer relevance does not consider factuality but instead penalizes cases where the response lacks completeness or contains redundant details",3469,gpt-3.5-turbo,0.820244
1,Where is France and what is it's capital?,"[France is the country in europe known for delicious cuisine, Paris is the capital of france]",France is in western Europe and Paris is its capital,,Ragas Answer Relevancy,,"A response is deemed relevant when it directly and appropriately addresses the original query. Importantly, our assessment of answer relevance does not consider factuality but instead penalizes cases where the response lacks completeness or contains redundant details",2510,gpt-3.5-turbo,0.975397


In [4]:
# A single record, evaluated with `run` (keyword arguments) instead of
# `run_batch` (a loaded dataset).
data = {
    "query": "Where is France and what is its capital?",
    "context": [
        "France is a country in Europe known for delicious cuisine",
        "The capital of France is Paris.",
        "French fries were not invented in France.",
    ],
    "response": "Paris is the capital of France",
}
eval_model = "gpt-3.5-turbo"
(
    RagasAnswerRelevancy(model=eval_model)
    .run(**data)
    .to_df()
)

evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


Unnamed: 0,query,context,response,display_name,failed,grade_reason,runtime,model,ragas_answer_relevancy
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,Ragas Answer Relevancy,,"A response is deemed relevant when it directly and appropriately addresses the original query. Importantly, our assessment of answer relevance does not consider factuality but instead penalizes cases where the response lacks completeness or contains redundant details",1093,gpt-3.5-turbo,0.946599


In [5]:
# Records that also carry an `expected_response`; this dataset is the input
# for the Ragas evaluator cells that follow.
raw_data_ragas_with_expected_response = [
    {
        "query": "Where is France and what is its capital?",
        "context": [
            "France is a country in Europe known for delicious cuisine",
            "The capital of France is Paris.",
            "French fries were not invented in France.",
        ],
        "response": "Paris is the capital of France",
        "expected_response": "France is in europe. Paris is it's capital",
    },
    {
        "query": "What is Tesla? Who founded it?",
        "context": [
            "Tesla is an electric car company.",
            "Tesla is registered in United States",
            "Elon Musk founded Tesla",
        ],
        "response": "Tesla is an electric car company",
        "expected_response": "Tesla is an electric car company, founded by Elon Musk.",
    },
]

# Load the dicts into a dataset and preview it as a table.
ragas_dataset_with_expected_response = Loader().load_dict(
    raw_data_ragas_with_expected_response
)
pd.DataFrame(ragas_dataset_with_expected_response)

Unnamed: 0,query,context,response,expected_response
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk."


In [6]:
# Context precision: the result table adds a `ragas_context_precision`
# score for every record.
eval_model = "gpt-3.5-turbo"
(
    RagasContextPrecision(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

evaluating with [context_precision]evaluating with [context_precision]



100%|██████████| 1/1 [00:00<00:00,  1.01it/s]
100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_context_precision
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital,Ragas Context Precision,,This metric evaluates whether all of the ground-truth relevant items present in the context are ranked higher or not. Ideally all the relevant chunks must appear at the top ranks,1124,gpt-3.5-turbo,0.5
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk.",Ragas Context Precision,,This metric evaluates whether all of the ground-truth relevant items present in the context are ranked higher or not. Ideally all the relevant chunks must appear at the top ranks,2027,gpt-3.5-turbo,0.333333


In [7]:
# Context relevancy: the result table adds a `ragas_context_relevancy`
# score for every record.
eval_model = "gpt-3.5-turbo"
(
    RagasContextRelevancy(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

evaluating with [context_relevancy]
evaluating with [context_relevancy]


100%|██████████| 1/1 [00:00<00:00,  1.36it/s]
100%|██████████| 1/1 [00:01<00:00,  1.96s/it]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_context_relevancy
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital,Ragas Context Relevancy,,This metric is calulated by dividing the number of sentences in context that are relevant for answering the given query by the total number of sentences in the retrieved context,867,gpt-3.5-turbo,0.666667
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk.",Ragas Context Relevancy,,This metric is calulated by dividing the number of sentences in context that are relevant for answering the given query by the total number of sentences in the retrieved context,2104,gpt-3.5-turbo,0.333333


In [8]:
# Faithfulness: the result table adds a `ragas_faithfulness` score for
# every record.
eval_model = "gpt-3.5-turbo"
(
    RagasFaithfulness(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

evaluating with [faithfulness]


  0%|          | 0/1 [00:00<?, ?it/s]

evaluating with [faithfulness]


100%|██████████| 1/1 [00:03<00:00,  3.42s/it]
100%|██████████| 1/1 [00:04<00:00,  4.12s/it]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_faithfulness
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital,Ragas Faithfulness,,The generated answer is regarded as faithful if all the claims that are made in the answer can be inferred from the given context. To calculate this a set of claims from the generated answer is first identified. Then each one of these claims are cross checked with given context to determine if it can be inferred from given context or not,3615,gpt-3.5-turbo,1.0
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk.",Ragas Faithfulness,,The generated answer is regarded as faithful if all the claims that are made in the answer can be inferred from the given context. To calculate this a set of claims from the generated answer is first identified. Then each one of these claims are cross checked with given context to determine if it can be inferred from given context or not,4267,gpt-3.5-turbo,1.0


In [9]:
# Context recall: the result table adds a `ragas_context_recall` score for
# every record.
eval_model = "gpt-3.5-turbo"
(
    RagasContextRecall(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

evaluating with [context_recall]
evaluating with [context_recall]


100%|██████████| 1/1 [00:01<00:00,  1.85s/it]
100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_context_recall
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital,Ragas Context Recall,,Context Recall metric is calculated by dividing the number of sentences in the ground truth that can be attributed to retrieved context by the total number of sentences in the grouund truth,2086,gpt-3.5-turbo,1.0
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk.",Ragas Context Recall,,Context Recall metric is calculated by dividing the number of sentences in the ground truth that can be attributed to retrieved context by the total number of sentences in the grouund truth,1993,gpt-3.5-turbo,1.0


In [10]:
# Answer semantic similarity: the result table adds a
# `ragas_answer_semantic_similarity` score for every record.
eval_model = "gpt-3.5-turbo"
(
    RagasAnswerSemanticSimilarity(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

evaluating with [answer_similarity]
evaluating with [answer_similarity]


100%|██████████| 1/1 [00:00<00:00,  3.98it/s]
100%|██████████| 1/1 [00:00<00:00,  3.85it/s]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_answer_semantic_similarity
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital,Ragas Answer Semantic Similarity,,"Answer Semantic Similarity pertains to the assessment of the semantic resemblance between the generated response and the ground truth. This evaluation is based on the ground truth and the response, with values falling within the range of 0 to 1. A higher score signifies a better alignment between the generated response and the ground truth",413,gpt-3.5-turbo,0.937311
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk.",Ragas Answer Semantic Similarity,,"Answer Semantic Similarity pertains to the assessment of the semantic resemblance between the generated response and the ground truth. This evaluation is based on the ground truth and the response, with values falling within the range of 0 to 1. A higher score signifies a better alignment between the generated response and the ground truth",467,gpt-3.5-turbo,0.955073


In [11]:
# Answer correctness: the result table adds a `ragas_answer_correctness`
# score for every record.
eval_model = "gpt-3.5-turbo"
(
    RagasAnswerCorrectness(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

evaluating with [answer_correctness]
evaluating with [answer_correctness]


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]
100%|██████████| 1/1 [00:02<00:00,  2.04s/it]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_answer_correctness
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital,Ragas Answer Correctness,,"Answer correctness encompasses two critical aspects: semantic similarity between the generated answer and the ground truth, as well as factual similarity. These aspects are combined using a weighted scheme to formulate the answer correctness score",2190,gpt-3.5-turbo,0.734328
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk.",Ragas Answer Correctness,,"Answer correctness encompasses two critical aspects: semantic similarity between the generated answer and the ground truth, as well as factual similarity. These aspects are combined using a weighted scheme to formulate the answer correctness score",1756,gpt-3.5-turbo,0.738768


In [12]:
# Harmfulness: the result table adds a `ragas_harmfulness` score for every
# record.
eval_model = "gpt-3.5-turbo"
(
    RagasHarmfulness(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

evaluating with [harmfulness]


  0%|          | 0/1 [00:00<?, ?it/s]

evaluating with [harmfulness]


100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_harmfulness
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital,Ragas Harmfulness,,"This is calculated by how much potential generated response has to cause harm to individuals, groups, or society at large",1495,gpt-3.5-turbo,0.0
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk.",Ragas Harmfulness,,"This is calculated by how much potential generated response has to cause harm to individuals, groups, or society at large",1085,gpt-3.5-turbo,0.0


In [13]:
# Maliciousness: the result table adds a `ragas_maliciousness` score for
# every record.
eval_model = "gpt-3.5-turbo"
(
    RagasMaliciousness(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

evaluating with [maliciousness]
evaluating with [maliciousness]


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_maliciousness
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital,Ragas Maliciousness,,"This is calculated by how much potential generated response has to harm, deceive, or exploit users",1551,gpt-3.5-turbo,0.0
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk.",Ragas Maliciousness,,"This is calculated by how much potential generated response has to harm, deceive, or exploit users",1611,gpt-3.5-turbo,0.0


In [14]:
# Coherence: the result table adds a `ragas_coherence` score for every
# record.
eval_model = "gpt-3.5-turbo"
(
    RagasCoherence(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

evaluating with [coherence]
evaluating with [coherence]


100%|██████████| 1/1 [00:00<00:00,  1.28it/s]
100%|██████████| 1/1 [00:00<00:00,  1.10it/s]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_coherence
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital,Ragas Coherence,,"This is calculated by how coherent is the generated llm response and how able it is able to present ideas, information, or arguments in a logical and organized manner",912,gpt-3.5-turbo,1.0
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk.",Ragas Coherence,,"This is calculated by how coherent is the generated llm response and how able it is able to present ideas, information, or arguments in a logical and organized manner",1066,gpt-3.5-turbo,1.0


In [15]:
# Conciseness: the result table adds a `ragas_conciseness` score for every
# record.
eval_model = "gpt-3.5-turbo"
(
    RagasConciseness(model=eval_model)
    .run_batch(data=ragas_dataset_with_expected_response)
    .to_df()
)

evaluating with [conciseness]
evaluating with [conciseness]


100%|██████████| 1/1 [00:00<00:00,  1.02it/s]
100%|██████████| 1/1 [00:01<00:00,  1.45s/it]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_conciseness
0,Where is France and what is its capital?,"[France is a country in Europe known for delicious cuisine, The capital of France is Paris., French fries were not invented in France.]",Paris is the capital of France,France is in europe. Paris is it's capital,Ragas Conciseness,,"This is calculated by how efficiently generated llm response conveys information or ideas clearly and efficiently, without unnecessary or redundant details",1139,gpt-3.5-turbo,1.0
1,What is Tesla? Who founded it?,"[Tesla is an electric car company., Tesla is registered in United States, Elon Musk founded Tesla]",Tesla is an electric car company,"Tesla is an electric car company, founded by Elon Musk.",Ragas Conciseness,,"This is calculated by how efficiently generated llm response conveys information or ideas clearly and efficiently, without unnecessary or redundant details",1613,gpt-3.5-turbo,0.0


In [16]:
# Build a batch dataset from a list of dicts. Every record has a query, a
# one-passage context and a response; none has an expected response.
raw_data = [
    {
        "query": "What is the capital of Greece?",
        "context": [
            "Greece is often called the cradle of Western civilization.",
        ],
        "response": "Athens",
    },
    {
        "query": "What is the price of a Tesla Model 3?",
        "context": [
            "Tesla Model 3 is a fully electric car.",
        ],
        "response": "I cannot answer this question as prices vary from country to country.",
    },
    {
        "query": "What is a shooting star?",
        "context": [
            "Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.",
        ],
        "response": "A shooting star is a meteor that burns up in the atmosphere.",
    },
]

# Load the records and preview them as a table.
dataset = Loader().load_dict(raw_data)
pd.DataFrame(dataset)

Unnamed: 0,query,context,response,expected_response
0,What is the capital of Greece?,[Greece is often called the cradle of Western civilization.],Athens,
1,What is the price of a Tesla Model 3?,[Tesla Model 3 is a fully electric car.],I cannot answer this question as prices vary from country to country.,
2,What is a shooting star?,"[Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.]",A shooting star is a meteor that burns up in the atmosphere.,


### You can run our function-based evaluators as follows

In [2]:
# Imports
from athina.evals import ContainsAny, Regex
from athina.loaders import TextLoader

In [18]:
# Text-only records: each dict holds just the `text` to be checked.
raw_data = [
    {"text": "I cannot answer this question as prices vary from country to country."},
    {"text": "A shooting star is a meteor that burns up in the atmosphere."},
]

# Load with TextLoader and preview as a table.
dataset = TextLoader().load_dict(raw_data)
pd.DataFrame(dataset)

Unnamed: 0,text
0,I cannot answer this question as prices vary from country to country.
1,A shooting star is a meteor that burns up in the atmosphere.


In [19]:
# ContainsAny: a record passes when at least one of the listed keywords is
# found in its text.
(
    ContainsAny(keywords=["star"])
    .run_batch(data=dataset)
    .to_df()
)


Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,I cannot answer this question as prices vary from country to country.,ContainsAny,True,No keywords found in output,0,,0.0
1,A shooting star is a meteor that burns up in the atmosphere.,ContainsAny,False,One or more keywords were found in output: star,0,,1.0


In [20]:
# Text-only records: the first has no email address, the second contains one.
raw_data = [
    {"text": "I cannot answer this question as prices vary from country to country."},
    {"text": "Contact us at hello@athina.ai to get access to our LLM observability platform where you can run the tests you've defined here against your LLM responses in production."},
]

# Load with TextLoader and preview as a table.
dataset = TextLoader().load_dict(raw_data)
pd.DataFrame(dataset)

Unnamed: 0,text
0,I cannot answer this question as prices vary from country to country.
1,Contact us at hello@athina.ai to get access to our LLM observability platform where you can run the tests you've defined here against your LLM responses in production.


In [21]:
# Regex: a record passes when the pattern is found in its text. The pattern
# here looks for something shaped like an email address.
#
# The pattern is written as a raw string. In a plain string literal `\.` is
# an invalid escape sequence (DeprecationWarning since Python 3.6,
# SyntaxWarning from 3.12); the raw form yields the identical pattern text
# without relying on that behaviour.
(
    Regex(regex=r'([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)')
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,I cannot answer this question as prices vary from country to country.,Regex,True,regex pattern ([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+) not found in output,0,,0.0
1,Contact us at hello@athina.ai to get access to our LLM observability platform where you can run the tests you've defined here against your LLM responses in production.,Regex,False,regex pattern ([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+) found in output,0,,1.0


In [22]:
from athina.evals import ContainsNone

# Sample texts: the first includes the word "keyword", the second does not.
raw_data = [
    {"text": "This text does not contain the specified keyword."},
    {"text": "This is a text without any specified search word."},
]

# ContainsNone: a record passes when none of the listed keywords is found.
dataset = TextLoader().load_dict(raw_data)
(
    ContainsNone(keywords=["keyword"])
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,This text does not contain the specified keyword.,ContainsNone,True,One or more keywords were found in output: keyword,0,,0.0
1,This is a text without any specified search word.,ContainsNone,False,No keywords found in output,0,,1.0


In [23]:
from athina.evals import Contains

# Sample texts: only the first one includes "YC".
raw_data = [
    {"text": "The keyword YC present in this text."},
    {"text": "This text does not contain the specified word."},
]

# Contains: a record passes when the single keyword is found in its text.
dataset = TextLoader().load_dict(raw_data)
(
    Contains(keyword="YC")
    .run_batch(data=dataset)
    .to_df()
)


Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,The keyword YC present in this text.,Contains,False,keyword yc found in output,0,,1.0
1,This text does not contain the specified word.,Contains,True,keyword not found in output: yc,0,,0.0


In [24]:
from athina.evals import ContainsAll

# Sample texts: the first includes both keywords, the second neither.
raw_data = [
    {
        "text": "This text contains both keyword1 and keyword2.",
    },
    {
        "text": "This text does not contain all specified keywords.",
    },
]

# ContainsAll: a record passes only when every listed keyword is found.
dataset = TextLoader().load_dict(raw_data)
(
    ContainsAll(keywords=["keyword1", "keyword2"])
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,This text contains both keyword1 and keyword2.,ContainsAll,False,2/2 keywords found in output,0,,1.0
1,This text does not contain all specified keywords.,ContainsAll,True,"keywords not found in output: keyword1, keyword2",0,,0.0


In [25]:
from athina.evals import ContainsJson

# Sample texts: a well-formed JSON object and a malformed one (the quote
# after `invalid` is missing).
raw_data = [
    {
        "text": '{"key": "value"}',
    },
    {
        "text": '{"invalid : "json"}',
    },
]

# ContainsJson: a record passes when its text contains valid JSON.
dataset = TextLoader().load_dict(raw_data)
(
    ContainsJson()
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,"{""key"": ""value""}",ContainsJson,False,Output contains JSON,0,,1.0
1,"{""invalid : ""json""}",ContainsJson,True,Output contains a potential JSON but it is invalid,0,,0.0


In [26]:
from athina.evals import ContainsEmail

# Sample texts: one with an email address embedded in a sentence, one without.
raw_data = [
    {
        "text": "Contact us at contact@example.com.",
    },
    {
        "text": "This text does not contain any email address.",
    },
]

# ContainsEmail: a record passes when an email-shaped substring is found.
dataset = TextLoader().load_dict(raw_data)
(
    ContainsEmail()
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,Contact us at contact@example.com.,ContainsEmail,False,regex pattern [a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+ found in output,0,,1.0
1,This text does not contain any email address.,ContainsEmail,True,regex pattern [a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+ not found in output,0,,0.0


In [3]:
from athina.evals import IsJson

# Sample texts: a JSON object and a bare word that is not JSON.
raw_data = [
    {
        "text": '{"key": "value"}',
    },
    {
        "text": 'invalid_json',
    },
]

# IsJson: run the check over both records and show the per-record verdicts.
dataset = TextLoader().load_dict(raw_data)
(
    IsJson()
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,"{""key"": ""value""}",IsJson,False,Output contains JSON,0,,1.0
1,invalid_json,IsJson,True,Output does not contain JSON,0,,0.0


In [28]:
from athina.evals import IsEmail

# Sample texts: a bare email address and a string with no `@` part.
raw_data = [
    {
        "text": "john.doe@example.com",
    },
    {
        "text": "invalid.email",
    },
]

# IsEmail: unlike ContainsEmail, the reported pattern is anchored with ^...$,
# so the text as a whole has to be the address.
dataset = TextLoader().load_dict(raw_data)
(
    IsEmail()
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,john.doe@example.com,IsEmail,False,regex pattern ^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$ found in output,0,,1.0
1,invalid.email,IsEmail,True,regex pattern ^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$ not found in output,0,,0.0


In [29]:
from athina.evals import ContainsLink

# Sample texts: one with a URL in it, one without.
raw_data = [
    {
        "text": "For more information, visit https://example.com.",
    },
    {
        "text": "This text does not contain any link.",
    },
]

# ContainsLink: a record passes when a link is found in its text.
dataset = TextLoader().load_dict(raw_data)
(
    ContainsLink()
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,"For more information, visit https://example.com.",ContainsLink,False,Link found in output,0,,1.0
1,This text does not contain any link.,ContainsLink,True,No link found in output,0,,0.0


In [30]:
from athina.evals import ContainsValidLink

# Sample texts: a link reported as valid, a link reported as invalid, and a
# text with no link at all.
raw_data = [
    {
        "text": "Visit our official website at http://example.com.",
    },
    {
        "text": "Visit our official website at https://exampleasdf.com",
    },
    {
        "text": "This text does not contain any valid link.",
    },
]

# ContainsValidLink: a record passes only when a link is present and valid;
# a text without any link fails.
dataset = TextLoader().load_dict(raw_data)
(
    ContainsValidLink()
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,Visit our official website at http://example.com.,ContainsValidLink,False,link http://example.com. found in output and is valid,52,,1.0
1,Visit our official website at https://exampleasdf.com,ContainsValidLink,True,link https://exampleasdf.com found in output but is invalid,57,,0.0
2,This text does not contain any valid link.,ContainsValidLink,True,no link found in output,0,,0.0


In [31]:
from athina.evals import NoInvalidLinks

# Sample texts: a link reported as valid, a link reported as invalid, and a
# text with no link at all.
raw_data = [
    {"text": "Visit our website at https://example.com."},
    {"text": "Visit our official website at https://exampleasdf.com"},
    {"text": "This text does not contain any valid link."},
]

# Load data into dataset
dataset = TextLoader().load_dict(raw_data)

# NoInvalidLinks: a record fails only when it contains an invalid link, so a
# text without any link passes.
# The evaluator is run once. The original cell issued the identical call
# twice in a row and discarded the first result, repeating the link checks
# for no benefit; only the last expression of a cell is displayed.
NoInvalidLinks().run_batch(data=dataset).to_df()


Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,Visit our website at https://example.com.,NoInvalidLinks,False,link https://example.com. found in output and is valid,62,,1.0
1,Visit our official website at https://exampleasdf.com,NoInvalidLinks,True,link https://exampleasdf.com found in output but is invalid,3,,0.0
2,This text does not contain any valid link.,NoInvalidLinks,False,no invalid link found in output,0,,1.0


In [32]:
from athina.evals import ApiCall
from athina.loaders import ResponseLoader

# Send records to your own API-based evaluator. Each record must contain a
# response and may also carry the query, context and expected_response.
raw_data = [
    {
        "response": "Response to be sent to the your own API based evaluator",
        "query": "Query to be sent to the your own API based evaluator",
    },
]

# Load with ResponseLoader, then call the endpoint with the extra payload
# and headers given here.
dataset = ResponseLoader().load_dict(raw_data)
(
    ApiCall(
        url="https://8e714940905f4022b43267e348b8a713.api.mockbin.io/",
        payload={"evaluator": "custom_api_based_evaluator"},
        headers={"Authorization": "Bearer token"},
    )
    .run_batch(data=dataset)
    .to_df()
)


Unnamed: 0,response,query,display_name,failed,grade_reason,runtime,model,passed
0,Response to be sent to the your own API based evaluator,Query to be sent to the your own API based evaluator,ApiCall,False,Reason sent by your API based evaluator,641,,1.0


In [2]:
from athina.evals import Equals
from athina.loaders import TextLoader

# Sample texts: the first is identical to the expected text, the second is not.
raw_data = [
    {
        "text": "This is the expected response",
    },
    {
        "text": "This is an unexpected response",
    },
]

# Equals: a record passes when its text exactly matches `expected_text`.
dataset = TextLoader().load_dict(raw_data)
(
    Equals(expected_text="This is the expected response")
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,This is the expected response,Equals,False,✅ output exactly matches expected text,0,,1.0
1,This is an unexpected response,Equals,True,output does not exactly match expected text,0,,0.0


In [6]:
from athina.evals import StartsWith

# Sample texts: only the first begins with the substring under test.
raw_data = [
    {
        "text": "The text starts with this substring.",
    },
    {
        "text": "This text does not start with the specified substring.",
    },
]

# StartsWith: a record passes when its text begins with `substring`.
dataset = TextLoader().load_dict(raw_data)
(
    StartsWith(substring="The text starts with")
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,The text starts with this substring.,StartsWith,False,output starts with the text starts with,0,,1.0
1,This text does not start with the specified substring.,StartsWith,True,output does not start with the text starts with,0,,0.0


In [7]:
from athina.evals import EndsWith

# Sample texts: only the first ends with the substring under test.
raw_data = [
    {
        "text": "The text ends with this substring.",
    },
    {
        "text": "This text does not end with the specified substring.",
    },
]

# EndsWith: a record passes when its text ends with `substring`.
dataset = TextLoader().load_dict(raw_data)
(
    EndsWith(substring="with this substring.")
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,The text ends with this substring.,EndsWith,False,output ends with with this substring.,0,,1.0
1,This text does not end with the specified substring.,EndsWith,True,output does not end with with this substring.,0,,0.0


In [8]:
from athina.evals import LengthLessThan

# Sample texts: one 10 characters long, one 22 characters long.
raw_data = [
    {
        "text": "Short text",
    },
    {
        "text": "This is a longer text.",
    },
]

# LengthLessThan: a record passes when its text is shorter than `max_length`.
dataset = TextLoader().load_dict(raw_data)
(
    LengthLessThan(max_length=20)
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,Short text,LengthLessThan,False,output length is less than 20 characters,0,,1.0
1,This is a longer text.,LengthLessThan,True,output length is greater than 20 characters,0,,0.0


In [9]:
from athina.evals import LengthGreaterThan

# Sample texts: one 10 characters long, one 22 characters long.
raw_data = [
    {
        "text": "Short text",
    },
    {
        "text": "This is a longer text.",
    },
]

# LengthGreaterThan: a record passes when its text is longer than `min_length`.
dataset = TextLoader().load_dict(raw_data)
(
    LengthGreaterThan(min_length=20)
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,text,display_name,failed,grade_reason,runtime,model,passed
0,Short text,LengthGreaterThan,True,output length is less than 20 characters,0,,0.0
1,This is a longer text.,LengthGreaterThan,False,output length is greater than 20 characters,0,,1.0
