# Installation and imports

In [None]:
!pip install evidently

In [None]:
import pandas as pd
import numpy as np

from evidently import Dataset
from evidently import DataDefinition
from evidently import Report
from evidently.descriptors import *

from evidently import BinaryClassification

from evidently.presets import TextEvals, ValueStats, ClassificationPreset
from evidently.metrics import *

from evidently.llm.templates import BinaryClassificationPromptTemplate

In [None]:
# import os
# os.environ["OPENAI_API_KEY"] = "YOUR_KEY"

# Prepare the dataset

Labeled dataset to evaluate the LLM judge on:

In [None]:
# Hand-labeled "golden" examples used to measure how well the LLM judge agrees
# with a human reviewer. Each row is:
#   [question, target_response, new_response, label, comment]
# where `label` is the human verdict ("correct" / "incorrect") on whether
# new_response matches target_response, and `comment` explains an "incorrect"
# verdict (empty string for "correct" rows).
# Note: the password-reset question appears twice on purpose, with two
# different new responses and different labels.
data = [
    ["Hi there, how do I reset my password?",
     "To reset your password, click on 'Forgot Password' on the login page and follow the instructions sent to your registered email.",
     "To change your password, select 'Forgot Password' on the login screen and follow the steps sent to your registered email address. If you don't receive the email, check your spam folder or contact support for assistance.",
     "incorrect", "adds new information (contact support)"],

    ["Where can I find my transaction history?",
     "You can view your transaction history by logging into your account and navigating to the 'Transaction History' section. Here, you can see all your past transactions. You can also filter the transactions by date or type for easier viewing.",
     "Log into your account and go to 'Transaction History' to see all your past transactions. In this section, you can view and filter your transactions by date or type. This allows you to find specific transactions quickly and easily.",
     "correct", ""],

    ["How do I add another user to my account?",
     "I am afraid it is not currently possible to add multiple users to the account. Our system supports only one user per account for security reasons. We recommend creating separate accounts for different users.",
     "To add a secondary user, go to 'Account Settings', select 'Manage Users', and enter the details of the person you want to add. You can set permissions for their access, deciding what they can and cannot do within the account.",
     "incorrect", "contradiction (incorrect answer)"],

    ["Is it possible to link multiple bank accounts?",
     "Yes, you can link multiple bank accounts by going to 'Account Settings' in the menu and selecting 'Add Bank Account'. Follow the prompts to add your bank account details. Make sure to verify each bank account by following the verification process.",
     "You can add multiple bank accounts by visiting 'Accounts' in the menu and choosing 'Add Bank Account'. Enter your bank details as prompted and complete the verification process for each account to link them successfully.",
     "incorrect", "contradiction (incorrect menu item)"],

    ["Can I use your service for cryptocurrency transactions?",
     "Currently, our service does not support cryptocurrency transactions. Please check our website for updates regarding this feature. We are continuously working to expand our services and may include this in the future.",
     "Currently, our service does not support cryptocurrency transactions. Please check our website for updates regarding this feature. We are continuously working to expand our services and may include this in the future.",
     "correct", ""],

    ["Hi, can I get a detailed report of my monthly transactions?",
     "Yes, you can generate a detailed monthly report of your transactions by logging into your account, going to 'Transaction History', and selecting 'Generate Report'. You can customize the report by selecting specific dates or transaction types.",
     "You can get a detailed monthly report by logging into your account, navigating to 'Transaction History', and clicking on 'Generate Report'. Customize your report by choosing the date range and types of transactions you want to include.",
     "correct", ""],

    ["I am traveling to the US. Can I use the app there?",
     "Yes, you can use the app in the US just like you do at home. Ensure you have an internet connection. You may also want to update your app to the latest version before traveling for optimal performance.",
     "The app will work in the US without any issues. Just make sure you have access to the internet. For the best experience, update your app to the latest version before you travel.",
     "correct", ""],

    ["How do I link my payment account to a new mobile number?",
     "To link a new mobile number, log in to your account, go to 'Account Settings', select 'Mobile Number', and follow the instructions to verify your new number. You will need to enter the new number and verify it via a code sent to your phone.",
     "To add a new number, navigate to the 'Account Settings' section, select 'Mobile Number' and proceed with the steps to add and confirm the new number. Enter the new mobile number and verify it using the code sent to your phone.",
     "correct", ""],

    ["Can I receive notifications for transactions in real-time?",
     "Yes, you can enable real-time notifications for transactions by going to 'Account Settings', then 'Notifications', and turning on 'Transaction Alerts'. You can choose to receive alerts via SMS, email, or push notifications on your mobile device.",
     "To receive real-time notifications for transactions, log into your account, go to 'Account Settings', select 'Notifications', and enable 'Transaction Alerts'. Choose your preferred notification method between email or push notifications.",
     "incorrect", "omits information (sms notification)"],

    ["Hey, can I set up automatic transfers to my savings account?",
     "Yes, you can set up automatic transfers by going to 'Account Settings', selecting 'Automatic Transfers', and specifying the amount and frequency. You can choose to transfer weekly, bi-weekly, or monthly. Make sure to save the settings to activate the transfers.",
     "You can arrange automatic transfers by going to 'Account Settings', choosing 'Automatic Transfers', and setting the desired amount and frequency. Don't forget to save the changes to enable the automatic transfers.",
     "incorrect", "omits information (limited frequency of transfers available)"],

    ["Hi there, how do I reset my password?",
     "To reset your password, click on 'Forgot Password' on the login page and follow the instructions sent to your registered email.",
     "To change your password, select 'Forgot Password' on the login screen and follow the steps sent to your registered email address. If you don't receive the email, check your spam folder.",
     "correct", ""],

    ["How can I update my billing address?",
     "To update your billing address, log into your account, go to 'Account Settings', select 'Billing Information', and enter your new address. Make sure to save the changes once you are done.",
     "To update your billing address, log into your account, navigate to 'Account Settings', and select 'Billing Information'. Enter your new address and ensure all fields are filled out correctly. Save the changes, and you will receive a confirmation email with the updated address details.",
     "incorrect", "adds new information (confirmation email)"],

    ["How do I contact customer support?",
     "You can contact customer support by logging into your account, going to the 'Help' section, and selecting 'Contact Us'. You can choose to reach us via email, phone, or live chat for immediate assistance.",
     "To contact customer support, log into your account and go to the 'Help' section. Select 'Contact Us' and choose your preferred method: email, phone, or live chat. Our support team is available 24/7 to assist you with any issues. Additionally, you can find a FAQ section that may answer your questions without needing to contact support.",
     "incorrect", "adds new information (24/7 availability, FAQ section)"],

    ["What should I do if my card is lost or stolen?",
     "If your card is lost or stolen, immediately log into your account, go to 'Card Management', and select 'Report Lost/Stolen'. Follow the instructions to block your card and request a replacement. You can also contact our support team for assistance.",
     "If your card is lost or stolen, navigate to 'Card Management' in your account, and select 'Report Lost/Stolen'. Follow the prompts to block your card and request a replacement. Additionally, you can contact our support team for help.",
     "correct", ""],

    ["How do I enable two-factor authentication (2FA)?",
     "To enable two-factor authentication, log into your account, go to 'Security Settings', and select 'Enable 2FA'. Follow the instructions to link your account with a 2FA app like Google Authenticator. Once set up, you will need to enter a code from the app each time you log in.",
     "To enable two-factor authentication, log into your account, navigate to 'Security Settings', and choose 'Enable 2FA'. Follow the on-screen instructions to link your account with a 2FA app such as Google Authenticator. After setup, each login will require a code from the app. Additionally, you can set up backup codes in case you lose access to the 2FA app.",
     "incorrect", "adds new information (backup codes)"]
]

# Column names, in the same order as the fields of each row in `data`.
columns = ["question", "target_response", "new_response", "label", "comment"]

golden_dataset = pd.DataFrame(data, columns=columns)

# Preview the dataset

In [None]:
# Do not truncate long text cells when the frame is displayed.
pd.options.display.max_colwidth = None

# Show the first five labeled examples.
golden_dataset.head(n=5)

In [None]:
# Tell Evidently how to treat the columns of the golden dataset:
# the human verdict is categorical, the question and both responses are text.
definition = DataDefinition(
    categorical_columns=["label"],
    text_columns=["question", "target_response", "new_response"],
)

In [None]:
# Build the Evidently dataset from a new DataFrame object created from the
# golden set, using the column roles declared in `definition`.
eval_dataset = Dataset.from_pandas(
    pd.DataFrame(golden_dataset), data_definition=definition
)

Preview the distribution of classes:

In [None]:
# Summarize the human-assigned "label" column to see the class balance.
report = Report([ValueStats(column="label")])

# Second argument is None: only this one dataset is passed to the run.
my_eval = report.run(eval_dataset, None)
my_eval

# Alternative ways to inspect the result:
# my_eval.dict()
# my_eval.json()

# Design the LLM judge

An LLM judge that compares each new response against the reference (target) response and labels it as correct or incorrect.

In [None]:
# Prompt template for the correctness judge: a binary verdict
# ("incorrect" vs "correct", with "unknown" as the uncertainty setting) plus
# reasoning. The {target_response} placeholder in the criteria text stands for
# the reference answer.
correctness = BinaryClassificationPromptTemplate(
    pre_messages=[("system", "You are an expert evaluator. You will be given an ANSWER and REFERENCE")],
    criteria="""An ANSWER is correct when it is the same as the REFERENCE in all facts and details, even if worded differently.
        The ANSWER is incorrect if it contradicts the REFERENCE, adds additional claims, omits or changes details.
        REFERENCE:
        =====
        {target_response}
        =====""",
    target_category="incorrect",
    non_target_category="correct",
    uncertainty="unknown",
    include_reasoning=True,
)

In [None]:
# Run the correctness judge over "new_response" and store its output under
# the "Correctness" alias. The "target_response" column is passed along as an
# additional column for the template.
correctness_judge = LLMEval(
    "new_response",
    template=correctness,
    provider="openai",
    model="gpt-4o-mini",
    alias="Correctness",
    additional_columns={"target_response": "target_response"},
)
eval_dataset.add_descriptors(descriptors=[correctness_judge])

See the raw scores with explanation:

In [None]:
# Display the dataset as a DataFrame, including the descriptor output added so far.
eval_dataset.as_dataframe()

Optional: add a "Judge_match" column that compares the human label with the judge's verdict, so that you can easily sort by mismatches.

In [None]:
# Compare the human "label" with the judge's "Correctness" output row by row;
# the result is stored under the "Judge_match" alias.
judge_match = ExactMatch(columns=["label", "Correctness"], alias="Judge_match")
eval_dataset.add_descriptors(descriptors=[judge_match])

In [None]:
# Display the dataset again, now with the "Judge_match" descriptor included.
eval_dataset.as_dataframe()

In [None]:
# Summarize the descriptors added to the dataset so far.
report = Report([TextEvals()])

# Second argument is None: only this one dataset is passed to the run.
my_eval = report.run(eval_dataset, None)
my_eval

# Evaluate the LLM judge quality

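Create a new dataset with a classification data definition that treats the human label as the target and the judge's verdict as the prediction.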

In [None]:
# Export the evaluated dataset (with the judge's output) to a pandas DataFrame.
df = eval_dataset.as_dataframe()

In [None]:
# Frame judge quality as binary classification: the human "label" is the
# target, the judge's "Correctness" output is the predicted label, and
# "incorrect" is the positive class.
judge_vs_human = BinaryClassification(
    target="label",
    prediction_labels="Correctness",
    pos_label="incorrect",
)
definition_2 = DataDefinition(
    categorical_columns=["label", "Correctness"],
    classification=[judge_vs_human],
)

In [None]:
# Wrap the exported results in a new Evidently dataset that uses the
# classification data definition.
class_dataset = Dataset.from_pandas(
    pd.DataFrame(df), data_definition=definition_2
)

In [None]:
# Classification quality report for the judge, measured against the human labels.
report = Report([ClassificationPreset()])

# Second argument is None: only this one dataset is passed to the run.
my_eval = report.run(class_dataset, None)
my_eval

# Verbosity eval

In [None]:
# Prompt template for a second, reference-free judge: a binary verdict
# ("concise" vs "verbose", with "unknown" as the uncertainty setting) plus
# reasoning.
verbosity = BinaryClassificationPromptTemplate(
    pre_messages=[("system", "You are an expert text evaluator. You will be given a text of the response to a user question.")],
    criteria="""Conciseness refers to the quality of being brief and to the point, while still providing all necessary information.
            A concise response should:
            - Provide the necessary information without unnecessary details or repetition.
            - Be brief yet comprehensive enough to address the query.
            - Use simple and direct language to convey the message effectively.""",
    target_category="concise",
    non_target_category="verbose",
    uncertainty="unknown",
    include_reasoning=True,
)

In [None]:
# Run the verbosity judge over "new_response" and store its output under the
# "Verbosity" alias.
verbosity_judge = LLMEval(
    "new_response",
    template=verbosity,
    provider="openai",
    model="gpt-4o-mini",
    alias="Verbosity",
)
eval_dataset.add_descriptors(descriptors=[verbosity_judge])

In [None]:
# Re-run the descriptor summary now that the "Verbosity" descriptor was added.
report = Report([TextEvals()])

# Second argument is None: only this one dataset is passed to the run.
my_eval = report.run(eval_dataset, None)
my_eval

In [None]:
# Display the dataset with all descriptor output added so far, including "Verbosity".
eval_dataset.as_dataframe()

# Upload to Evidently Cloud

In [None]:
from evidently.ui.workspace import CloudWorkspace

In [None]:
# ws = CloudWorkspace(token="YOUR_API_TOKEN", url="https://app.evidently.cloud")

In [None]:
# project = ws.create_project("LLM judge evals", org_id="YOUR_TEAM_ID")
# project.description = "My project description"
# project.save()

In [None]:
# ws.add_run(project.id, my_eval, include_data=True)