In [2]:
import sys

sys.path.insert(0, '..')

import docs

# Read the GitHub data and parse it with the imported `docs` module.
github_data = docs.read_github_data()
parsed_data = docs.parse_data(github_data)

# Lookup table: document filename -> document content.
file_index = dict(
    (doc['filename'], doc['content'])
    for doc in parsed_data
)


In [6]:
# Inspect one parsed document (position 10) to see what a record looks like.
parsed_data[10]

{'title': 'Output formats',
 'description': 'How to export the evaluation results.',
 'content': 'You can view or export Reports in multiple formats.\n\n**Pre-requisites**:\n\n* You know how to [generate Reports](/docs/library/report).\n\n## Log to Workspace\n\nYou can save the computed Report in Evidently Cloud or your local workspace.\n\n```python\nws.add_run(project.id, my_eval, include_data=False)\n```\n\n<Info>\n  **Uploading evals**. Check Quickstart examples [for ML](/quickstart_ml) or [for LLM](/quickstart_llm) for a full workflow.\n</Info>\n\n## View in Jupyter notebook\n\nYou can directly render the visual summary of evaluation results in interactive Python environments like Jupyter notebook or Colab.\n\nAfter running the Report, simply call the resulting Python object:\n\n```python\nmy_report\n```\n\nThis will render the HTML object directly in the notebook cell.\n\n## HTML\n\nYou can also save this interactive visual Report as an HTML file to open in a browser:\n\n```python

In [7]:
import pickle

# Load the saved evaluation-run records from the pickle file.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
eval_run_path = 'eval-run-v2-2025-10-29-20-15.bin'
with open(eval_run_path, mode='rb') as eval_run_file:
    rows = pickle.load(eval_run_file)

In [9]:
# Inspect one loaded evaluation record (position 10).
rows[10]

{'question': 'install Evidently for LLM',
 'answer': '# Installing Evidently for LLM\n\n## Installation Steps\n\n1. **Create an Account:** If you haven\'t done so, sign up for a free account at [Evidently Cloud](https://app.evidently.cloud/signup). After logging in, create an organization.\n\n2. **Install the Evidently Python Package:** You can install the Evidently library using pip. For general installation:\n   ```bash\n   pip install evidently\n   ```\n   If you are specifically working with large language models (LLMs), use the following command to install additional dependencies:\n   ```bash\n   pip install evidently[llm]\n   ```\n\n3. **Connect to the Cloud Workspace:** Once installed, you need to import the CloudWorkspace from the Evidently library and provide your API token to connect:\n   ```python\n   from evidently.ui.workspace import CloudWorkspace\n   ws = CloudWorkspace(\ntoken="API_KEY",\n   url="https://app.evidently.cloud")\n   ```\n   Alternatively, set the environme

In [10]:
import pandas as pd

# Build the evaluation frame from the loaded records and derive two columns:
#   filename  - the 'filename' entry of each original_question value
#   reference - the document content looked up in file_index for that filename
#               (file_index.get returns None when the filename is not indexed)
df_evals = pd.DataFrame(rows).assign(
    filename=lambda frame: frame['original_question'].apply(lambda item: item['filename']),
    reference=lambda frame: frame['filename'].apply(file_index.get),
)

In [12]:
# Preview the first rows of the evaluation frame, including the filename and reference columns.
df_evals.head()

Unnamed: 0,question,answer,messages,num_tool_calls,original_question,original_result,filename,reference
0,SemanticSimilarity descriptor explained,# Understanding SemanticSimilarity Descriptor ...,"[{'kind': 'user-prompt', 'content': 'SemanticS...",5,{'question': 'SemanticSimilarity descriptor ex...,AgentRunResult(output=SearchResultArticle(foun...,metrics/all_descriptors.mdx,"<Info>\n For an intro, read about [Core Conce..."
1,using is_critical parameter in alerts,# Using `is_critical` Parameter in Evidently A...,"[{'kind': 'user-prompt', 'content': 'using is_...",4,{'question': 'using is_critical parameter in a...,AgentRunResult(output=SearchResultArticle(foun...,docs/platform/alerts.mdx,<Check>\n Built-in alerting is a Pro feature ...
2,Quickstart Evidently setup,# Quickstart Setup for Evidently\n\n## Creatin...,"[{'kind': 'user-prompt', 'content': 'Quickstar...",5,"{'question': 'Quickstart Evidently setup', 'su...",AgentRunResult(output=SearchResultArticle(foun...,docs/library/overview.mdx,The Evidently Python library is an open-source...
3,Drift detection for numerical columns,# Drift Detection for Numerical Columns in Evi...,"[{'kind': 'user-prompt', 'content': 'Drift det...",5,{'question': 'Drift detection for numerical co...,AgentRunResult(output=SearchResultArticle(foun...,metrics/customize_data_drift.mdx,All Metrics and Presets that evaluate shift in...
4,non-letter character percentage function,# Non-letter Character Percentage Function in ...,"[{'kind': 'user-prompt', 'content': 'non-lette...",5,{'question': 'non-letter character percentage ...,AgentRunResult(output=SearchResultArticle(foun...,metrics/all_descriptors.mdx,"<Info>\n For an intro, read about [Core Conce..."


In [13]:
# Configure the LLM-as-a-judge evaluation.
# Evidently pieces used by the cells below: the dataset wrapper
# (Dataset, DataDefinition), the LLMEval descriptor, and the multiclass
# prompt template that defines the judge.

from evidently import Dataset, DataDefinition
from evidently.descriptors import LLMEval
from evidently.llm.templates import MulticlassClassificationPromptTemplate


In [14]:
# Judge prompt: a multiclass template that labels an answer by how it compares
# with a reference text. The criteria text contains the {question} and
# {reference} placeholders.
# NOTE(review): presumably these are filled from the columns mapped via
# additional_columns where the template is used - confirm.
matcher = MulticlassClassificationPromptTemplate(
    include_scores=False,
    include_reasoning=True,
    uncertainty="unknown",
    pre_messages=[
        ("system", "You are a judge that evaluates the factual alignment of two chatbot answers."),
    ],
    # Spelled out piece by piece so the exact whitespace of the prompt is visible.
    criteria=(
        "\n"
        "    You are given a question, a new answer and a reference answer. \n"
        "    Classify the new answer based on how it compares to the reference.\n"
        "    ===\n"
        "    Question: {question}\n"
        "    Reference: {reference}\n"
        "    "
    ),
    category_criteria=dict(
        match="The answer matches the reference in all factual and semantic details.",
        partial_match="The answer is correct in what it says but leaves out details from the reference.",
        mismatch="The answer doesn't match the reference answer.",
        not_available="The answer says that information is not available.",
    ),
)

In [15]:
# Create the evaluation dataset.
# The LLMEval descriptor judges the "answer" column with the `matcher` template,
# using the "openai" provider and the "gpt-4o-mini" model; the "question" and
# "reference" columns are passed along as additional columns. Its output is
# aliased as "eval".
answer_judge = LLMEval(
    alias="eval",
    model="gpt-4o-mini",
    provider="openai",
    template=matcher,
    additional_columns=dict(question="question", reference="reference"),
    column_name="answer",
)

eval_dataset = Dataset.from_pandas(
    data=df_evals,
    descriptors=[answer_judge],
    data_definition=DataDefinition(),
)


In [16]:
# Export the evaluation dataset via as_dataframe(); later cells index the result
# with .iloc and column labels such as 'eval reasoning'.
df_eval_result = eval_dataset.as_dataframe()

In [23]:
# Analyze evaluation results

# Print the 'eval reasoning' value for the row at position 10.
print(df_eval_result.iloc[10]['eval reasoning'])


The new answer provides a valid installation process for Evidently, similar to the reference, but it focuses more on connecting to Evidently Cloud and suggests using an additional `[llm]` dependency, which is not explicitly mentioned in the reference. Therefore, while it is mostly aligned, it lacks some details present in the reference.


In [21]:
# Show the question for the row at position 10.
df_eval_result.iloc[10]['question']


'install Evidently for LLM'

In [24]:
# Print the answer text for the row at position 10.
print(df_eval_result.iloc[10]['answer'])

# Installing Evidently for LLM

## Installation Steps

1. **Create an Account:** If you haven't done so, sign up for a free account at [Evidently Cloud](https://app.evidently.cloud/signup). After logging in, create an organization.

2. **Install the Evidently Python Package:** You can install the Evidently library using pip. For general installation:
   ```bash
   pip install evidently
   ```
   If you are specifically working with large language models (LLMs), use the following command to install additional dependencies:
   ```bash
   pip install evidently[llm]
   ```

3. **Connect to the Cloud Workspace:** Once installed, you need to import the CloudWorkspace from the Evidently library and provide your API token to connect:
   ```python
   from evidently.ui.workspace import CloudWorkspace
   ws = CloudWorkspace(
token="API_KEY",
   url="https://app.evidently.cloud")
   ```
   Alternatively, set the environment variable `EVIDENTLY_API_KEY` to provide your token.

### References
- [Evi

In [None]:
# create report

from evidently import Report
from evidently.presets import TextEvals

In [25]:
# Build a report whose only entry is the TextEvals preset, then run it on the
# evaluation dataset. The second positional argument to run() is None.
# NOTE(review): presumably that slot is the reference dataset - confirm.
report = Report([TextEvals()])

my_eval = report.run(eval_dataset, None)