In [None]:
import pandas as pd
import numpy as np

In [None]:
from evidently import ColumnType
from evidently.future.report import Report
from evidently.future.datasets import BinaryClassification, Regression
from evidently.future.datasets import ColumnInfo
from evidently.future.datasets import DataDefinition
from evidently.future.descriptors import TextLength
import pandas as pd
from evidently.future.datasets import Dataset
from evidently.future.presets.classification import ClassificationQuality

# Small toy frame shared by most cells below. "column_2" and "text_column"
# carry the same strings (including one None entry).
sample_strings = ["a", "aa", "aaaa", "aaaaaaa", None, "aa"]
data = pd.DataFrame(
    {
        "column_1": [1, 2, 3, 4, -1, 5],
        "column_2": list(sample_strings),
        "text_column": list(sample_strings),
        "target": [1, 1, 0, 0, 1, 1],
        "prediction": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
    }
)

# Column roles plus one classification and one regression task, both
# constructed with no arguments.
definition = DataDefinition(
    numerical_columns=["column_1"],
    categorical_columns=["column_2"],
    text_columns=["text_column"],
    classification=[BinaryClassification()],
    regression=[Regression()],
)

# Two TextLength descriptors over "column_2", differing only in alias.
length_descriptors = [
    TextLength("column_2", alias=alias) for alias in ("target2", "prediction2")
]
dataset = Dataset.from_pandas(
    data,
    data_definition=definition,
    descriptors=length_descriptors,
)

dataset.as_dataframe()

In [None]:
# Display the data definition stored on the dataset (private attribute access).
dataset._data_definition

In [None]:
# Display the DataDefinition object that was passed to Dataset.from_pandas.
definition

In [None]:
from evidently.future.presets import DataSummaryPreset

# Synthetic frame with exactly one missing entry per generated column.
num_rows = 20
np.random.seed(42)  # fixed seed so the generated values are reproducible

# Numerical data: float arrays can hold NaN directly.
num_col1 = np.random.randint(1, 100, num_rows).astype(float)
num_col2 = np.random.uniform(10, 500, num_rows)
num_col1[5] = np.nan
num_col2[12] = np.nan

# Categorical data. np.random.choice over a list of str returns a fixed-width
# unicode array; writing np.nan into such an array stores the (truncated) text
# of "nan" instead of a missing value. Cast to object first so the NaN is kept
# as a real missing entry.
cat_col1 = np.random.choice(['A', 'B', 'C'], num_rows).astype(object)
cat_col2 = np.random.choice(['X', 'Y', 'Z'], num_rows).astype(object)
cat_col1[3] = np.nan
cat_col2[8] = np.nan

# Text data: same object cast as above, otherwise the entry becomes the
# literal string "nan".
text_col = np.random.choice(
    ['Hello world', 'Test string', 'Sample text', 'Random text'], num_rows
).astype(object)
text_col[6] = np.nan

# Datetime data with one missing entry.
date_col = pd.date_range(start='2025-01-01', periods=num_rows, freq='D')
date_col = date_col.to_series().astype("object")  # Convert to object to allow NaNs
date_col.iloc[10] = np.nan

# Assemble the frame; the three datetime columns share the same values.
df = pd.DataFrame({
    'Numerical_1': num_col1,
    'Numerical_2': num_col2,
    'Categorical_1': cat_col1,
    'Categorical_2': cat_col2,
    'Text': text_col,
    'Datetime': date_col.values,
    'Datetime2': date_col.values,
    'Datetime3': date_col.values,
})

# Run the data summary preset over the synthetic frame; the second argument
# to run() is None.
summary_preset = DataSummaryPreset()
report = Report([summary_preset])

snapshot = report.run(df, None)

In [None]:
# Print pandas' summary (dtypes and non-null counts) of the synthetic frame.
df.info()

In [None]:
# Display the snapshot returned by the most recent report.run(...) call.
snapshot

In [None]:
from evidently.future.report import Context
from evidently.future.metrics import F1ByLabel
# Evaluate a single metric directly through a Context rather than a Report.
context = Context(None)
context.init_dataset(dataset, None)

f1_by_label = F1ByLabel(probas_threshold=0.4)
metric_result = f1_by_label.call(context)
metric_result

In [None]:
from evidently.future.report import Report

from evidently.future.tests import lte

from evidently.future.metrics import F1Score
from evidently.future.metrics import Accuracy
from evidently.future.metrics import Precision
from evidently.future.metrics import Recall
from evidently.future.metrics import TPR
from evidently.future.metrics import TNR
from evidently.future.metrics import FPR
from evidently.future.metrics import FNR
from evidently.future.metrics import LogLoss
from evidently.future.metrics import RocAuc
from evidently.future.metrics import F1ByLabel
from evidently.future.metrics import PrecisionByLabel
from evidently.future.metrics import RecallByLabel
from evidently.future.metrics import RocAucByLabel
from evidently.future.metrics import DummyF1Score
from evidently.future.metrics import DummyPrecision
from evidently.future.metrics import DummyRecall

# Every metric in this report is built with the same probability threshold.
PROBAS_THRESHOLD = 0.4

classification_metrics = [
    F1Score(probas_threshold=PROBAS_THRESHOLD, conf_matrix=False),
    Accuracy(probas_threshold=PROBAS_THRESHOLD),
    Precision(probas_threshold=PROBAS_THRESHOLD, pr_curve=True, pr_table=True),
    Recall(probas_threshold=PROBAS_THRESHOLD),
    TPR(probas_threshold=PROBAS_THRESHOLD),
    TNR(probas_threshold=PROBAS_THRESHOLD),
    FPR(probas_threshold=PROBAS_THRESHOLD),
    FNR(probas_threshold=PROBAS_THRESHOLD),
    RocAuc(probas_threshold=PROBAS_THRESHOLD, roc_curve=False),
    LogLoss(probas_threshold=PROBAS_THRESHOLD, pr_table=True),
    # Per-label test: label 0 gets an lte(0.2) check.
    F1ByLabel(probas_threshold=PROBAS_THRESHOLD, tests={0: [lte(0.2)]}),
    # NOTE(review): PrecisionByLabel is listed twice with identical
    # arguments — confirm whether the duplicate is intentional.
    PrecisionByLabel(probas_threshold=PROBAS_THRESHOLD),
    PrecisionByLabel(probas_threshold=PROBAS_THRESHOLD),
    RecallByLabel(probas_threshold=PROBAS_THRESHOLD),
    RocAucByLabel(probas_threshold=PROBAS_THRESHOLD),
    DummyF1Score(probas_threshold=PROBAS_THRESHOLD),
    DummyPrecision(probas_threshold=PROBAS_THRESHOLD),
    DummyRecall(probas_threshold=PROBAS_THRESHOLD),
]
report = Report(classification_metrics, include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot


In [None]:
from evidently.future.report import Report

from evidently.future.presets import ClassificationDummyQuality
from evidently.future.presets import ClassificationQuality
from evidently.future.presets import ClassificationQualityByLabel

# Instantiate each classification preset with no arguments, in this order.
preset_classes = (
    ClassificationQuality,
    ClassificationDummyQuality,
    ClassificationQualityByLabel,
)
classification_presets = [preset_cls() for preset_cls in preset_classes]
report = Report(classification_presets, include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot


In [None]:


from evidently.future.tests import Reference
from evidently.future.report import Report

from evidently.future.tests.numerical_tests import gte

from evidently.future.metrics.column_statistics import MinValue
from evidently.future.metrics.column_statistics import MaxValue
from evidently.future.metrics.column_statistics import MedianValue
from evidently.future.metrics.column_statistics import MeanValue
from evidently.future.metrics.column_statistics import StdValue
from evidently.future.metrics.column_statistics import QuantileValue

# Column statistics for a single column; only the min and max metrics carry
# explicit tests.
stats_column = "column_1"
column_stat_metrics = [
    MinValue(column=stats_column, tests=[gte(0.2)]),
    MaxValue(column=stats_column, tests=[gte(Reference(relative=0.1))]),
    MedianValue(column=stats_column),
    MeanValue(column=stats_column),
    StdValue(column=stats_column),
    QuantileValue(column=stats_column),
    QuantileValue(column=stats_column, quantile=0.95),
]
report = Report(column_stat_metrics, include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from evidently.utils.types import ApproxValue
from evidently.future.report import Report

from evidently.future.metrics import CategoryCount
from evidently.future.metrics import InRangeValueCount
from evidently.future.metrics import OutRangeValueCount
from evidently.future.metrics import InListValueCount
from evidently.future.metrics import OutListValueCount
from evidently.future.metrics import MissingValueCount

from evidently.future.tests import eq
from evidently.future.tests import gt
from evidently.future.tests import gte
from evidently.future.tests import lt
from evidently.future.tests import lte

# Checks attached to the count of category "a" in "column_2".
category_count_checks = [
    eq(1),
    lte(2),
    lte(Reference(relative=0.1)),
    lte(Reference(absolute=1)),
    gte(2),
    lt(1),
    gt(1),
]
# Checks attached to the share of category "a" in "column_2".
category_share_checks = [
    lte(0.5),
    eq(ApproxValue(0.19, absolute=0.015)),
]

value_count_metrics = [
    CategoryCount(
        column="column_2",
        category="a",
        tests=category_count_checks,
        share_tests=category_share_checks,
    ),
    CategoryCount(column="column_2", categories=["a", "aa"], tests=[lt(2)]),
    InRangeValueCount(
        column="column_1",
        left=1,
        right=3,
        count_tests=[lte(Reference(absolute=1))],
    ),
    OutRangeValueCount(column="column_1", left=1, right=3),
    InListValueCount(column="column_2", values=["a", "aa"]),
    OutListValueCount(column="column_2", values=["a", "aa"]),
    MissingValueCount(column="column_2"),
]
report = Report(value_count_metrics)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from evidently.future.report import Report
from evidently.future.presets import TextEvals

# Report containing only the TextEvals preset, built with no arguments.
text_preset = TextEvals()
report = Report(metrics=[text_preset], include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from evidently.future.report import Report

from evidently.future.metrics import ColumnCount
from evidently.future.metrics import RowCount
from evidently.future.metrics import DuplicatedRowCount

# Dataset-level count metrics, each instantiated with no arguments.
count_metric_classes = (ColumnCount, RowCount, DuplicatedRowCount)
count_metrics = [metric_cls() for metric_cls in count_metric_classes]
report = Report(count_metrics, include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from scipy.stats import anderson_ksamp
from evidently.calculations.stattests import register_stattest
from evidently.calculations.stattests import StatTest

from evidently.future.report import Report

from evidently.future.metrics import DriftedColumnsCount
from evidently.future.metrics import ValueDrift


def _addd(
    reference_data: pd.Series,
    current_data: pd.Series,
    feature_type: ColumnType,
    threshold: float,
):
    """Compare two samples with scipy's k-sample Anderson-Darling test.

    Args:
        reference_data: first sample; its ``.values`` are passed to the test.
        current_data: second sample; its ``.values`` are passed to the test.
        feature_type: accepted but not used by this implementation.
        threshold: cut-off the p-value is compared against.

    Returns:
        A ``(p_value, flag)`` tuple where ``flag`` is ``p_value < threshold``.
    """
    samples = [reference_data.values, current_data.values]
    test_result = anderson_ksamp(samples)
    # The third element of the result is used as the p-value.
    p_value = test_result[2]
    below_threshold = p_value < threshold
    return p_value, below_threshold


# Describe the custom stat test: registered under the name "adt", allowed
# for numerical columns only.
numerical_only = [ColumnType.Numerical]
adt = StatTest(
    name="adt",
    display_name="Anderson-Darling",
    allowed_feature_types=numerical_only,
    default_threshold=0.1,
)

# Register it with _addd as the default implementation.
register_stattest(adt, default_impl=_addd)


# Drift report that selects the stat test by the name "adt".
drift_metrics = [
    # ValueDrift(column="column_1"),  # variant without an explicit method, left disabled
    ValueDrift(column="column_1", method="adt"),
    DriftedColumnsCount(),
]
report = Report(drift_metrics, include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from evidently.future.report import Report

from evidently.future.presets import DataDriftPreset

# Report containing only the DataDriftPreset, built with no arguments.
drift_preset = DataDriftPreset()
report = Report(metrics=[drift_preset], include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from evidently.future.report import Report

from evidently.future.presets.classification import ClassificationQualityByLabel

# By-label classification preset with an explicit probability threshold.
by_label_preset = ClassificationQualityByLabel(probas_threshold=0.4)
report = Report(metrics=[by_label_preset], include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from evidently.future.report import Report

from evidently.future.presets import ValueStats

# One ValueStats preset per column, in this order; include_tests is not
# passed for this report.
value_stats_presets = [
    ValueStats(column_name) for column_name in ("column_1", "column_2")
]
report = Report(value_stats_presets)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from evidently.future.report import Report

from evidently.future.presets import DatasetStats

# Report containing only the DatasetStats preset, built with no arguments.
dataset_stats_preset = DatasetStats()
report = Report(metrics=[dataset_stats_preset], include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from evidently.future.report import Report

from evidently.future.metrics import MeanError
from evidently.future.metrics import MAE
from evidently.future.metrics import MAPE
from evidently.future.metrics import RMSE
from evidently.future.metrics import R2Score
from evidently.future.metrics import AbsMaxError

# Option sets shared by several regression metrics below.
all_error_views = dict(error_plot=True, error_distr=True, error_normality=True)
distr_and_normality = dict(error_distr=True, error_normality=True)

regression_metrics = [
    MeanError(**all_error_views),
    MAE(**all_error_views),
    MAPE(error_distr=True),
    RMSE(**distr_and_normality),
    R2Score(**all_error_views),
    AbsMaxError(**distr_and_normality),
]
report = Report(regression_metrics, include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from evidently.future.report import Report

from evidently.future.metrics import DummyMAE
from evidently.future.metrics import DummyMAPE
from evidently.future.metrics import DummyRMSE

# Dummy regression metrics, each instantiated with no arguments.
dummy_metric_classes = (DummyMAE, DummyMAPE, DummyRMSE)
dummy_regression_metrics = [metric_cls() for metric_cls in dummy_metric_classes]
report = Report(dummy_regression_metrics, include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
from evidently.future.report import Report

from evidently.future.presets import RegressionDummyQuality

# Report containing only the RegressionDummyQuality preset.
regression_dummy_preset = RegressionDummyQuality()
report = Report(metrics=[regression_dummy_preset], include_tests=True)

# The same dataset is passed for both positional arguments of run().
snapshot = report.run(dataset, dataset)
snapshot

In [None]:
# Display the result of calling dict() on the most recent snapshot.
snapshot.dict()

In [None]:
# Display the result of calling json() on the most recent snapshot.
snapshot.json()

In [None]:
from evidently.future.presets.dataset_stats import ValueStatsTests
from evidently.future.presets import DataSummaryPreset

# Tests attached to the preset: one on the row count, plus per-column tests
# keyed by column name.
row_count_checks = [gte(10)]
per_column_checks = {
    "column_1": ValueStatsTests(mean_tests=[lte(0.5)]),
    "column_2": ValueStatsTests(unique_values_count_tests={"a": [lte(10)]}),
}

summary_preset = DataSummaryPreset(
    row_count_tests=row_count_checks,
    column_tests=per_column_checks,
)
report = Report([summary_preset])

# Second argument to run() is None here.
snapshot = report.run(dataset, None)

# Show only the 'tests' entry of the snapshot's dict() output.
snapshot.dict()['tests']

In [None]:
# Show only the 'tests' entry of the most recent snapshot's dict() output.
snapshot.dict()['tests']

In [None]:
from evidently.future.metrics import UniqueValueCount

In [None]:
# IPython help lookup for UniqueValueCount (interactive-only syntax, not plain Python).
?UniqueValueCount

In [None]:
# Per-value checks for "column_2": both dicts are keyed by the value "aa".
aa_count_checks = {"aa": [lte(2)]}
aa_share_checks = {"aa": [lt(0.4)]}

unique_value_metric = UniqueValueCount(
    column="column_2",
    tests=aa_count_checks,
    share_tests=aa_share_checks,
)
report = Report([unique_value_metric])

# Second argument to run() is None here.
snapshot = report.run(dataset, None)
snapshot

In [None]:
from evidently.future.generators import ColumnMetricGenerator

# Build the report from a ColumnMetricGenerator given MaxValue and a one-column list.
max_value_generator = ColumnMetricGenerator(MaxValue, columns=["column_1"])
report = Report(metrics=[max_value_generator])
report.run(dataset)