In [1]:
from typing import Dict
from typing import Generator
from typing import Union

import pandas as pd

from evidently import ColumnType
from evidently.future.datasets import DataDefinition

from evidently.future.datasets import DatasetColumn

from evidently.future.datasets import Descriptor
from evidently.future.metric_types import Metric
from evidently.future.datasets import Dataset
from evidently.future.metric_types import MetricResult
from evidently.future.metric_types import MetricTestResult
from evidently.future.metric_types import SingleValue
from evidently.future.metric_types import SingleValueTest
from evidently.future.metric_types import MetricCalculation
from evidently.future.metric_types import MetricId
from evidently.future.metric_types import SingleValueMetric
from evidently.future.metric_types import TResult
from evidently.future.preset_types import PresetResult

In [2]:
from typing import Optional


class TextLengthScorer(Descriptor):
    """Descriptor that scores each row with ``len()`` of one column's value.

    Args:
        column_name: Name of the dataset column to measure.
        alias: Optional name passed to the base descriptor; when omitted (or
            empty) ``"<column_name>: Text Length"`` is used.
    """

    def __init__(self, column_name: str, alias: Optional[str] = None):
        super().__init__(alias or f"{column_name}: Text Length")
        self._column_name = column_name

    def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
        """Build a numerical column holding the length of every value.

        Missing values (``None`` / ``NaN``) stay missing in the result instead
        of raising ``TypeError`` from ``len()``.

        Args:
            dataset: Dataset that contains the configured column.

        Returns:
            A ``DatasetColumn`` of type ``ColumnType.Numerical``.
        """
        # assumes `.data` is a pandas Series (the original relied on `.apply`) — TODO confirm
        values = dataset.column(self._column_name).data
        # na_action="ignore" skips missing entries rather than calling len() on them.
        lengths = values.map(len, na_action="ignore")
        return DatasetColumn(type=ColumnType.Numerical, data=lengths)


In [3]:
class ToxicityScorer(Descriptor):
    """Descriptor that exposes the columns produced by the ``ToxicityLLMEval`` feature.

    Args:
        column_name: Name of the dataset column handed to the feature.
        alias: Optional name passed to the base descriptor; when omitted (or
            empty) ``"<column_name>: Toxicity"`` is used.
    """

    def __init__(self, column_name: str, alias: Optional[str] = None):
        display_alias = alias if alias else f"{column_name}: Toxicity"
        super().__init__(display_alias)
        self._column_name = column_name

    def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
        """Run the toxicity feature on the dataset and wrap every generated column.

        Returns:
            A dict keyed by the generated frame's column names; each value is a
            ``DatasetColumn`` whose type is looked up on the feature.
        """
        # Imported lazily, exactly where they are needed.
        from evidently.descriptors import ToxicityLLMEval
        from evidently.options.base import Options

        feature = ToxicityLLMEval().feature(self._column_name)
        generated = feature.generate_features(dataset.as_dataframe(), None, Options())

        columns: Dict[str, DatasetColumn] = {}
        for name in generated.columns:
            # The feature's type lookup is keyed by "<fingerprint>.<column>".
            column_type = feature.get_type(f"{feature.get_fingerprint()}.{name}")
            columns[name] = DatasetColumn(type=column_type, data=generated[name])
        return columns

In [4]:
def my_scorer(data: DatasetColumn) -> DatasetColumn:
    """Pass-through scorer: return a new column with the input's type and data."""
    column_type, values = data.type, data.data
    return DatasetColumn(type=column_type, data=values)

def my_scorer2(dataset: Dataset) -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
    """Dataset-level scorer returning two existing columns under new keys.

    Returns:
        ``{"c1": <column_1>, "c2": <column_2>}`` taken from ``dataset``.
    """
    # (output key, source column) pairs, looked up in this order.
    output_to_source = (("c1", "column_1"), ("c2", "column_2"))
    return {output: dataset.column(source) for output, source in output_to_source}

In [5]:
from evidently.future.datasets import ColumnInfo
from evidently.future.descriptors import CustomColumnDescriptor
from evidently.future.descriptors import CustomDescriptor
from evidently.future.descriptors import TextLength

# Toy frame: one integer column and one string column with values of varying length.
data = pd.DataFrame(
    {
        "column_1": [1, 2, 3, 4, -1, 5],
        "column_2": ["a", "aa", "aaaa", "aaaaaaa", "a", "aa"],
    }
)

dataset = Dataset.from_pandas(
    data,
    data_definition=DataDefinition(numerical_columns=["column_1"], categorical_columns=["column_2"]),
    descriptors=[
        # Registered twice under the same alias; the recorded output shows the
        # second result as "column 2 length_1".
        TextLength("column_2", alias="column 2 length"),
        TextLength("column_2", alias="column 2 length"),
        # ToxicityScorer("column_2"),
        # Column-level and dataset-level custom functions.
        CustomColumnDescriptor("column_2", my_scorer, alias="column 2 custom function"),
        CustomDescriptor(my_scorer2, alias="global custom function"),
    ],
)

# Last expression of the cell: show the frame including the descriptor columns.
dataset.as_dataframe()

Unnamed: 0,column_1,column_2,column 2 length,column 2 length_1,column 2 custom function,global custom function.c1,global custom function.c2
0,1,a,1,1,a,1,a
1,2,aa,2,2,aa,2,aa
2,3,aaaa,4,4,aaaa,3,aaaa
3,4,aaaaaaa,7,7,aaaaaaa,4,aaaaaaa
4,-1,a,1,1,a,-1,a
5,5,aa,2,2,aa,5,aa


In [6]:
from typing import Optional
from typing import List
from plotly.express import line


class MyMaxMetric(SingleValueMetric):
    """Metric configuration for the maximum value of one column.

    The calculation itself lives in ``MaxMetricImplementation``.
    """

    # Name of the dataset column whose maximum is reported.
    column: str


# implementation
class MaxMetricImplementation(MetricCalculation[SingleValue, MyMaxMetric]):
    """Calculation for ``MyMaxMetric``: the maximum of the configured column."""

    def calculate(self, current_data: Dataset, reference_data: Optional[Dataset]) -> SingleValue:
        """Return the column maximum of ``current_data`` wrapped in a ``SingleValue``.

        ``reference_data`` is accepted but not read.
        """
        column_values = current_data.column(self.metric.column).data
        result = SingleValue(value=column_values.max())

        # The figure is built but not attached to the result: the widget
        # hookup below is disabled.
        figure = line(column_values)
        figure.add_hrect(6, 10)
        # result.set_widget([plotly_figure(title=self.display_name(), figure=figure)])
        return result

    def display_name(self) -> str:
        """Human-readable title that names the measured column."""
        return f"Max value for {self.metric.column}"

    def get_tests(self, value: TResult) -> Generator[MetricTestResult, None, None]:
        """Yield nothing: this implementation defines no tests of its own."""
        yield from ()


from evidently.future.report import Context
# Run the custom metric by hand, without a Report.
context = Context()

# Second argument is None here — presumably the (absent) reference dataset; confirm against the Context API.
context.init_dataset(dataset, None)

# Metric config -> calculation object -> result computed against the context.
result = MyMaxMetric(column="column_1").to_calculation().call(context)
# Last expression of the cell: display the result.
result

In [7]:
from evidently.future.metrics.group_by import GroupBy
from evidently.future.metric_types import render_results

# Fresh context for the grouped run (rebinds the `context` name from the previous cell).
context = Context()

context.init_dataset(dataset, None)

# Expand the max-of-"column 2 length" metric into per-group metrics keyed on "column_1".
# NOTE(review): the recorded output of this cell is
# "TypeError: Can't instantiate abstract class GroupByMetric with abstract method get_bound_tests";
# presumably the installed evidently version does not implement that method — confirm and pin a working version.
metrics = GroupBy(MyMaxMetric(column="column 2 length"), "column_1").generate_metrics(context)

# Evaluate every generated metric against the same context.
results = [metric.call(context) for metric in metrics]

render_results(results)

TypeError: Can't instantiate abstract class GroupByMetric with abstract method get_bound_tests

In [None]:
# Value of the first grouped result.
# NOTE(review): needs `results` from the previous cell, whose recorded output is a TypeError.
results[0].value

In [None]:
from evidently.future.preset_types import MetricPreset
from evidently.future.metrics import MinValue
from evidently.future.metrics import MaxValue

class ColumnSummary(MetricPreset):
    """Preset that bundles the ``MinValue`` and ``MaxValue`` metrics of one column.

    Args:
        column: Name of the column both metrics are computed for.
    """

    def __init__(self, column: str):
        self._column = column

    def metrics(self) -> List[Metric]:
        """Return the metrics this preset needs, min first and max second."""
        minimum = MinValue(column=self._column)
        maximum = MaxValue(column=self._column)
        return [minimum, maximum]

    def calculate(self, metric_results: Dict[MetricId, MetricResult]) -> PresetResult:
        """Concatenate the widgets of the min and max results into one ``PresetResult``."""
        widgets = []
        for metric_cls in (MinValue, MaxValue):
            # Rebuild the metric only to obtain the id its result is stored under.
            metric_id = metric_cls(column=self._column).get_metric_id()
            widgets.extend(metric_results[metric_id].widget)
        return PresetResult(widget=widgets)


In [None]:
from evidently.future.report import Report
from evidently.future.tests import lte

# Full report: custom metric with tests, the custom preset, and a grouped metric.
report = Report([
    # Two tests on one metric; presumably "less than or equal" thresholds — confirm.
    # The column's maximum in the displayed frame is 7, so lte(3) is presumably expected to fail.
    MyMaxMetric(column="column 2 length", tests=[lte(100), lte(3)]),
    MyMaxMetric(column="column_1", tests=[lte(100)]),
    # Column produced by the dataset-level custom descriptor (key "c1").
    MyMaxMetric(column="global custom function.c1", tests=[lte(100)]),
    ColumnSummary("column_1"),
    # NOTE(review): the same GroupBy construction has a recorded TypeError in an earlier cell — confirm it runs here.
    GroupBy(MyMaxMetric(column="column 2 length"), "column_1"),
])
# Second argument is None here — presumably the (absent) reference dataset; confirm against Report.run.
snapshot = report.run(dataset, None)
# Last expression of the cell: display the snapshot.
snapshot