# Evidently Metric Presets

In [1]:
import pandas as pd
import numpy as np

from sklearn import datasets, ensemble, model_selection

from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.metric_preset import DataQualityPreset
from evidently.metric_preset import RegressionPreset
from evidently.metric_preset import ClassificationPreset
from evidently.metric_preset import TargetDriftPreset

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


## Prepare Datasets

In [2]:
# Dataset for Data Quality and Integrity
adult_data = datasets.fetch_openml(name='adult', version=2, as_frame='auto')
adult = adult_data.frame

# Split on education level so the reference and current sets differ systematically:
# the current set holds the three listed levels, the reference set holds the rest.
current_education_levels = ['Some-college', 'HS-grad', 'Bachelors']
in_current = adult.education.isin(current_education_levels)

# Take explicit copies: adult_cur is written to below, and assigning into a frame
# obtained by boolean indexing can raise pandas' SettingWithCopyWarning.
adult_ref = adult[~in_current].copy()
adult_cur = adult[in_current].copy()

# Inject missing values: blank out the columns at positions 3 and 4
# for the first 2000 rows of the current set.
adult_cur.iloc[:2000, 3:5] = np.nan

  warn(


In [3]:
# Dataset for regression
# as_frame is a plain boolean for this loader ('auto' is only an option of fetch_openml).
housing_data = datasets.fetch_california_housing(as_frame=True)

# Rename into a new frame rather than in place, so the frame stored on the
# returned Bunch is left untouched.
housing = housing_data.frame.rename(columns={'MedHouseVal': 'target'})

# Simulated predictions: the true target plus Gaussian noise (mean 0, std 3).
# A seeded generator keeps the noise identical across Restart & Run All.
noise_rng = np.random.default_rng(0)
housing['prediction'] = housing_data['target'].values + noise_rng.normal(0, 3, housing.shape[0])

# Two seeded 5000-row samples; different seeds so the two sets are not identical.
housing_ref = housing.sample(n=5000, replace=False, random_state=0)
housing_cur = housing.sample(n=5000, replace=False, random_state=1)

In [4]:
# Dataset for binary probabilistic classification
# as_frame is a plain boolean for this loader ('auto' is only an option of fetch_openml).
bcancer_data = datasets.load_breast_cancer(as_frame=True)
bcancer = bcancer_data.frame

# Feature column names, computed once and reused for fitting and every prediction call.
bcancer_features = bcancer_data.feature_names.tolist()

# Seeded samples so the reports are reproducible; different seeds keep the sets distinct.
bcancer_ref = bcancer.sample(n=300, replace=False, random_state=0)
bcancer_cur = bcancer.sample(n=200, replace=False, random_state=1)

# Copies taken before any 'prediction' column is added: these receive hard
# class labels below, while bcancer_ref / bcancer_cur receive probabilities.
bcancer_label_ref = bcancer_ref.copy(deep=True)
bcancer_label_cur = bcancer_cur.copy(deep=True)

model = ensemble.RandomForestClassifier(random_state=1, n_estimators=10)
model.fit(bcancer_ref[bcancer_features], bcancer_ref.target)

# Probabilistic predictions: column 1 of predict_proba.
bcancer_ref['prediction'] = model.predict_proba(bcancer_ref[bcancer_features])[:, 1]
bcancer_cur['prediction'] = model.predict_proba(bcancer_cur[bcancer_features])[:, 1]

# Label predictions.
bcancer_label_ref['prediction'] = model.predict(bcancer_label_ref[bcancer_features])
bcancer_label_cur['prediction'] = model.predict(bcancer_label_cur[bcancer_features])

In [5]:
# Dataset for multiclass classification
# as_frame is a plain boolean for this loader ('auto' is only an option of fetch_openml).
iris_data = datasets.load_iris(as_frame=True)
iris = iris_data.frame
iris_features = iris_data.feature_names

# Seeded samples so the reports are reproducible.
# NOTE(review): the iris dataset has 150 rows, so n=150 without replacement makes
# each sample a reshuffle of the full dataset — confirm this is intended.
iris_ref = iris.sample(n=150, replace=False, random_state=0)
iris_cur = iris.sample(n=150, replace=False, random_state=1)

model = ensemble.RandomForestClassifier(random_state=1, n_estimators=3)
model.fit(iris_ref[iris_features], iris_ref.target)

# Label predictions for both sets.
iris_ref['prediction'] = model.predict(iris_ref[iris_features])
iris_cur['prediction'] = model.predict(iris_cur[iris_features])

## How to run Reports (formerly Dashboards)?

In [6]:
# Data drift preset: 'ks' stat test for numerical columns, 'psi' for categorical
# columns, each with a 0.2 threshold.
drift_preset = DataDriftPreset(
    num_stattest='ks',
    cat_stattest='psi',
    num_stattest_threshold=0.2,
    cat_stattest_threshold=0.2,
)
data_drift_report = Report(metrics=[drift_preset])
data_drift_report.run(reference_data=adult_ref, current_data=adult_cur)

# To write the report to disk instead of rendering it inline:
#data_drift_report.save_html('data_drift_report.html')
data_drift_report

## How to create a JSON Report (formerly Profile)?

In [7]:
# Serialize the report to a JSON string and display it as the cell output.
# To write it to a file instead:
#data_drift_report.save_json('data_drift_report.json')
drift_report_json = data_drift_report.json()
drift_report_json

'{"version": "0.3.3", "timestamp": "2023-06-27 08:26:53.729895", "metrics": [{"metric": "DatasetDriftMetric", "result": {"drift_share": 0.5, "number_of_columns": 15, "number_of_drifted_columns": 6, "share_of_drifted_columns": 0.4, "dataset_drift": false}}, {"metric": "DataDriftTable", "result": {"number_of_columns": 15, "number_of_drifted_columns": 6, "share_of_drifted_columns": 0.4, "dataset_drift": false, "drift_by_columns": {"age": {"column_name": "age", "column_type": "num", "stattest_name": "K-S p_value", "stattest_threshold": 0.2, "drift_score": 2.704668436775831e-60, "drift_detected": true, "current": {"small_distribution": {"x": [17.0, 24.3, 31.6, 38.9, 46.2, 53.5, 60.8, 68.1, 75.4, 82.7, 90.0], "y": [0.02471021672878118, 0.025839691234843417, 0.0262859521410848, 0.025211766596857754, 0.015942967066340047, 0.010173168977679455, 0.0061528716099474344, 0.0018640278561586543, 0.000568686464590777, 0.0002369526935794904]}}, "reference": {"small_distribution": {"x": [17.0, 24.3, 31.

## How to get a python object with Report's main data?

In [8]:
# Fetch the report's main data as a plain Python dict and display it.
drift_report_dict = data_drift_report.as_dict()
drift_report_dict

{'metrics': [{'metric': 'DatasetDriftMetric',
   'result': {'drift_share': 0.5,
    'number_of_columns': 15,
    'number_of_drifted_columns': 6,
    'share_of_drifted_columns': 0.4,
    'dataset_drift': False}},
  {'metric': 'DataDriftTable',
   'result': {'number_of_columns': 15,
    'number_of_drifted_columns': 6,
    'share_of_drifted_columns': 0.4,
    'dataset_drift': False,
    'drift_by_columns': {'age': {'column_name': 'age',
      'column_type': 'num',
      'stattest_name': 'K-S p_value',
      'stattest_threshold': 0.2,
      'drift_score': 2.704668436775831e-60,
      'drift_detected': True,
      'current': {'small_distribution': {'x': [17.0,
         24.3,
         31.6,
         38.9,
         46.2,
         53.5,
         60.8,
         68.1,
         75.4,
         82.7,
         90.0],
        'y': [0.02471021672878118,
         0.025839691234843417,
         0.0262859521410848,
         0.025211766596857754,
         0.015942967066340047,
         0.01017316897767945

## What Reports are available?

In [9]:
# Data quality report comparing the reference and current adult sets.
quality_preset = DataQualityPreset()
data_quality_report = Report(metrics=[quality_preset])
data_quality_report.run(reference_data=adult_ref, current_data=adult_cur)
data_quality_report

In [10]:
# Regression performance report on the housing samples, each sorted by index first.
housing_ref_sorted = housing_ref.sort_index()
housing_cur_sorted = housing_cur.sort_index()

regression_performance_report = Report(metrics=[RegressionPreset()])
regression_performance_report.run(
    reference_data=housing_ref_sorted,
    current_data=housing_cur_sorted,
)
regression_performance_report

In [11]:
# Classification performance report on the probabilistic breast cancer
# predictions, using a probability threshold of 0.7.
classification_preset = ClassificationPreset(probas_threshold=0.7)
classification_performance_report = Report(metrics=[classification_preset])
classification_performance_report.run(
    reference_data=bcancer_ref,
    current_data=bcancer_cur,
)
classification_performance_report


In [12]:
# Target drift report on the housing samples, with 'ks' as the numerical
# stat test and 'psi' as the categorical one.
target_drift_preset = TargetDriftPreset(
    num_stattest='ks',
    cat_stattest='psi',
)
num_target_drift_report = Report(metrics=[target_drift_preset])
num_target_drift_report.run(reference_data=housing_ref, current_data=housing_cur)
num_target_drift_report