In [None]:
try:
    import evidently
except:
    !npm install -g yarn
    !pip install git+https://github.com/evidentlyai/evidently.git@test_suite_alpha

# Data Quality

In [None]:
import pandas as pd
import numpy as np

from evidently.test_suite import TestSuite

from evidently.tests import TestFeatureValueMin, TestFeatureValueMax, TestFeatureValueMean, \
TestFeatureValueMedian, TestFeatureValueStd,\
TestMeanInNSigmas, TestValueRange, TestNumberOfOutRangeValues, TestShareOfOutRangeValues, TestValueList, \
TestNumberOfOutListValues, TestShareOfOutListValues, TestNumberOfUniqueValues, TestMostCommonValueShare, \
TestUniqueValuesShare

In [None]:
from evidently import ColumnMapping
from datetime import datetime
from sklearn.datasets import fetch_openml

# Load the OpenML "adult" dataset (version 2) and take its DataFrame.
data = fetch_openml(name='adult', version=2, as_frame='auto')
df = data.frame

# Rows whose education is one of the three listed levels form the current
# dataset; every other row forms the reference dataset.
# .copy() makes both splits independent frames, so the in-place NaN writes
# below cannot trigger SettingWithCopyWarning.
in_current = df.education.isin(['Some-college', 'HS-grad', 'Bachelors'])
ref = df[~in_current].copy()
curr = df[in_current].copy()

# Blank out the columns at positions 3-4 and 12 for the first 2000 current rows
# (presumably to simulate missing values -- confirm which columns these are).
curr.iloc[:2000, 3:5] = np.nan
curr.iloc[:2000, 12] = np.nan

# No explicit condition is passed to any test here; the suite is run with a
# reference dataset alongside the current one.
suite_with_ref = TestSuite(tests=[
    TestFeatureValueMin(column_name='hours-per-week'),
    TestFeatureValueMax(column_name='hours-per-week'),
    TestFeatureValueMean(column_name='hours-per-week'),
    TestFeatureValueMedian(column_name='hours-per-week'),
    TestFeatureValueStd(column_name='hours-per-week'),
    TestMeanInNSigmas(column_name='hours-per-week'),
    TestValueRange(column_name='hours-per-week'),
    TestNumberOfOutRangeValues(column_name='hours-per-week'),
    TestShareOfOutRangeValues(column_name='hours-per-week'),
    TestValueList(column_name='education'),
    TestNumberOfOutListValues(column_name='hours-per-week'),
    TestShareOfOutListValues(column_name='education'),
    TestNumberOfUniqueValues(column_name='hours-per-week'),
    TestMostCommonValueShare(column_name='education'),
    TestUniqueValuesShare(column_name='education'),
])
suite_with_ref.run(
    reference_data=ref,
    current_data=curr,
    column_mapping=ColumnMapping(),
)
suite_with_ref

In [None]:
# Same data-quality tests, but with explicit ranges and value lists passed to
# the range/list tests. Reuses `ref` and `curr` from the previous cell.
suite = TestSuite(tests=[
    TestFeatureValueMin(column_name='hours-per-week'),
    TestFeatureValueMax(column_name='hours-per-week'),
    TestFeatureValueMean(column_name='hours-per-week'),
    TestFeatureValueMedian(column_name='hours-per-week'),
    TestFeatureValueStd(column_name='hours-per-week'),
    # TestMeanInNSigmas(column_name='hours-per-week'),
    TestValueRange(column_name='hours-per-week', left=7, right=13),
    TestNumberOfOutRangeValues(column_name='hours-per-week', left=3, right=5),
    TestShareOfOutRangeValues(column_name='hours-per-week', left=3, right=5),
    TestValueList(column_name='education', values=['HS-grad', 'Bachelors']),
    # The allowed values are education labels, so this test must target the
    # 'education' column (it previously pointed at the numeric 'hours-per-week').
    TestNumberOfOutListValues(column_name='education', values=['HS-grad', 'Bachelors']),
    TestShareOfOutListValues(column_name='education', values=['HS-grad', 'Bachelors']),
    TestNumberOfUniqueValues(column_name='hours-per-week'),
    TestMostCommonValueShare(column_name='education'),
    TestUniqueValuesShare(column_name='education'),
])
suite.run(
    reference_data=ref,
    current_data=curr,
    column_mapping=ColumnMapping(),
)
suite

# Data Drift

In [None]:
import pandas as pd
import numpy as np

from evidently.test_suite import TestSuite

from evidently.tests import TestShareOfDriftedFeatures, TestFeatureValueDrift, TestNumberOfDriftedFeatures

In [None]:
from evidently import ColumnMapping
from datetime import datetime
from sklearn.datasets import fetch_openml
from evidently.test_preset import NoTargetPerformance, DataQuality, DataStability, DataDrift

# Load the OpenML "adult" dataset (version 2) and take its DataFrame.
data = fetch_openml(name='adult', version=2, as_frame='auto')
df = data.frame

# Rows with one of the three listed education levels form the current dataset;
# all other rows form the reference dataset. .copy() gives independent frames,
# so the column assignments below cannot trigger SettingWithCopyWarning.
in_current = df.education.isin(['Some-college', 'HS-grad', 'Bachelors'])
ref = df[~in_current].copy()
curr = df[in_current].copy()

# Fixed seed so the synthetic predictions are identical on every
# Restart & Run All.
rng = np.random.default_rng(42)

# Synthetic regression setup: target is education-num, prediction is the
# target plus Gaussian noise (mean 0, standard deviation 6).
curr['target'] = curr['education-num']
curr['preds'] = curr['education-num'].values + rng.normal(0, 6, curr.shape[0])
ref['target'] = ref['education-num']
ref['preds'] = ref['education-num'].values + rng.normal(0, 6, ref.shape[0])

# Blank out the columns at positions 3-4 and 12 for the first 2000 current rows
# (presumably to simulate missing values -- confirm which columns these are).
curr.iloc[:2000, 3:5] = np.nan
curr.iloc[:2000, 12] = np.nan

suite = TestSuite(tests=[
    TestShareOfDriftedFeatures(),
    TestNumberOfDriftedFeatures(),
    TestFeatureValueDrift(column_name='education-num'),
    TestFeatureValueDrift(column_name='education'),
])

suite.run(
    reference_data=ref,
    current_data=curr,
    column_mapping=ColumnMapping(target='target', prediction='preds'),
)
suite

# Test Preset

In [None]:
# NoTargetPerformance preset, with education-num passed as the most important
# feature. Reuses `ref` and `curr` from the Data Drift section.
suite = TestSuite(tests=[NoTargetPerformance(most_important_features=["education-num"])])
suite.run(
    current_data=curr,
    reference_data=ref,
    column_mapping=ColumnMapping(prediction='preds', target='target'),
)
suite

In [None]:
# DataDrift preset with its default arguments.
# Reuses `ref` and `curr` from the Data Drift section.
suite = TestSuite(tests=[DataDrift()])
suite.run(
    current_data=curr,
    reference_data=ref,
    column_mapping=ColumnMapping(prediction='preds', target='target'),
)
suite

In [None]:
# DataStability preset with its default arguments.
# Reuses `ref` and `curr` from the Data Drift section.
suite = TestSuite(tests=[DataStability()])
suite.run(
    current_data=curr,
    reference_data=ref,
    column_mapping=ColumnMapping(prediction='preds', target='target'),
)
suite

In [None]:
# DataQuality preset with its default arguments.
# Reuses `ref` and `curr` from the Data Drift section.
suite = TestSuite(tests=[DataQuality()])
suite.run(
    current_data=curr,
    reference_data=ref,
    column_mapping=ColumnMapping(prediction='preds', target='target'),
)
suite

# Regression

In [None]:
import pandas as pd
import numpy as np

from evidently.test_suite import TestSuite

from evidently.tests import TestValueMAE, TestValueRMSE, TestValueMeanError, TestValueMAPE, \
TestValueAbsMaxError, TestValueR2Score

In [None]:
from evidently import ColumnMapping
from datetime import datetime
from sklearn.datasets import fetch_openml

# Load the OpenML "adult" dataset (version 2) and take its DataFrame.
data = fetch_openml(name='adult', version=2, as_frame='auto')
df = data.frame

# First 20000 rows are the reference dataset, the rest is the current dataset.
# .copy() detaches both splits from `df`, so the assignments below neither
# trigger SettingWithCopyWarning nor risk writing through to `df`.
ref = df[:20000].copy()
curr = df[20000:].copy()

# Fixed seed so the synthetic predictions are identical on every
# Restart & Run All.
rng = np.random.default_rng(42)

# Synthetic regression setup: target is education-num, prediction is the
# target plus Gaussian noise (mean 0, standard deviation 6).
curr['target'] = curr['education-num']
curr['preds'] = curr['education-num'].values + rng.normal(0, 6, curr.shape[0])
ref['target'] = ref['education-num']
ref['preds'] = ref['education-num'].values + rng.normal(0, 6, ref.shape[0])

# Blank out the columns at positions 3-4 and 12 for the first 2000 current rows
# (presumably to simulate missing values -- confirm which columns these are).
curr.iloc[:2000, 3:5] = np.nan
curr.iloc[:2000, 12] = np.nan

suite = TestSuite(tests=[
    TestValueMAE(),
    TestValueRMSE(),
    TestValueMeanError(),
    TestValueMAPE(),
    TestValueAbsMaxError(),
    TestValueR2Score(),
])

suite.run(
    reference_data=ref,
    current_data=curr,
    column_mapping=ColumnMapping(target='target', prediction='preds'),
)
suite

In [None]:
from evidently import ColumnMapping
from datetime import datetime
from sklearn.datasets import fetch_openml

# Load the OpenML "adult" dataset (version 2) and take its DataFrame.
data = fetch_openml(name='adult', version=2, as_frame='auto')
df = data.frame

# First 20000 rows are the reference dataset, the rest is the current dataset.
# .copy() detaches both splits from `df`, so the assignments below neither
# trigger SettingWithCopyWarning nor risk writing through to `df`.
ref = df[:20000].copy()
curr = df[20000:].copy()

# Fixed seed so the synthetic predictions are identical on every
# Restart & Run All.
rng = np.random.default_rng(42)

# Synthetic regression setup: target is education-num, prediction is the
# target plus Gaussian noise (mean 0, standard deviation 6).
curr['target'] = curr['education-num']
curr['preds'] = curr['education-num'].values + rng.normal(0, 6, curr.shape[0])
ref['target'] = ref['education-num']
ref['preds'] = ref['education-num'].values + rng.normal(0, 6, ref.shape[0])

# Blank out the columns at positions 3-4 and 12 for the first 2000 current rows
# (presumably to simulate missing values -- confirm which columns these are).
curr.iloc[:2000, 3:5] = np.nan
curr.iloc[:2000, 12] = np.nan

suite = TestSuite(tests=[
    TestValueMAE(),
    TestValueRMSE(),
    TestValueMeanError(),
    TestValueMAPE(),
    TestValueAbsMaxError(),
    TestValueR2Score(),
])

# reference_data=None: this run uses the current dataset only.
suite.run(
    reference_data=None,
    current_data=curr,
    column_mapping=ColumnMapping(target='target', prediction='preds'),
)
suite

# Test Generators
If you want to create a list of tests, you can just use a list comprehension.
As an example, we create tests for several quantiles of one column:

In [None]:
import pandas as pd
from sklearn.datasets import fetch_openml

from evidently.tests import TestValueQuantile
from evidently.test_suite import TestSuite


# Load the OpenML "adult" dataset (version 2) and split it by row position:
# the first 20000 rows are the reference data, the remainder is current data.
data = fetch_openml(name='adult', version=2, as_frame='auto')
df = data.frame
ref = df.iloc[:20000]
curr = df.iloc[20000:]


# One TestValueQuantile on education-num per quantile level.
suite = TestSuite(
    tests=[
        TestValueQuantile(column_name="education-num", quantile=level)
        for level in (0.5, 0.9, 0.99)
    ]
)

suite.run(current_data=curr, reference_data=ref)
suite

However, if you want to generate tests from the dataset's column names, those names only become available after the suite calculation has already started.

In this case you can make a test generator.

In [None]:
from typing import List

import pandas as pd
from sklearn.datasets import fetch_openml

from evidently.analyzers.utils import DatasetColumns
from evidently.tests.base_test import BaseTestGenerator
from evidently.tests import TestValueQuantile
from evidently.test_suite import TestSuite


# Load the OpenML "adult" dataset (version 2) as a DataFrame.
data = fetch_openml(name='adult', version=2, as_frame='auto')
df = data.frame

# Positional split: first 20000 rows are reference, the remainder is current.
ref = df.iloc[:20000]
curr = df.iloc[20000:]


class TestAllQuantiles(BaseTestGenerator):
    """Test generator producing quantile tests for every numeric feature."""

    def generate_tests(self, columns_info: DatasetColumns) -> List[TestValueQuantile]:
        """Build one TestValueQuantile per (quantile, numeric feature) pair.

        Args:
            columns_info: dataset column description; only its
                ``num_feature_names`` attribute is read here.

        Returns:
            Tests ordered by quantile first (0.5, 0.9, 0.99), then by feature
            name in the order given by ``columns_info.num_feature_names``.
        """
        generated: List[TestValueQuantile] = []
        for level in (0.5, 0.9, 0.99):
            # walk over all numeric features for this quantile level
            for feature in columns_info.num_feature_names:
                generated.append(TestValueQuantile(column_name=feature, quantile=level))
        return generated

# The generator instance is passed in place of a concrete test.
suite = TestSuite(
    tests=[
        TestAllQuantiles(),
    ]
)
suite.run(current_data=curr, reference_data=ref)
suite

You can also use the pre-defined test generators.

In [None]:
import pandas as pd
from sklearn.datasets import fetch_openml

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_tests_for_columns
from evidently.tests.base_test import generate_tests_for_all_columns
from evidently.tests.base_test import generate_tests_for_num_features
from evidently.tests.base_test import generate_tests_for_cat_features
from evidently.tests.base_test import generate_tests_for_datetime_features
from evidently.tests import TestFeatureValueMin
from evidently.tests import TestColumnNANShare
from evidently.tests import TestColumnAllUniqueValues

# Load the OpenML "adult" dataset (version 2) as a DataFrame.
data = fetch_openml(name='adult', version=2, as_frame='auto')
df = data.frame

# Positional split: first 20000 rows are reference, the remainder is current.
ref = df.iloc[:20000]
curr = df.iloc[20000:]

Use **generate_tests_for_columns** if you want to generate tests for a custom list of columns:

In [None]:
# TestColumnNANShare generated for an explicit list of three columns.
suite = TestSuite(
    tests=[
        generate_tests_for_columns(
            TestColumnNANShare,
            columns=["age", "workclass", "race"],
        ),
    ]
)
suite.run(reference_data=ref, current_data=curr)
suite

Use **generate_tests_for_all_columns** if you want to generate tests for all columns, including **target** and **prediction**:

In [None]:
# TestColumnNANShare generated for all columns; the fnlwgt column is given its
# own "gt" parameter of 0.7 through per_column_parameters.
suite = TestSuite(
    tests=[
        generate_tests_for_all_columns(
            TestColumnNANShare,
            per_column_parameters={"fnlwgt": {"gt": 0.7}},
        ),
    ]
)
suite.run(reference_data=ref, current_data=curr)
suite

The same works for numeric, categorical and datetime features, excluding the special columns:

In [None]:
# TestFeatureValueMin generated for the numeric features, with the same "gt"
# parameter of 0.7 passed for all of them through for_all_parameters.
suite = TestSuite(
    tests=[
        generate_tests_for_num_features(
            TestFeatureValueMin,
            for_all_parameters={"gt": 0.7},
        ),
    ]
)
suite.run(reference_data=ref, current_data=curr)
suite

In [None]:
# TestColumnAllUniqueValues generated for the categorical features.
suite = TestSuite(
    tests=[
        generate_tests_for_cat_features(TestColumnAllUniqueValues),
    ]
)
suite.run(reference_data=ref, current_data=curr)
suite

In [None]:
# TestColumnAllUniqueValues generated for the datetime features.
suite = TestSuite(
    tests=[
        generate_tests_for_datetime_features(TestColumnAllUniqueValues),
    ]
)
suite.run(reference_data=ref, current_data=curr)
suite