# Evidently Tests and Test Presets

In [None]:
import pandas as pd
import numpy as np

from datetime import datetime
from sklearn.datasets import fetch_openml

from evidently import ColumnMapping
from evidently.test_suite import TestSuite
from evidently.tests import *

from evidently.test_preset import NoTargetPerformanceTestPreset
from evidently.test_preset import DataQualityTestPreset
from evidently.test_preset import DataStabilityTestPreset
from evidently.test_preset import DataDriftTestPreset

## Prepare Datasets

In [None]:
# Download the OpenML 'adult' dataset (version 2) and keep its frame.
data = fetch_openml(name='adult', version=2, as_frame='auto')
df = data.frame

# Synthetic target/prediction pair: the target is a copy of 'education-num',
# the prediction is the same column plus Gaussian noise (mean 0, std 6).
# NOTE: no random seed is set, so the noise differs from run to run.
education_num = df['education-num']
df['target'] = education_num
df['prediction'] = education_num.values + np.random.normal(0, 6, len(df))

# Split by education level: rows with one of the listed levels form the
# current data, every other row goes to the reference data.
current_levels = ['Some-college', 'HS-grad', 'Bachelors']
in_current = df['education'].isin(current_levels)
ref = df[~in_current]
curr = df[in_current]

## How to run tests for a dataset?

In [None]:
# Dataset-level tests, all created with their default settings.
dataset_level_checks = [
    TestNumberOfColumns(),
    TestNumberOfRows(),
    TestNumberOfConstantColumns(),
    TestNumberOfDuplicatedColumns(),
    TestNumberOfDuplicatedRows(),
    TestColumnsType(),
    TestTargetFeaturesCorrelations(),
    TestHighlyCorrelatedFeatures(),
    TestShareOfDriftedFeatures(),
    TestNumberOfDriftedFeatures(),
]

# Run the suite on the reference/current split.
dataset_tests = TestSuite(tests=dataset_level_checks)
dataset_tests.run(reference_data=ref, current_data=curr)

# Trailing bare expression: the notebook shows the suite as the cell output.
dataset_tests

In [None]:
# Bare expression: whatever json() returns for the suite that was just run is
# shown as the cell output (presumably a JSON string -- confirm in Evidently docs).
dataset_tests.json()

In [None]:
# Bare expression: whatever as_dict() returns for the same suite is shown as
# the cell output (presumably a plain dict -- confirm in Evidently docs).
dataset_tests.as_dict()

## How to run tests for individual features?

In [None]:
# Column-level tests: each one is bound to a single column via column_name.
numeric_column = 'hours-per-week'
categorical_column = 'education'

column_checks = [
    TestMeanInNSigmas(column_name=numeric_column),
    TestShareOfOutRangeValues(column_name=numeric_column),
    TestColumnShareOfNulls(column_name=categorical_column),
    TestFeatureValueDrift(column_name=categorical_column),
    # The pattern matches values that start with at least one whitespace char.
    TestColumnValueRegExp(column_name=categorical_column, reg_exp=r"^\s+.*"),
]

feature_level_tests = TestSuite(tests=column_checks)
feature_level_tests.run(reference_data=ref, current_data=curr)

# Trailing bare expression: the notebook shows the suite as the cell output.
feature_level_tests

## How to set test parameters?

In [None]:
# Same kind of column-level tests as before, now with explicit parameters.
# NOTE(review): lt / lte presumably mean "less than" / "less than or equal"
# thresholds on the test value -- confirm against the Evidently docs.
parameterized_checks = [
    TestMeanInNSigmas(column_name='hours-per-week', n_sigmas=3),
    TestShareOfOutRangeValues(column_name='hours-per-week', lte=0),
    # Disabled in this example (kept for reference):
    #   TestNumberOfOutListValues(column_name='education', lt=0)
    TestColumnShareOfNulls(column_name='education', lt=0.2),
]

# Rebinds feature_level_tests to a new suite.
feature_level_tests = TestSuite(tests=parameterized_checks)
feature_level_tests.run(reference_data=ref, current_data=curr)

# Trailing bare expression: the notebook shows the suite as the cell output.
feature_level_tests

## How to use presets?

In [None]:
# Preset restricted to two columns through its `columns` argument.
preset_columns = ['education-num', 'hours-per-week']
no_target_preset = NoTargetPerformanceTestPreset(columns=preset_columns)

no_target_performance = TestSuite(tests=[no_target_preset])
no_target_performance.run(reference_data=ref, current_data=curr)

# Trailing bare expression: the notebook shows the suite as the cell output.
no_target_performance

In [None]:
# Suite made of the data drift preset alone, with default settings.
drift_preset = DataDriftTestPreset()

data_drift = TestSuite(tests=[drift_preset])
data_drift.run(reference_data=ref, current_data=curr)

# Trailing bare expression: the notebook shows the suite as the cell output.
data_drift

In [None]:
# Suite made of the data stability preset alone, with default settings.
stability_preset = DataStabilityTestPreset()

data_stability = TestSuite(tests=[stability_preset])
data_stability.run(reference_data=ref, current_data=curr)

# Trailing bare expression: the notebook shows the suite as the cell output.
data_stability

In [None]:
# Suite made of the data quality preset alone, with default settings.
quality_preset = DataQualityTestPreset()

data_quality = TestSuite(tests=[quality_preset])
data_quality.run(reference_data=ref, current_data=curr)

# Trailing bare expression: the notebook shows the suite as the cell output.
data_quality