# Statistical Tests

In [None]:
import pandas as pd
import numpy as np

from scipy.stats import anderson_ksamp
from sklearn.datasets import fetch_openml

from evidently import ColumnMapping
from evidently.calculations.stattests import StatTest
from evidently.dashboard import Dashboard
from evidently.dashboard.tabs import DataDriftTab, CatTargetDriftTab
from evidently.options import DataDriftOptions

In [None]:
# Install blanket 'ignore' filters so that no warnings are shown in the rest of the notebook.
# NOTE(review): both calls register a catch-all 'ignore' filter; one of them would likely suffice.
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Adult Dataset

In [None]:
# Download the Adult dataset from OpenML (name 'adult', version 2).
# as_frame=True guarantees that `data.frame` is a pandas DataFrame; with 'auto'
# it would be None for a sparsely stored dataset, and every cell below needs a DataFrame.
data = fetch_openml(name='adult', version=2, as_frame=True)
df = data.frame
df.head()

Let's add two features to illustrate that the stat test is chosen based not only on the feature's type, but also on its number of unique values.

Keep in mind that these features are purely random, so we don't expect any drift in them.

In [None]:
# Two synthetic numerical features with only 3 and 2 distinct values, drawn
# uniformly at random. A locally seeded generator makes the columns (and the
# drift results computed from them) identical on every Restart & Run All.
rng = np.random.default_rng(0)
df['num_feature_with_3_values'] = rng.choice(3, size=df.shape[0])
df['num_feature_with_2_values'] = rng.choice(2, size=df.shape[0])

In [None]:
# Preview the frame again to check that the two synthetic columns were added.
df.head()

# Automatic Statistical Test Selection

In [None]:
# Columns treated as numerical, including the two synthetic low-cardinality ones.
numerical_features = [
    'age',
    'fnlwgt',
    'education-num',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'num_feature_with_3_values',
    'num_feature_with_2_values',
]

# Columns treated as categorical.
categorical_features = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
]

# Classification task with 'class' as the target column.
column_mapping = ColumnMapping(
    target='class',
    task='classification',
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

## Small dataset with <= 1k observations

### no drift
We created 2 small random samples, so we do not expect to see a lot of drift here.

In [None]:
# Reference and current sets: two 50-row random draws from the same frame,
# each with its own fixed random_state.
reference_sample = df.sample(50, random_state=0)
current_sample = df.sample(50, random_state=10)

data_drift_dashboard = Dashboard(tabs=[DataDriftTab()])
data_drift_dashboard.calculate(reference_sample, current_sample, column_mapping=column_mapping)
data_drift_dashboard.show()

With small datasets, two samples are more likely to differ in distribution purely by chance. This result also shows that the statistical tests are quite sensitive.

### data shifted
We split the data into 2 samples by relationship status, so we expect to see some drift here.

In [None]:
# Split rows by relationship status: 'Husband'/'Wife' versus everything else.
is_spouse = df.relationship.isin(['Husband', 'Wife'])
reference_sample = df[is_spouse].sample(1000, random_state=0)
current_sample = df[~is_spouse].sample(1000, random_state=10)

data_drift_dashboard = Dashboard(tabs=[DataDriftTab()])
data_drift_dashboard.calculate(reference_sample, current_sample, column_mapping=column_mapping)
data_drift_dashboard.show()

## Big dataset with > 1k observations

### no drift
We created 2 random samples, so we do not expect to see a lot of drift here.

In [None]:
# Same setup as the small no-drift case, but with 30000 rows per sample.
reference_sample = df.sample(30000, random_state=0)
current_sample = df.sample(30000, random_state=10)

data_drift_dashboard = Dashboard(tabs=[DataDriftTab()])
data_drift_dashboard.calculate(reference_sample, current_sample, column_mapping=column_mapping)
data_drift_dashboard.show()

### data shifted
We split the data into 2 samples by relationship status, so we expect to see some drift here.

In [None]:
# Split rows by relationship status: 'Husband'/'Wife' versus everything else.
# Both 30000-row samples are drawn with replacement (replace=True).
is_spouse = df.relationship.isin(['Husband', 'Wife'])
reference_sample = df[is_spouse].sample(30000, random_state=0, replace=True)
current_sample = df[~is_spouse].sample(30000, random_state=10, replace=True)

data_drift_dashboard = Dashboard(tabs=[DataDriftTab()])
data_drift_dashboard.calculate(reference_sample, current_sample, column_mapping=column_mapping)
data_drift_dashboard.show()

# Statistical Test for All, Numerical and Categorical Features

In [None]:
# Options requesting the 'psi' stattest via `all_features_stattest`
# (per the parameter name, for every feature).
# NOTE(review): the next cell reassigns `stat_test_option` before any dashboard
# uses it, so this setting is illustrative only.
stat_test_option = DataDriftOptions(all_features_stattest='psi')

In [None]:
# Per-feature selection: 'psi' for the 'class' column and 'ks' for 'age'.
# This rebinds `stat_test_option`, replacing the value assigned in the previous cell.
stat_test_option = DataDriftOptions(per_feature_stattest={'class':'psi', 'age':'ks'})

In [None]:
# Two 50-row random draws, evaluated with the stattest options chosen above.
reference_sample = df.sample(50, random_state=0)
current_sample = df.sample(50, random_state=10)

data_drift_dashboard = Dashboard(tabs=[DataDriftTab()], options=[stat_test_option])
data_drift_dashboard.calculate(reference_sample, current_sample, column_mapping=column_mapping)
data_drift_dashboard.show()

# Custom Statistical Test 

In [None]:
def _anderson_stat_test(reference_data: pd.Series, current_data: pd.Series, _feature_type: str, threshold: float):
    """Custom drift test based on the k-sample Anderson-Darling test.

    Args:
        reference_data: values of the feature in the reference dataset.
        current_data: values of the same feature in the current dataset.
        _feature_type: feature type supplied by the caller; not used here.
        threshold: significance level below which drift is reported.

    Returns:
        A ``(p_value, drift_detected)`` tuple, where ``drift_detected`` is
        ``p_value < threshold``.
    """
    # Pass the samples as a list of separate 1-D arrays. Stacking them with
    # np.array([...]) only works when both samples have the same length.
    samples = [np.asarray(reference_data), np.asarray(current_data)]
    # Element 2 of the result is the approximate significance level (p-value);
    # by default scipy reports it clipped to the range [0.001, 0.25].
    p_value = anderson_ksamp(samples)[2]
    return p_value, p_value < threshold

# Wrap the function above in a StatTest object with name "anderson" and display
# name "Anderson test (p_value)", allowed only for numerical ("num") features.
anderson_stat_test = StatTest(
    name="anderson",
    display_name="Anderson test (p_value)",
    func=_anderson_stat_test,
    allowed_feature_types=["num"]
)

In [None]:
# Select the custom Anderson test through `num_features_stattest`
# (numerical features, per the parameter name).
stat_test_option = DataDriftOptions(num_features_stattest=anderson_stat_test)

In [None]:
# Two 50-row random draws, evaluated with the custom stattest option defined above.
reference_sample = df.sample(50, random_state=0)
current_sample = df.sample(50, random_state=10)

data_drift_dashboard = Dashboard(tabs=[DataDriftTab()], options=[stat_test_option])
data_drift_dashboard.calculate(reference_sample, current_sample, column_mapping=column_mapping)
data_drift_dashboard.show()

# Statistical Test for Target

In [None]:
# Categorical target drift between two 50-row random draws from the same frame.
reference_sample = df.sample(50, random_state=0)
current_sample = df.sample(50, random_state=10)

target_drift_dashboard = Dashboard(tabs=[CatTargetDriftTab()])
target_drift_dashboard.calculate(reference_sample, current_sample, column_mapping=column_mapping)
target_drift_dashboard.show()