In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeClassifier
from sklearn import metrics
from sklearn.pipeline import make_pipeline

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

In [5]:
import alibi

In [9]:
from alibi_detect.cd import ChiSquareDrift, TabularDrift
from alibi_detect.utils.saving import save_detector, load_detector

In [10]:
# Load the scikit-learn wine dataset and pull out the feature matrix,
# the class labels and the column names.
wine_data = load_wine()
X = wine_data.data
y = wine_data.target
feature_names = wine_data.feature_names

# Split the samples in half with a fixed seed: one half serves as the drift
# reference set, the other half as the test set.
X_ref, X_test, y_ref, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

In [14]:
# Build the tabular drift detector from the reference half, passing 0.05 as
# its p-value threshold (`p_val`).
cd = TabularDrift(x_ref=X_ref, p_val=0.05)



In [15]:
# Run the detector on the untouched test half of the split and report the
# dataset-level decision. `is_drift` is used as an index into `labels`
# (0 -> 'No', 1 -> 'Yes').
labels = ['No', 'Yes']
preds = cd.predict(X_test)
print(f"Drift: {labels[preds['data']['is_drift']]}")

Drift: No


In [16]:
# Simulate a calibration error by scaling every test value by 10%, then run
# the same dataset-level drift check on the scaled copy.
X_test_cal_error = X_test * 1.1
labels = ['No', 'Yes']
preds = cd.predict(X_test_cal_error)
print(f"Drift: {labels[preds['data']['is_drift']]}")

Drift: Yes


In [17]:
# Feature-level drift check: add a constant offset of 4 to every test value
# and ask the detector for a per-feature result (drift_type='feature')
# rather than a single dataset-level one.
X_test_shifted = X_test + 4
fpreds = cd.predict(X_test_shifted, drift_type='feature')

In [18]:
# Build one JSON-serialisable record per feature from the feature-level
# prediction in `fpreds`.
#
# The statistic name is the same for every feature (all features of this
# dataset are numeric, and the detector was created without declaring any
# categorical features), so it is set once here instead of being reassigned
# on every iteration and then ignored in favour of a duplicate literal.
stat = 'K-S'
feature_preds = fpreds['data']

results = []
for f in range(cd.n_features):
    fname = feature_names[f]
    is_drift = feature_preds['is_drift'][f]
    stat_val, p_val = feature_preds['distance'][f], feature_preds['p_val'][f]

    results.append(
        {
            'feature': fname,
            'statistic': stat,
            # float() converts the values to plain Python floats so that
            # json.dumps can serialise them.
            'statisticValue': float(stat_val),
            # `is_drift` is used as an index into `labels` ('No' / 'Yes').
            'driftResult': labels[is_drift],
            'pValue': float(p_val)
        }
    )

In [26]:
import json

# Pretty-print the per-feature drift records as JSON, 4-space indented and
# with the keys of each record sorted alphabetically.
results_json = json.dumps(results, sort_keys=True, indent=4)
print(results_json)

[
    {
        "driftResult": "Yes",
        "feature": "alcohol",
        "pValue": 0.0,
        "statistic": "K-S",
        "statisticValue": 1.0
    },
    {
        "driftResult": "Yes",
        "feature": "malic_acid",
        "pValue": 0.0,
        "statistic": "K-S",
        "statisticValue": 0.9775280952453613
    },
    {
        "driftResult": "Yes",
        "feature": "ash",
        "pValue": 0.0,
        "statistic": "K-S",
        "statisticValue": 1.0
    },
    {
        "driftResult": "Yes",
        "feature": "alcalinity_of_ash",
        "pValue": 8.490475522648921e-08,
        "statistic": "K-S",
        "statisticValue": 0.42696627974510193
    },
    {
        "driftResult": "No",
        "feature": "magnesium",
        "pValue": 0.070224329829216,
        "statistic": "K-S",
        "statisticValue": 0.1910112351179123
    },
    {
        "driftResult": "Yes",
        "feature": "total_phenols",
        "pValue": 0.0,
        "statistic": "K-S",
        "statisti