# Experiment: Virtual Concept Drifts

## Install Packages

In [None]:
# Load IPython's autoreload extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2

In [None]:
!pip install --quiet seaborn numpy pandas river scikit-learn lightgbm matplotlib deepchecks evidently menelaus tensorflow-data-validation

In [None]:
import warnings

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier
import drift_detector_with_labels, deepcheck_detectors, evidently_ai_detectors, drift, drift_detector_multivariate_hdddm, drift_detector_multivariate_md3, drift_detector_multivariate_ollindda, tensorflow_detectors

warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Load Dataset & EDA

In [None]:
# Read the red and white wine-quality CSVs from the UCI repository; both use ';' as separator.
red_wine_dataset = pd.read_csv(
    filepath_or_buffer="https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
    sep=";",
)
white_wine_dataset = pd.read_csv(
    filepath_or_buffer="https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv",
    sep=";",
)

In [None]:
# Add a constant wine_type column to each frame recording which file its rows came from.
red_wine_dataset = red_wine_dataset.assign(wine_type="red")
white_wine_dataset = white_wine_dataset.assign(wine_type="white")

In [None]:
# Stack red and white wines into one frame.
# ignore_index=True assigns a fresh 0..n-1 index: both inputs were read with the
# default index starting at 0, so without it the row labels of the two frames
# would overlap and label-based selection could match two rows.
wine_dataset = pd.concat([red_wine_dataset, white_wine_dataset], axis=0, ignore_index=True)
# Encode wine_type numerically: 1 for red, 0 for anything else (i.e. white).
wine_dataset["wine_type"] = np.where(wine_dataset["wine_type"] == "red", 1, 0)
wine_dataset.head()

In [None]:
# Collapse the raw quality score into three classes:
#   0 -> score <= 3,   1 -> 3 < score <= 6,   2 -> score > 6
# NOTE: the column is overwritten in place, so re-running this cell on the
# already-bucketed values (all <= 3) would map every row to class 0.
quality_score = wine_dataset["quality"]
conditions = [
    quality_score <= 3,
    (quality_score > 3) & (quality_score <= 6),
    quality_score > 6,
]
values = [0, 1, 2]

wine_dataset['quality'] = np.select(conditions, values)

# Class balance after bucketing.
wine_dataset['quality'].value_counts()

In [None]:
# Hold out 40% of the rows as the test split. random_state is fixed (same seed as
# the model) so that re-running the notebook yields the same train/test partition.
wine_dataset_train, wine_dataset_test = train_test_split(
    wine_dataset, test_size=0.4, shuffle=True, random_state=1234
)
# Separate the features from the "quality" label for both splits.
X_train = wine_dataset_train.drop(columns="quality")
y_train = wine_dataset_train["quality"]
X_test = wine_dataset_test.drop(columns="quality")
y_test = wine_dataset_test["quality"]

## Fit LightGBM

In [None]:
categorical_features_names = ["wine_type"]
features_names = X_train.columns
# Positional indices (within X_train's columns) of the categorical features.
cat_features_index = np.flatnonzero(features_names.isin(categorical_features_names)).tolist()

In [None]:
# Keyword arguments forwarded to LGBMClassifier; categorical_feature carries the
# positional indices computed for the categorical columns.
model_params = dict(
    learning_rate=0.1,
    max_depth=None,
    n_estimators=500,
    min_child_samples=10,
    categorical_feature=cat_features_index,
    n_jobs=1,
    random_state=1234,
)
lgbm_model = LGBMClassifier(**model_params)
lgbm_model.fit(X_train, y_train)

In [None]:
# Score the fitted model on the untouched test split.
y_pred = lgbm_model.predict(X_test)
# accuracy_score expects (y_true, y_pred): ground truth first, predictions second.
accuracy_score(y_test, y_pred)

## Virtual Concept Drift: alcohol increase by 2

### Induce the drift

In [None]:
# Induce the drift on the test split: 'alcohol' is the target column and 2 the
# value handed to the univariate-increase generator.
wine_dataset_corrupted_alcohol = drift.drift_generator_univariate_increase(
    data=wine_dataset_test,
    column_name='alcohol',
    value=2,
)

### Deepchecks

In [None]:
# Deepchecks helper with test_type="feature_drift": training split vs. alcohol-drifted split.
deepcheck_detectors.deepcheck_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_alcohol,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="feature_drift",
)

In [None]:
# Deepchecks helper with test_type="prediction_drift": training split vs. alcohol-drifted split.
deepcheck_detectors.deepcheck_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_alcohol,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="prediction_drift",
)

In [None]:
# Deepchecks helper with test_type="dataset_drift": training split vs. alcohol-drifted split.
deepcheck_detectors.deepcheck_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_alcohol,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="dataset_drift",
)

### Evidently AI

In [None]:
# Evidently AI helper: training split vs. alcohol-drifted split.
evidently_ai_detectors.evidently_ai_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_alcohol,
    label_col="quality",
    cat_features=categorical_features_names,
)

### Tensorflow

In [None]:
# TensorFlow helper: training split vs. alcohol-drifted split.
tensorflow_detectors.tensorflow_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_alcohol,
    label_col="quality",
)

### Drift Detectors with Labels

In [None]:
# Stack the training rows first and the alcohol-drifted rows after them; this single
# frame is what the label-based detectors below receive.
data_wine_alcohol = pd.concat(
    [wine_dataset_train, wine_dataset_corrupted_alcohol],
    axis=0,
)

#### EDDM

In [None]:
# Label-based detector run with test_name="EDDM" on the train + alcohol-drifted frame.
drift_detector_with_labels.drift_detector_with_labels_test(
    data_to_compare=data_wine_alcohol,
    label_col="quality",
    model=lgbm_model,
    test_name="EDDM",
)

#### ADWIN

In [None]:
# Label-based detector run with test_name="ADWIN" on the train + alcohol-drifted frame.
drift_detector_with_labels.drift_detector_with_labels_test(
    data_to_compare=data_wine_alcohol,
    label_col="quality",
    model=lgbm_model,
    test_name="ADWIN",
)

#### HDDM_W

In [None]:
# Label-based detector run with test_name="HDDM_W" on the train + alcohol-drifted frame.
drift_detector_with_labels.drift_detector_with_labels_test(
    data_to_compare=data_wine_alcohol,
    label_col="quality",
    model=lgbm_model,
    test_name="HDDM_W",
)

### Multivariate Drift Detectors: HDDDM, MD3 & OLINDDA
#### HDDDM

In [None]:
# Feature-only frames (every column except "quality") for the training and alcohol-drifted data.
X_train = wine_dataset_train.loc[:, ~wine_dataset_train.columns.isin(["quality"])]
X_corrupted = wine_dataset_corrupted_alcohol.loc[:, ~wine_dataset_corrupted_alcohol.columns.isin(["quality"])]
# HDDDM helper called with gamma_level=0.05.
drift_detector_multivariate_hdddm.hdddm_detect_drift(
    data_train=X_train,
    data_to_compare=X_corrupted,
    gamma_level=0.05,
)

#### MD3

In [None]:
# MD3 helper: receives the full frames (label column included) plus the label name.
drift_detector_multivariate_md3.md3_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_alcohol,
    label_col="quality",
)

#### OLINDDA

In [None]:
# OLINDDA helper with n_clusters=6; reuses X_train / X_corrupted built in the HDDDM cell.
drift_detector_multivariate_ollindda.olindda_detect_drift(
    x_train_data=X_train,
    x_test_data=X_corrupted,
    n_clusters=6,
)

## Virtual Concept Drift: decrease the total sulfur dioxide variance by 100

### Induce the drift

In [None]:
# Parameters handed to the generator: the mean of the test column, and the test
# column's variance minus 100.
# NOTE(review): `sigma` is derived from var(), not from a standard deviation —
# confirm which of the two drift_generator_univariate_change_to_normal expects.
dioxide_test_values = wine_dataset_test["total sulfur dioxide"]
mu = dioxide_test_values.mean()
sigma = dioxide_test_values.var() - 100
wine_dataset_corrupted_dioxide = drift.drift_generator_univariate_change_to_normal(
    data=wine_dataset_test,
    column_name="total sulfur dioxide",
    seed=202,
    mu=mu,
    sigma=sigma,
)

### Deepchecks

In [None]:
# Deepchecks helper with test_type="feature_drift": training split vs. dioxide-drifted split.
deepcheck_detectors.deepcheck_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_dioxide,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="feature_drift",
)

In [None]:
# Deepchecks helper with test_type="prediction_drift": training split vs. dioxide-drifted split.
deepcheck_detectors.deepcheck_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_dioxide,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="prediction_drift",
)

In [None]:
# Deepchecks helper with test_type="dataset_drift": training split vs. dioxide-drifted split.
deepcheck_detectors.deepcheck_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_dioxide,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="dataset_drift",
)

### Evidently AI


In [None]:
# Evidently AI helper: training split vs. dioxide-drifted split.
evidently_ai_detectors.evidently_ai_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_dioxide,
    label_col="quality",
    cat_features=categorical_features_names,
)

### Tensorflow


In [None]:
# TensorFlow helper: training split vs. dioxide-drifted split.
tensorflow_detectors.tensorflow_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_dioxide,
    label_col="quality",
)

### Drift Detectors with Labels

In [None]:
# Stack the training rows first and the dioxide-drifted rows after them; this single
# frame is what the label-based detectors below receive.
data_wine_dioxide = pd.concat(
    [wine_dataset_train, wine_dataset_corrupted_dioxide],
    axis=0,
)


#### EDDM


In [None]:
# Label-based detector run with test_name="EDDM" on the train + dioxide-drifted frame.
drift_detector_with_labels.drift_detector_with_labels_test(
    data_to_compare=data_wine_dioxide,
    label_col="quality",
    model=lgbm_model,
    test_name="EDDM",
)

#### ADWIN


In [None]:
# Label-based detector run with test_name="ADWIN" on the train + dioxide-drifted frame.
drift_detector_with_labels.drift_detector_with_labels_test(
    data_to_compare=data_wine_dioxide,
    label_col="quality",
    model=lgbm_model,
    test_name="ADWIN",
)

#### HDDM_W


In [None]:
# Label-based detector run with test_name="HDDM_W" on the train + dioxide-drifted frame.
drift_detector_with_labels.drift_detector_with_labels_test(
    data_to_compare=data_wine_dioxide,
    label_col="quality",
    model=lgbm_model,
    test_name="HDDM_W",
)

### Multivariate Drift Detectors: HDDDM, MD3 & OLINDDA


#### HDDDM


In [None]:
# Feature-only frames (every column except "quality") for the training and dioxide-drifted data.
X_train = wine_dataset_train.loc[:, ~wine_dataset_train.columns.isin(["quality"])]
X_corrupted = wine_dataset_corrupted_dioxide.loc[:, ~wine_dataset_corrupted_dioxide.columns.isin(["quality"])]
# HDDDM helper called with gamma_level=0.05.
drift_detector_multivariate_hdddm.hdddm_detect_drift(
    data_train=X_train,
    data_to_compare=X_corrupted,
    gamma_level=0.05,
)

#### MD3


In [None]:
# MD3 helper: receives the full frames (label column included) plus the label name.
drift_detector_multivariate_md3.md3_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_dioxide,
    label_col="quality",
)

#### OLINDDA

In [None]:
# Replace the index of the drifted feature frame with a fresh 0..n-1 range
# (the old index is discarded, not kept as a column).
# NOTE(review): the other two OLINDDA sections do not perform this step —
# confirm whether it is required there as well.
X_corrupted = X_corrupted.reset_index(drop=True)

In [None]:
# OLINDDA helper with n_clusters=6 on the training features and the dioxide-drifted features.
drift_detector_multivariate_ollindda.olindda_detect_drift(
    x_train_data=X_train,
    x_test_data=X_corrupted,
    n_clusters=6,
)

## Virtual Concept Drift: swap values of the wine_type column

### Induce the drift

In [None]:
# Induce the drift on the test split: the generator is asked to exchange the
# two wine_type categories, which are passed as the strings "1" and "0".
wine_dataset_corrupted_wine_type = drift.drift_generator_univariate_categorical_change(
    data=wine_dataset_test,
    column_name='wine_type',
    value1="1",
    value2="0",
)
# The training frame encodes wine_type as integers (1 = red, 0 = white), so cast the
# drifted column to int right away. Doing it here rather than only before the
# prediction-drift check means every detector, including the first feature-drift
# check, compares columns of the same dtype.
# NOTE(review): presumably the generator returns wine_type as strings because it is
# given string values — confirm against the drift module.
wine_dataset_corrupted_wine_type['wine_type'] = wine_dataset_corrupted_wine_type['wine_type'].astype(int)

### Deepchecks


In [None]:
# Deepchecks helper with test_type="feature_drift": training split vs. wine_type-swapped split.
deepcheck_detectors.deepcheck_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_wine_type,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="feature_drift",
)

In [None]:
# Make sure wine_type is integer-typed, matching the encoding of the training frame,
# before running the check (the cast is a no-op if the column is already int).
wine_dataset_corrupted_wine_type['wine_type'] = (
    wine_dataset_corrupted_wine_type['wine_type'].astype(int)
)
# Deepchecks helper with test_type="prediction_drift": training split vs. wine_type-swapped split.
deepcheck_detectors.deepcheck_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_wine_type,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="prediction_drift",
)

In [None]:
# Deepchecks helper with test_type="dataset_drift": training split vs. wine_type-swapped split.
deepcheck_detectors.deepcheck_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_wine_type,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="dataset_drift",
)

### Evidently AI

In [None]:
# Evidently AI helper: training split vs. wine_type-swapped split.
evidently_ai_detectors.evidently_ai_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_wine_type,
    label_col="quality",
    cat_features=categorical_features_names,
)

### Tensorflow


In [None]:
# TensorFlow helper: training split vs. wine_type-swapped split.
tensorflow_detectors.tensorflow_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_wine_type,
    label_col="quality",
)

### Drift Detectors with Labels


In [None]:
# Stack the training rows first and the wine_type-swapped rows after them; this single
# frame is what the label-based detectors below receive.
data_wine_wine_type = pd.concat(
    [wine_dataset_train, wine_dataset_corrupted_wine_type],
    axis=0,
)


#### EDDM


In [None]:
# Label-based detector run with test_name="EDDM" on the train + wine_type-swapped frame.
drift_detector_with_labels.drift_detector_with_labels_test(
    data_to_compare=data_wine_wine_type,
    label_col="quality",
    model=lgbm_model,
    test_name="EDDM",
)

#### ADWIN


In [None]:
# Label-based detector run with test_name="ADWIN" on the train + wine_type-swapped frame.
drift_detector_with_labels.drift_detector_with_labels_test(
    data_to_compare=data_wine_wine_type,
    label_col="quality",
    model=lgbm_model,
    test_name="ADWIN",
)

#### HDDM_W


In [None]:
# Label-based detector run with test_name="HDDM_W" on the train + wine_type-swapped frame.
drift_detector_with_labels.drift_detector_with_labels_test(
    data_to_compare=data_wine_wine_type,
    label_col="quality",
    model=lgbm_model,
    test_name="HDDM_W",
)

### Multivariate Drift Detectors: HDDDM, MD3 & OLINDDA
#### HDDDM


In [None]:
# Feature-only frames (every column except "quality") for the training and wine_type-swapped data.
X_train = wine_dataset_train.loc[:, ~wine_dataset_train.columns.isin(["quality"])]
X_corrupted = wine_dataset_corrupted_wine_type.loc[:, ~wine_dataset_corrupted_wine_type.columns.isin(["quality"])]
# HDDDM helper called with gamma_level=0.05.
drift_detector_multivariate_hdddm.hdddm_detect_drift(
    data_train=X_train,
    data_to_compare=X_corrupted,
    gamma_level=0.05,
)

#### MD3

In [None]:
# MD3 helper: receives the full frames (label column included) plus the label name.
drift_detector_multivariate_md3.md3_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_wine_type,
    label_col="quality",
)

#### OLINDDA

In [None]:
# OLINDDA helper with n_clusters=6; reuses X_train / X_corrupted built in the HDDDM cell.
drift_detector_multivariate_ollindda.olindda_detect_drift(
    x_train_data=X_train,
    x_test_data=X_corrupted,
    n_clusters=6,
)