# Experiment

## Install Packages

In [None]:
# Reload imported modules automatically before each cell executes, so edits to
# the project's detector modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
!pip install --quiet seaborn numpy pandas river scikit-learn lightgbm matplotlib deepchecks evidently menelaus tensorflow-data-validation

In [None]:
import warnings

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier
import drift_detector_with_labels, deepcheck_detectors, evidently_ai_detectors, drift, drift_detector_multivariate_hdddm, drift_detector_multivariate_md3, drift_detector_multivariate_ollindda, tensorflow_detectors

# Silence every Python warning for the rest of the session.
# NOTE(review): the two calls are redundant - each one installs a blanket
# "ignore" filter - and hiding all warnings can mask deprecations raised by the
# drift-detection libraries.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Load Dataset & EDA

In [None]:
# Semicolon-separated wine-quality tables hosted on the UCI ML archive.
RED_WINE_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
WHITE_WINE_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"

red_wine_dataset = pd.read_csv(RED_WINE_URL, sep=";")
white_wine_dataset = pd.read_csv(WHITE_WINE_URL, sep=";")

In [None]:
# Label every row with the frame it came from, before the two frames are merged.
for frame, colour in ((red_wine_dataset, "red"), (white_wine_dataset, "white")):
    frame["wine_type"] = colour

In [None]:
# Stack red and white wines into a single frame. Both CSVs are read with the
# default 0-based row labels, so a plain concat would leave overlapping labels
# duplicated in the index; ignore_index=True assigns a fresh unique 0..n-1 index.
wine_dataset = pd.concat(
    [red_wine_dataset, white_wine_dataset], axis=0, ignore_index=True
)
# Encode wine_type as an integer flag: 1 for red, 0 for white.
wine_dataset["wine_type"] = np.where(wine_dataset["wine_type"] == "red", 1, 0)
wine_dataset.head()

In [None]:
# Bin the raw quality score into three classes:
#   0 -> quality <= 3,  1 -> 3 < quality <= 6,  2 -> quality > 6.
#
# The raw scores are read from the untouched source frames (concatenated in the
# same red-then-white order used to build `wine_dataset`) instead of from
# wine_dataset["quality"] itself. That column is overwritten below, so binning
# it in place would map every row to class 0 if this cell were run a second time.
raw_quality = pd.concat(
    [red_wine_dataset["quality"], white_wine_dataset["quality"]], axis=0
).to_numpy()
# The assignment below is positional, so the row counts must agree.
assert len(raw_quality) == len(wine_dataset), "wine_dataset rows no longer match the source frames"

conditions = [
    raw_quality <= 3,
    (raw_quality > 3) & (raw_quality <= 6),
    raw_quality > 6,
]

values = [0, 1, 2]

wine_dataset['quality'] = np.select(conditions, values)

wine_dataset['quality'].value_counts()

In [None]:
# Hold out 40% of the rows for testing. The shuffle is seeded so that
# Restart & Run All reproduces the same train/test partition; 1234 matches the
# random_state given to the LightGBM model in this notebook.
wine_dataset_train, wine_dataset_test = train_test_split(
    wine_dataset, test_size=0.4, shuffle=True, random_state=1234
)
# Features are every column except the "quality" label.
X_train = wine_dataset_train.drop(columns="quality")
y_train = wine_dataset_train["quality"]
X_test = wine_dataset_test.drop(columns="quality")
y_test = wine_dataset_test["quality"]

## Fit LightGBM

In [None]:
# Names of the categorical columns, and their positions among the training
# feature columns.
categorical_features_names = ["wine_type"]
features_names = X_train.columns
cat_features_index = [
    position
    for position, column in enumerate(features_names)
    if column in categorical_features_names
]

In [None]:
# Hyper-parameters for the LightGBM classifier (single-threaded, fixed seed).
model_params = {
    'learning_rate': 0.1,
    'max_depth': -1,  # LightGBM's explicit "no depth limit" value (its default)
    'n_estimators': 500,
    'min_child_samples': 10,
    'n_jobs': 1,
    'random_state': 1234,
}
lgbm_model = LGBMClassifier(**model_params)
# categorical_feature is an argument of fit(). Passed through the constructor it
# only lands in the Dataset params, where LightGBM logs that the keyword
# "will be ignored", so the categorical columns are declared here instead.
lgbm_model.fit(X_train, y_train, categorical_feature=cat_features_index)

In [None]:
# Predict on the held-out features and score against the true labels.
# accuracy_score's signature is (y_true, y_pred). Accuracy is symmetric, so the
# value is unchanged, but the documented order stays correct if this call is
# later swapped for an asymmetric metric.
y_pred = lgbm_model.predict(X_test)
accuracy_score(y_test, y_pred)

## Seasonal Virtual Concept Drift: increase pH by 50% every two days

### Deepchecks


In [None]:
# Seasonal drift on "pH" (action="multiply", value_drift=1.5), evaluated with
# the helper's "feature_drift" test type.
deepcheck_detectors.deepcheck_detect_seasonal_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_test,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="feature_drift",
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)

In [None]:
# Seasonal drift on "pH" (action="multiply", value_drift=1.5), evaluated with
# the helper's "prediction_drift" test type.
deepcheck_detectors.deepcheck_detect_seasonal_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_test,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="prediction_drift",
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)

In [None]:
# Seasonal drift on "pH" (action="multiply", value_drift=1.5), evaluated with
# the helper's "dataset_drift" test type.
deepcheck_detectors.deepcheck_detect_seasonal_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_test,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="dataset_drift",
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)

### Evidently AI


In [None]:
# Same seasonal "pH" drift scenario (multiply by 1.5), run through the
# Evidently AI helper.
evidently_ai_detectors.evidently_ai_detect_seasonal_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_test,
    label_col="quality",
    cat_features=categorical_features_names,
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)

### TensorFlow Data Validation

In [None]:
# Same seasonal "pH" drift scenario (multiply by 1.5), run through the
# TensorFlow-based helper.
tensorflow_detectors.tensorflow_detect_seasonal_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_test,
    label_col="quality",
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)

### Drift Detector with Labels
#### EDDM

In [None]:
# Label-based detection of the seasonal "pH" drift (multiply by 1.5) using the
# fitted model and test_name="EDDM".
drift_detector_with_labels.drift_detector_labels_seasonal_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_test,
    label_col="quality",
    model=lgbm_model,
    test_name="EDDM",
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)

In [None]:
# Label-based detection of the seasonal "pH" drift (multiply by 1.5) using the
# fitted model and test_name="HDDM_W".
drift_detector_with_labels.drift_detector_labels_seasonal_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_test,
    label_col="quality",
    model=lgbm_model,
    test_name="HDDM_W",
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)

In [None]:
# Label-based detection of the seasonal "pH" drift (multiply by 1.5) using the
# fitted model and test_name="ADWIN".
drift_detector_with_labels.drift_detector_labels_seasonal_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_test,
    label_col="quality",
    model=lgbm_model,
    test_name="ADWIN",
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)

### Drift Detector Multivariate: HDDDM, MD3 & OLINDDA
#### HDDDM

In [None]:
# Feature-only views (label column removed) of the train and test splits.
# NOTE(review): X_corrupted is built from the unmodified test split; the drift
# is presumably injected inside the helper - confirm.
X_train = wine_dataset_train.drop(columns="quality")
X_corrupted = wine_dataset_test.drop(columns="quality")
drift_detector_multivariate_hdddm.hdddm_detect_seasonal_drift(
    data_train=X_train,
    data_to_compare=X_corrupted,
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)

#### MD3

In [None]:
# Seasonal "pH" drift scenario (multiply by 1.5) run through the MD3 helper,
# which receives the full frames together with the label column name.
drift_detector_multivariate_md3.md3_seasonal_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_test,
    label_col="quality",
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)

#### OLINDDA

In [None]:
# Seasonal "pH" drift scenario (multiply by 1.5) run through the OLINDDA helper
# with n_clusters=6. X_train and X_corrupted are the feature-only frames built
# in the HDDDM cell earlier in this section, so that cell must have run first.
drift_detector_multivariate_ollindda.olindda_seasonal_drift(
    data_train=X_train,
    data_to_compare=X_corrupted,
    n_clusters=6,
    column_name="pH",
    action="multiply",
    value_drift=1.5,
    frequency=3,
    nb_sample=100,
    nb_days=10,
)