# Experiment

## Install Packages

In [1]:
% load_ext autoreload
% autoreload 2

In [2]:
!pip install seaborn
!pip install numpy
!pip install pandas
!pip install river
!pip install scikit-learn
!pip install lightgbm
!pip install matplotlib



In [96]:
import warnings

import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier
from matplotlib import pyplot as plt

# Project-local helper modules, one import per line so each dependency is visible.
import drift_detector_with_labels
import deepcheck_detectors
import evidently_ai_detectors
import drift
import drift_detector_multivariate_hdddm
import drift_detector_multivariate_md3
import drift_detector_multivariate_ollindda
import tensorflow_detectors

# Silence every warning for the rest of the session; a single catch-all
# "ignore" filter is enough.
warnings.filterwarnings('ignore')

AttributeError: module 'pyparsing' has no attribute 'downcaseTokens'

## Load Dataset & EDA

In [73]:
# Both UCI wine-quality files sit in the same remote folder and use ';' as separator.
UCI_WINE_QUALITY_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality"

red_wine_dataset = pd.read_csv(f"{UCI_WINE_QUALITY_URL}/winequality-red.csv", sep=";")
white_wine_dataset = pd.read_csv(f"{UCI_WINE_QUALITY_URL}/winequality-white.csv", sep=";")

In [74]:
# Tag every row with the colour of the file it was loaded from.
for frame, colour in ((red_wine_dataset, "red"), (white_wine_dataset, "white")):
    frame['wine_type'] = colour

In [75]:
# Stack the red rows on top of the white rows, then encode wine_type as 1 = red, 0 = white.
wine_dataset = pd.concat((red_wine_dataset, white_wine_dataset))
is_red = wine_dataset["wine_type"] == "red"
wine_dataset["wine_type"] = np.where(is_red, 1, 0)
wine_dataset.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,wine_type
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,1
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1


In [76]:
# Collapse the raw quality score into three classes:
#   0 -> score <= 3, 1 -> 3 < score <= 6, 2 -> score > 6.
# The column is overwritten in place, so re-running this cell on the already
# binned values (0, 1, 2) would map every row to class 0.
raw_quality = wine_dataset["quality"]
conditions = [
    raw_quality.le(3),
    raw_quality.gt(3) & raw_quality.le(6),
    raw_quality.gt(6),
]
values = [0, 1, 2]

wine_dataset['quality'] = np.select(conditions, values)

wine_dataset['quality'].value_counts()

1    5190
2    1277
0      30
Name: quality, dtype: int64

In [77]:
# Hold out 40% of the rows for testing. The split is shuffled, so the seed is
# fixed to keep every downstream number reproducible after a kernel restart.
SPLIT_SEED = 1234
wine_dataset_train, wine_dataset_test = train_test_split(
    wine_dataset, test_size=0.4, shuffle=True, random_state=SPLIT_SEED
)

# Features are every column except the "quality" label.
X_train = wine_dataset_train.loc[:, wine_dataset_train.columns != "quality"]
y_train = wine_dataset_train["quality"]
X_test = wine_dataset_test.loc[:, wine_dataset_test.columns != "quality"]
y_test = wine_dataset_test["quality"]

## Fit LightGBM

In [84]:
# Positions, within the feature matrix, of the columns listed as categorical.
categorical_features_names = ["wine_type"]
features_names = X_train.columns
cat_features_index = []
for position, column in enumerate(features_names):
    if column in categorical_features_names:
        cat_features_index.append(position)

In [85]:
# Hyper-parameters of the LightGBM classifier.
model_params = {
    'learning_rate': 0.1,
    'max_depth': -1,  # LightGBM's value for "no depth limit"
    'n_estimators': 500,
    'min_child_samples': 10,
    'n_jobs': 1,
    'random_state': 1234,
}
lgbm_model = LGBMClassifier(**model_params)
# categorical_feature is an argument of fit(), not an estimator hyper-parameter,
# so the categorical column positions are passed here.
lgbm_model.fit(X_train, y_train, categorical_feature=cat_features_index)

In [86]:
# Accuracy of the fitted model on the held-out test split.
y_pred = lgbm_model.predict(X_test)
accuracy_score(y_test, y_pred)  # signature is (y_true, y_pred)

0.8741823778376299

## Real Concept Drift

In [80]:
# Derive the concept-drift dataset from the training frame with the project's generator.
# NOTE(review): presumably rows whose "alcohol" is greater than 12 get "quality" set to 0 —
# confirm against drift.drift_generator_concept_drift.
wine_dataset_corrupted_concept_drift = drift.drift_generator_concept_drift(data=wine_dataset_train,
                                                                           label_col="quality",
                                                                           label_value=0,
                                                                           column_name="alcohol",
                                                                           value=12,
                                                                           action="greater")

In [81]:
# Correlation between "alcohol" and the "quality" label in the drifted frame.
wine_dataset_corrupted_concept_drift["alcohol"].corr(wine_dataset_corrupted_concept_drift["quality"])

-0.4144500059465872

In [82]:
# Correlation between "alcohol" and the "quality" label in the test split.
wine_dataset_test["alcohol"].corr(wine_dataset_test["quality"])

0.3878129756409628

### Deepchecks

In [91]:
# Compare the training frame with the drifted frame using test_type="feature_drift".
deepcheck_detectors.deepcheck_detect_drift(data_train=wine_dataset_train,
                                           data_to_compare=wine_dataset_corrupted_concept_drift,
                                           label_col="quality",
                                           cat_features=categorical_features_names,
                                           model=lgbm_model,
                                           test_type="feature_drift")

In [92]:
# Compare the training frame with the drifted frame using test_type="prediction_drift".
deepcheck_detectors.deepcheck_detect_drift(data_train=wine_dataset_train,
                                           data_to_compare=wine_dataset_corrupted_concept_drift,
                                           label_col="quality",
                                           cat_features=categorical_features_names,
                                           model=lgbm_model,
                                           test_type="prediction_drift")

No drift detected


In [93]:
# Compare the training frame with the drifted frame using test_type="dataset_drift".
deepcheck_detectors.deepcheck_detect_drift(data_train=wine_dataset_train,
                                           data_to_compare=wine_dataset_corrupted_concept_drift,
                                           label_col="quality",
                                           cat_features=categorical_features_names,
                                           model=lgbm_model,
                                           test_type="dataset_drift")

Calculating permutation feature importance. Expected to finish in 1 seconds
No drift detected


### Evidently AI

In [90]:
# Run the project's Evidently AI helper on the training frame vs. the drifted frame.
# No model is passed to this helper, only the label and categorical column names.
evidently_ai_detectors.evidently_ai_detect_drift(data_train=wine_dataset_train,
                                                 data_to_compare=wine_dataset_corrupted_concept_drift,
                                                 label_col="quality",
                                                 cat_features=categorical_features_names)

Alarm quality


### TensorFlow

In [103]:
# Replace the row labels of both frames with a fresh 0..n-1 range (old labels are dropped).
wine_dataset_corrupted_concept_drift, wine_dataset_train = (
    frame.reset_index(drop=True)
    for frame in (wine_dataset_corrupted_concept_drift, wine_dataset_train)
)

### Drift Detector with Labels

In [106]:
# Build one stream: the training rows followed by the drifted rows.
# ignore_index=True gives the stream a single 0..n-1 index; without it the row
# labels of the two frames would repeat.
data_wine = pd.concat([wine_dataset_train, wine_dataset_corrupted_concept_drift],
                      axis=0, ignore_index=True)

In [107]:
# Run the label-based detector helper on the stacked stream with test_name="EDDM".
drift_detector_with_labels.drift_detector_with_labels_test(data_to_compare=data_wine,
                                                           label_col="quality",
                                                           model=lgbm_model,
                                                           test_name="EDDM")

Change detected at index 4092
Change detected at index 4332
Change detected at index 4576


In [108]:
# Run the label-based detector helper on the stacked stream with test_name="HDDM_W".
drift_detector_with_labels.drift_detector_with_labels_test(data_to_compare=data_wine,
                                                           label_col="quality",
                                                           model=lgbm_model,
                                                           test_name="HDDM_W")



In [109]:
# Run the label-based detector helper on the stacked stream with test_name="ADWIN".
drift_detector_with_labels.drift_detector_with_labels_test(data_to_compare=data_wine,
                                                           label_col="quality",
                                                           model=lgbm_model,
                                                           test_name="ADWIN")

Change detected at index 4031
Change detected at index 4063
Change detected at index 4095
Change detected at index 4191
Change detected at index 4223
Change detected at index 4479


## Drift Detector Multivariate: HDDDM & MD3

#### HDDDM

In [None]:
# Split the training frame again into features (all columns but the label) and label.
LABEL_COLUMN = "quality"
X_train = wine_dataset_train.drop(columns=LABEL_COLUMN)
y_train = wine_dataset_train[LABEL_COLUMN]

In [110]:
# Split the drifted frame into features (all columns but the label) and label.
is_feature = wine_dataset_corrupted_concept_drift.columns != "quality"
X_corrupted = wine_dataset_corrupted_concept_drift.loc[:, is_feature]
y_corrupted = wine_dataset_corrupted_concept_drift["quality"]

In [116]:
# HDDDM helper: receives feature columns only (the label is not passed).
# NOTE(review): gamma_level=0.05 is presumably the detector's significance level — confirm.
drift_detector_multivariate_hdddm.hdddm_detect_drift(data_train=X_train,
                                                     data_to_compare=X_corrupted,
                                                     gamma_level=0.05)

None


#### MD3

In [115]:
# MD3 helper: receives the full frames (label included) plus the label column name.
drift_detector_multivariate_md3.md3_detect_drift(data_train=wine_dataset_train,
                                                 data_to_compare=wine_dataset_corrupted_concept_drift,
                                                 label_col="quality")

## Gradual concept drift: alcohol increase by 5

### Deepchecks

In [117]:
# Gradual-drift variant of the Deepcheck helper, run with test_type="feature_drift".
# NOTE(review): data_to_compare is the same frame produced by the concept-drift
# generator earlier in the notebook — confirm this is the intended input here.
deepcheck_detectors.deepcheck_detect_gradual_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_concept_drift,
    label_col="quality",
    cat_features=categorical_features_names,
    model=lgbm_model,
    test_type="feature_drift",
)

In [118]:
# Compare the training frame with the drifted frame using test_type="prediction_drift".
deepcheck_detectors.deepcheck_detect_drift(data_train=wine_dataset_train,
                                           data_to_compare=wine_dataset_corrupted_concept_drift,
                                           label_col="quality",
                                           cat_features=categorical_features_names,
                                           model=lgbm_model,
                                           test_type="prediction_drift")

No drift detected


In [119]:
# Compare the training frame with the drifted frame using test_type="dataset_drift".
deepcheck_detectors.deepcheck_detect_drift(data_train=wine_dataset_train,
                                           data_to_compare=wine_dataset_corrupted_concept_drift,
                                           label_col="quality",
                                           cat_features=categorical_features_names,
                                           model=lgbm_model,
                                           test_type="dataset_drift")

Calculating permutation feature importance. Expected to finish in 2 seconds
No drift detected






### Evidently AI
# Training frame vs. drifted frame through the Evidently AI helper.
evidently_ai_detectors.evidently_ai_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_concept_drift,
    label_col="quality",
    cat_features=categorical_features_names,
)
### Tensorflow
# Give both frames a fresh 0..n-1 index before they are stacked below.
wine_dataset_corrupted_concept_drift = wine_dataset_corrupted_concept_drift.reset_index(drop=True)
wine_dataset_train = wine_dataset_train.reset_index(drop=True)
### Drift Detector with Labels
# Stream = training rows followed by the drifted rows.
data_wine = pd.concat([wine_dataset_train, wine_dataset_corrupted_concept_drift], axis=0)
# Run the three label-based detectors one after the other on the same stream.
for test_name in ("EDDM", "HDDM_W", "ADWIN"):
    drift_detector_with_labels.drift_detector_with_labels_test(
        data_to_compare=data_wine,
        label_col="quality",
        model=lgbm_model,
        test_name=test_name,
    )
## Drift Detector Multivariate: HDDDM & MD3
#### HDDDM
# HDDDM works on feature columns only, so the label is split off first.
X_train = wine_dataset_train.loc[:, wine_dataset_train.columns != "quality"]
y_train = wine_dataset_train["quality"]
X_corrupted = wine_dataset_corrupted_concept_drift.loc[:, wine_dataset_corrupted_concept_drift.columns != "quality"]
y_corrupted = wine_dataset_corrupted_concept_drift["quality"]
drift_detector_multivariate_hdddm.hdddm_detect_drift(
    data_train=X_train,
    data_to_compare=X_corrupted,
    gamma_level=0.05,
)
#### MD3
# The call needs all three arguments and a closing parenthesis to be valid Python;
# the arguments mirror the MD3 call in the "Real Concept Drift" section.
drift_detector_multivariate_md3.md3_detect_drift(
    data_train=wine_dataset_train,
    data_to_compare=wine_dataset_corrupted_concept_drift,
    label_col="quality",
)


In [56]:
# NOTE(review): this function is called unqualified, but the import cell only imports
# modules, so on a fresh kernel the name is presumably undefined — confirm which module
# provides it (drift_detector_with_labels?) and qualify the call.
# NOTE(review): the positional arguments presumably are (train frame, frame to compare,
# drifted column, label column, drift value, action, samples per day, number of days) —
# confirm against the helper's signature.
drift_detector_labels_gradual_drift(wine_dataset_train, wine_dataset_test, "alcohol", "quality",
                                    2, "increase", 100, 5)

Day 0 :
Drift Detected
Day 1 :
Day 2 :
Drift Detected
Day 3 :
Drift Detected
Day 4 :
Drift Detected
Day 5 :


In [63]:
# Seasonal-drift run of the MD3 helper on the "alcohol" column with "quality" as label.
# NOTE(review): the positional numbers (2, 3, ..., 100, 10) presumably are drift value,
# season frequency, samples per day and number of days — confirm against the helper's
# signature and prefer keyword arguments.
drift_detector_multivariate_md3.md3_seasonal_drift(wine_dataset_train, wine_dataset_test, "alcohol", "quality",
                                                   2, 3, "increase", 100, 10)

Day 0 :
Alarm, drift Detected
Day 1 :
Alarm, drift Detected
Day 2 :
Day 3 :
Alarm, drift Detected
Day 4 :
Alarm, drift Detected
Day 5 :
Day 6 :
Alarm, drift Detected
Day 7 :
Alarm, drift Detected
Day 8 :
Day 9 :
Alarm, drift Detected
Day 10 :
Alarm, drift Detected


In [71]:
# Gradual-drift run of the OLINDDA helper: "alcohol" is the drifted column, with
# 6 clusters, a drift value of 2, 100 samples and 5 days.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'NoneType' object has no attribute 'split'" — the cell does not
# currently run to completion; investigate before relying on its result.
drift_detector_multivariate_ollindda.olindda_gradual_drift(data_train=wine_dataset_train,
                                                           data_to_compare=wine_dataset_test,
                                                           column_name="alcohol",
                                                           n_clusters=6,
                                                           value_drift=2,
                                                           action="increase",
                                                           nb_sample=100,
                                                           nb_days=5)

Day 0 :
    fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0             7.4              0.50         0.47            2.00      0.086   
1             8.2              1.00         0.09            2.30      0.065   
2             6.9              0.24         0.39            1.30      0.063   
3             6.4              0.34         0.20           14.90      0.060   
4             6.3              0.15         0.30            1.40      0.022   
..            ...               ...          ...             ...        ...   
95            6.9              0.25         0.27            9.05      0.039   
96            7.2              0.21         0.34           11.90      0.043   
97            7.1              0.31         0.25           11.20      0.048   
98            6.6              0.29         0.29            1.80      0.036   
99            7.1              0.24         0.41           17.80      0.046   

    free sulfur dioxide  total sulfur dioxi

AttributeError: 'NoneType' object has no attribute 'split'