In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from evidently.dashboard import Dashboard
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.dashboard.tabs import ProbClassificationPerformanceTab, DataDriftTab

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# Load data

Using the wine_quality dataset, we will solve a binary classification task: predicting whether a wine sample is good from its physicochemical characteristics. Each wine has a grade in the quality column. We will treat a wine sample with a grade higher than 5 (i.e., 6 or above) as good, and otherwise as bad.

In [3]:
# Fetch version 1 of the wine_quality dataset from OpenML; the result's
# `.frame` and `.feature_names` attributes are used in later cells.
wine = fetch_openml(name='wine_quality', version=1, as_frame='auto')

In [4]:
# Work with the dataset as a single DataFrame (feature columns plus 'quality').
wine_df = wine.frame

In [5]:
# Derive the binary label from the 'quality' grade:
# strictly greater than 5 -> 'good', everything else -> 'bad'.
wine_df['target'] = (
    wine_df['quality']
    .gt(5)
    .map({True: 'good', False: 'bad'})
    .astype(str)
    .values
)

In [6]:
# Check the class balance of the derived label.
wine_df.target.value_counts()

good    4113
bad     2384
Name: target, dtype: int64

In [7]:
# Preview the first rows, including the derived 'target' column.
wine_df.head()

Unnamed: 0,fixed.acidity,volatile.acidity,citric.acid,residual.sugar,chlorides,free.sulfur.dioxide,total.sulfur.dioxide,density,pH,sulphates,alcohol,quality,target
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5.0,bad
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5.0,bad
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5.0,bad
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6.0,good
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5.0,bad


In [8]:
# Names of the feature columns; these are used as the model inputs below.
wine.feature_names

['fixed.acidity',
 'volatile.acidity',
 'citric.acid',
 'residual.sugar',
 'chlorides',
 'free.sulfur.dioxide',
 'total.sulfur.dioxide',
 'density',
 'pH',
 'sulphates',
 'alcohol']

# Train model, get predictions

In [9]:
# Split the full frame into train and test parts; random_state is fixed so
# the split is reproducible across runs.
train_data, test_data = train_test_split(wine_df, random_state=0)

In [10]:
# Logistic regression classifier with the library's default hyperparameters.
model = LogisticRegression()

In [11]:
# Fit on the training split only. Fitting on the whole wine_df would let the
# model see the rows of test_data, so the "current" (test) metrics shown in
# the dashboards below would no longer be a held-out evaluation.
model.fit(train_data[wine.feature_names], train_data.target)

LogisticRegression()

In [12]:
# Per-class probabilities for both splits. Column labels come from
# model.classes_, so every column is named after the class it actually
# scores instead of relying on a hard-coded ['bad', 'good'] ordering.
train_probas = pd.DataFrame(
    model.predict_proba(train_data[wine.feature_names]),
    columns=model.classes_,
)
test_probas = pd.DataFrame(
    model.predict_proba(test_data[wine.feature_names]),
    columns=model.classes_,
)

In [13]:
# Give both splits a fresh 0..n-1 index so that the column-wise concat below
# pairs each row with its own probabilities (the probability frames are built
# from bare arrays and therefore carry a default integer index).
train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

# Original columns followed by the per-class probability columns.
merged_train_data = pd.concat([train_data, train_probas], axis=1)
merged_test_data = pd.concat([test_data, test_probas], axis=1)

# Probabilistic Model Performance and classification_threshold and cut_quantile options

In [14]:
# Tell evidently which columns hold the true label, the per-class predicted
# probabilities, and the numerical features.
# NOTE(review): the order of the `prediction` list is kept exactly as in the
# original; confirm against the evidently docs whether the order is significant.
wine_column_mapping = ColumnMapping(
    target='target',
    prediction=['good', 'bad'],
    numerical_features=wine.feature_names,
)

In [15]:
# List the widget names that can be passed to include_widgets for this tab.
ProbClassificationPerformanceTab.list_widgets()

['Probabilistic Classification Model Performance Report.',
 'Reference: Model Quality With Macro-average Metrics',
 'Current: Model Quality With Macro-average Metrics',
 'Reference: Class Representation',
 'Current: Class Representation',
 'Reference: Confusion Matrix',
 'Current: Confusion Matrix',
 'Reference: Quality Metrics by Class',
 'Current: Quality Metrics by Class',
 'Reference: Class Separation Quality',
 'Current: Class Separation Quality',
 'Reference: Probability Distribution',
 'Current: Probability Distribution',
 'Reference: ROC Curve',
 'Current: ROC Curve',
 'Reference: Precision-Recall Curve',
 'Current: Precision-Recall Curve',
 'Reference: Precision-Recall Table',
 'Current: Precision-Recall Table',
 'Classification Quality By Feature']

In [16]:
# Subset of the tab's widgets to render (names taken from list_widgets()).
widgets = [
    'Reference: Quality Metrics by Class',
    'Current: Quality Metrics by Class',
    'Reference: Confusion Matrix',
    'Current: Confusion Matrix',
    'Reference: Class Separation Quality',
    'Current: Class Separation Quality',
    'Classification Quality By Feature',
]

# Fixed-seed 1000-row samples: train split as reference, test split as current.
reference_sample = merged_train_data.sample(1000, random_state=0)
current_sample = merged_test_data.sample(1000, random_state=0)

wine_model_performance_dashboard = Dashboard(
    tabs=[ProbClassificationPerformanceTab(include_widgets=widgets)],
)
wine_model_performance_dashboard.calculate(
    reference_data=reference_sample,
    current_data=current_sample,
    column_mapping=wine_column_mapping,
)
wine_model_performance_dashboard.show()

Suppose that precision is a crucial metric in this task, so we want to recalculate all numbers for threshold == 0.8.
In addition, we noticed that the histogram of the chlorides feature in the ‘Classification Quality By Feature’ widget looks less informative because of outliers.

So let’s add some changes: 

Recalculate all metrics with classification threshold == 0.8

Cut the data above the 0.95 quantile from the histogram plot for the chlorides feature

In [17]:
from evidently.options import QualityMetricsOptions

In [18]:
# Options for the performance report:
#   - classification_threshold: recalculate the metrics with a 0.8 threshold
#   - cut_quantile: for 'chlorides', cut the histogram data above the 0.95 quantile
m_options = QualityMetricsOptions(
    classification_threshold=0.8,
    cut_quantile={'chlorides': ('right', 0.95)},
)

In [19]:
# Same report as before, now built with the custom options applied.
# Fixed-seed 1000-row samples: train split as reference, test split as current.
reference_sample = merged_train_data.sample(1000, random_state=0)
current_sample = merged_test_data.sample(1000, random_state=0)

wine_model_performance_dashboard = Dashboard(
    tabs=[ProbClassificationPerformanceTab(include_widgets=widgets)],
    options=[m_options],
)
wine_model_performance_dashboard.calculate(
    reference_data=reference_sample,
    current_data=current_sample,
    column_mapping=wine_column_mapping,
)
wine_model_performance_dashboard.show()

# Data Drift and conf_interval_n_sigmas option

In [20]:
# NOTE: DataDriftTab is already imported in the first cell; this repeat is redundant.
from evidently.dashboard.tabs import DataDriftTab

In [21]:
# Data drift report with default options. Despite its name, this variable
# holds a data drift dashboard (its only tab is DataDriftTab).
# Fixed-seed 1000-row samples: train split as reference, test split as current.
reference_sample = merged_train_data.sample(1000, random_state=0)
current_sample = merged_test_data.sample(1000, random_state=0)

model_performance_dashboard = Dashboard(tabs=[DataDriftTab()])
model_performance_dashboard.calculate(
    reference_data=reference_sample,
    current_data=current_sample,
    column_mapping=wine_column_mapping,
)
model_performance_dashboard.show()


To plot the confidence interval with a width of 3 sigma, specify the conf_interval_n_sigmas parameter:

In [22]:
# Rebinds m_options (previously the threshold/cut_quantile options) to a new
# options object that only sets the confidence-interval width to 3 sigma.
m_options = QualityMetricsOptions(conf_interval_n_sigmas=3)

In [23]:
# Data drift report again, this time with the custom options applied.
# Fixed-seed 1000-row samples: train split as reference, test split as current.
reference_sample = merged_train_data.sample(1000, random_state=0)
current_sample = merged_test_data.sample(1000, random_state=0)

model_performance_dashboard = Dashboard(
    tabs=[DataDriftTab()],
    options=[m_options],
)
model_performance_dashboard.calculate(
    reference_data=reference_sample,
    current_data=current_sample,
    column_mapping=wine_column_mapping,
)
model_performance_dashboard.show()