# Data Drift Dashboard for Boston Dataset

In [1]:
import pandas as pd

from sklearn import datasets

from plotly.graph_objects import histogram

from evidently.dashboard import Dashboard
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.tabs import DataDriftTab

from evidently.model_profile import Profile
from evidently.profile_sections import DataDriftProfileSection
from evidently.analyzers.data_drift_analyzer import DataDriftOptions

## Boston Data 

In [2]:
# Load the Boston housing dataset through scikit-learn's `datasets` module.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the installed scikit-learn version still provides it before re-running.
boston = datasets.load_boston()

In [3]:
# Wrap the raw feature matrix in a DataFrame, labelling columns with the dataset's feature names.
boston_frame = pd.DataFrame(data=boston.data, columns=boston.feature_names)

In [4]:
# Add the label as a 'target' column so features and target live in one frame;
# 'target' is the column name the column mapping refers to.
boston_frame['target'] = boston.target

In [5]:
# Preview the first rows to check the feature columns and the added 'target' column.
boston_frame.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [6]:
# Tell evidently which role each column of the frame plays.
column_mapping = ColumnMapping()

# Feature columns, split by type.
column_mapping.categorical_features = ['CHAS', 'RAD']
column_mapping.numerical_features = [
    'CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE',
    'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]

# Utility columns: the frame has a target but no prediction or datetime column.
column_mapping.datetime = None
column_mapping.prediction = None
column_mapping.target = 'target'

In [7]:
# Parameters used to detect dataset drift:
# - confidence: confidence level for the individual features
# - drift_share: share of drifted features needed to detect dataset drift
data_drift_opts = DataDriftOptions()
data_drift_opts.drift_share = 0.5
data_drift_opts.confidence = 0.99

# Custom number of bins to plot in the data drift table, per feature.
data_drift_opts.nbinsx = {'TAX': 3, 'PTRATIO': 5}

# Custom bins to plot in the data drift table, per feature.
data_drift_opts.xbins = {
    # OPTION 1: a plain dict with start / end / size
    'CRIM': {'start': -10.0, 'end': 100.0, 'size': 5.0},
    # OPTION 2: a plotly XBins object (NB: XBins is not JSON serializable)
    'NOX': histogram.XBins(start=-0.5, end=1.5, size=0.05),
}

## Data Drift Dashboard

In [8]:
# Build a dashboard with a single data drift tab, configured with the custom drift options.
boston_data_and_target_drift_dashboard = Dashboard(
    tabs=[DataDriftTab()],
    options=[data_drift_opts],
)

# Reference data: the first 200 rows; current data: the remaining rows.
boston_data_and_target_drift_dashboard.calculate(
    boston_frame[:200],
    boston_frame[200:],
    column_mapping=column_mapping,
)

In [9]:
# Display the calculated dashboard.
boston_data_and_target_drift_dashboard.show()

In [30]:
# Optional: uncomment the line below to export the dashboard to an HTML file.
#boston_data_and_target_drift_dashboard.save('boston_datadrift_with_customized_bins.html')

## Data Drift Profile

In [31]:
# The profile is serialized to JSON, so every option value must be JSON serializable.
data_drift_opts.xbins = {
    # OPTION 1: a plain dict is already JSON serializable
    'CRIM': {'start': -10.0, 'end': 100.0, 'size': 5.0},
    # OPTION 2: convert the plotly 'XBins' object to a JSON-serializable format
    'NOX': histogram.XBins(start=-0.5, end=1.5, size=0.05).to_plotly_json(),
}

In [32]:
# Build a profile with a single data drift section, configured with the same drift options.
boston_target_and_data_drift_profile = Profile(
    sections=[DataDriftProfileSection()],
    options=[data_drift_opts],
)

In [33]:
# Reference data: the first 200 rows; current data: the remaining rows.
boston_target_and_data_drift_profile.calculate(
    boston_frame[:200],
    boston_frame[200:],
    column_mapping=column_mapping,
)

In [34]:
# Serialize the calculated profile to a JSON string.
boston_target_and_data_drift_profile.json()

'{"data_drift": {"name": "data_drift", "datetime": "2021-09-06 11:55:11.401121", "data": {"utility_columns": {"date": null, "id": null, "target": "target", "prediction": null, "drift_conf_level": 0.99, "drift_features_share": 0.5, "nbinsx": {"TAX": 3, "PTRATIO": 5}, "xbins": {"CRIM": {"start": -10.0, "end": 100.0, "size": 5.0}, "NOX": {"end": 1.5, "size": 0.05, "start": -0.5}}}, "cat_feature_names": ["CHAS", "RAD"], "num_feature_names": ["CRIM", "ZN", "INDUS", "NOX", "RM", "AGE", "DIS", "TAX", "PTRATIO", "B", "LSTAT"], "metrics": {"CRIM": {"current_small_hist": [[0.08779036300750895, 0.016162242562051853, 0.005142531724289227, 0.0003673236945920878, 0.0011019710837762626, 0.0007346473891841756, 0.0, 0.0003673236945920872, 0.00036732369459208753, 0.00036732369459208753], [0.00906, 8.905774, 17.802488, 26.699202, 35.595915999999995, 44.49263, 53.389343999999994, 62.28605799999999, 71.182772, 80.079486, 88.9762]], "ref_small_hist": [[1.7232613393040463, 0.146660539515238, 0.18332567439404