In [36]:
import pandas as pd

from sklearn.datasets import fetch_california_housing

from evidently.dashboard import Dashboard
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.dashboard.tabs import DataDriftTab, NumTargetDriftTab

from evidently.model_profile import Profile
from evidently.model_profile.sections import DataDriftProfileSection, NumTargetDriftProfileSection

In [2]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# California housing data

In [4]:
# Load the California housing dataset; as_frame=True is requested so the
# result exposes the data as a pandas DataFrame (read via `ca.frame` below).
ca = fetch_california_housing(as_frame=True)

In [6]:
# Single table holding the feature columns together with the MedHouseVal target.
ca_frame = ca.frame

In [11]:
# Preview the first rows to check the column names and value ranges.
ca_frame.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [7]:
# Column that the drift reports treat as the target.
target = 'MedHouseVal'

# Numeric predictor columns of the California housing frame.
numerical_features = [
    'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms',
    'Population', 'AveOccup', 'Latitude', 'Longitude',
]

# No categorical features are declared for this dataset.
categorical_features = []

# Build a new list instead of aliasing `numerical_features`: mutating
# `features` must not silently change the list handed to the column mapping,
# and the concatenation stays correct if categorical features are added later.
features = numerical_features + categorical_features

# Column mapping and data sample

In [9]:
# Tell Evidently which column is the target and which columns are numeric
# features; every other mapping field keeps its default.
column_mapping = ColumnMapping(
    target=target,
    numerical_features=numerical_features,
)

In [10]:
# Reference data: a reproducible 1000-row sample drawn from the first 15000 rows.
ref_data_sample = ca_frame.iloc[:15000].sample(n=1000, random_state=0)

# Production (current) data: a reproducible 1000-row sample drawn from the
# remaining rows, so the two samples never share a row.
prod_data_sample = ca_frame.iloc[15000:].sample(n=1000, random_state=0)

# Target Drift

## Target Drift Dashboard full (verbose_level=1)

In [15]:
# Full target-drift report: a single numerical-target tab at verbose_level=1.
ca_target_drift_dashboard = Dashboard(
    tabs=[
        NumTargetDriftTab(verbose_level=1),
    ],
)

In [16]:
# Compute the report by comparing the reference sample with the production sample.
ca_target_drift_dashboard.calculate(
    ref_data_sample,
    prod_data_sample,
    column_mapping=column_mapping,
)

In [17]:
# Display the computed full target-drift dashboard as this cell's output.
ca_target_drift_dashboard.show()

In [18]:
# Optional: uncomment to export the full target-drift dashboard to an HTML file.
# ca_target_drift_dashboard.save('ca_target_drift.html')

## Target Drift Dashboard short (verbose_level=0)

In [19]:
# Short target-drift report: the same tab at verbose_level=0.
# This rebinds the name used for the full dashboard above.
ca_target_drift_dashboard = Dashboard(
    tabs=[
        NumTargetDriftTab(verbose_level=0),
    ],
)

In [20]:
# Compute the short report on the same reference/production samples.
ca_target_drift_dashboard.calculate(
    ref_data_sample,
    prod_data_sample,
    column_mapping=column_mapping,
)

In [21]:
# Display the computed short target-drift dashboard as this cell's output.
ca_target_drift_dashboard.show()

In [22]:
# Optional: uncomment to export the short target-drift dashboard to an HTML file.
# NOTE(review): this is the same file name as the export line in the full
# (verbose_level=1) section; enabling both would write to the same path, so
# pick a distinct name if both exports are needed.
# ca_target_drift_dashboard.save('ca_target_drift.html')

# Data Drift

## Data Drift Dashboard full (verbose_level=1)

In [23]:
# Full data-drift report: a single data-drift tab at verbose_level=1.
ca_data_drift_dashboard = Dashboard(
    tabs=[
        DataDriftTab(verbose_level=1),
    ],
)

In [24]:
# Compute feature drift between the reference sample and the production sample.
ca_data_drift_dashboard.calculate(
    ref_data_sample,
    prod_data_sample,
    column_mapping=column_mapping,
)

In [25]:
# Display the computed full data-drift dashboard as this cell's output.
ca_data_drift_dashboard.show()

In [26]:
# Optional: uncomment to export the full data-drift dashboard to an HTML file.
# ca_data_drift_dashboard.save('ca_data_drift.html')

## Data Drift Dashboard short (verbose_level=0)

In [27]:
# Short data-drift report: the same tab at verbose_level=0.
# This rebinds the name used for the full dashboard above.
ca_data_drift_dashboard = Dashboard(
    tabs=[
        DataDriftTab(verbose_level=0),
    ],
)

In [28]:
# Compute the short report on the same reference/production samples.
ca_data_drift_dashboard.calculate(
    ref_data_sample,
    prod_data_sample,
    column_mapping=column_mapping,
)

In [29]:
# Display the computed short data-drift dashboard as this cell's output.
ca_data_drift_dashboard.show()

In [30]:
# Optional: uncomment to export the short data-drift dashboard to an HTML file.
# NOTE(review): this is the same file name as the export line in the full
# (verbose_level=1) section; enabling both would write to the same path, so
# pick a distinct name if both exports are needed.
# ca_data_drift_dashboard.save('ca_data_drift.html')

# Target and Data Drift Dashboard

In [31]:
# Combined report: data-drift tab followed by numerical-target-drift tab,
# both at verbose_level=1.
ca_data_and_target_drift_dashboard = Dashboard(
    tabs=[
        DataDriftTab(verbose_level=1),
        NumTargetDriftTab(verbose_level=1),
    ],
)

In [32]:
# Compute both tabs in one pass over the reference and production samples.
ca_data_and_target_drift_dashboard.calculate(
    ref_data_sample,
    prod_data_sample,
    column_mapping=column_mapping,
)

In [33]:
# Display the combined data-and-target drift dashboard as this cell's output.
ca_data_and_target_drift_dashboard.show()

In [34]:
# Optional: uncomment to export the combined dashboard to an HTML file.
# ca_data_and_target_drift_dashboard.save('ca_data_and_target_drift.html')

# Profiles

Note: JSON profiles do not yet support the `verbose_level` parameter.

## Target Drift Profile

In [37]:
# JSON profile containing only the numerical-target-drift section.
ca_target_drift_profile = Profile(
    sections=[
        NumTargetDriftProfileSection(),
    ],
)

In [38]:
# Compute the profile by comparing the reference sample with the production sample.
ca_target_drift_profile.calculate(
    ref_data_sample,
    prod_data_sample,
    column_mapping=column_mapping,
)

In [39]:
# Return the computed profile as a JSON string (top-level key "num_target_drift").
ca_target_drift_profile.json()

'{"num_target_drift": {"name": "num_target_drift", "datetime": "2022-01-02 12:22:35.090325", "data": {"utility_columns": {"date": null, "id": null, "target": "MedHouseVal", "prediction": null}, "cat_feature_names": [], "num_feature_names": ["MedInc", "HouseAge", "AveRooms", "AveBedrms", "Population", "AveOccup", "Latitude", "Longitude"], "target_names": null, "metrics": {"target_name": "MedHouseVal", "target_type": "num", "target_drift": 6.977595468924532e-13, "target_correlations": {"reference": {"MedInc": 0.7065242833965043, "HouseAge": 0.07115156310946809, "AveRooms": 0.19025915539696556, "AveBedrms": -0.05980562149328375, "Population": -0.024768930124835116, "AveOccup": -0.2631637124602605, "Latitude": -0.23293330925876068, "Longitude": 0.08068422712223894, "MedHouseVal": 1.0}, "current": {"MedInc": 0.6597378591275064, "HouseAge": 0.21178115016630358, "AveRooms": 0.16667433236899484, "AveBedrms": -0.1070020384971005, "Population": -0.050062662320241466, "AveOccup": 0.02281723168136

## Data Drift Profile

In [40]:
# JSON profile containing only the data-drift section.
ca_data_drift_profile = Profile(
    sections=[
        DataDriftProfileSection(),
    ],
)

In [41]:
# Compute the profile by comparing the reference sample with the production sample.
ca_data_drift_profile.calculate(
    ref_data_sample,
    prod_data_sample,
    column_mapping=column_mapping,
)

In [42]:
# Return the computed profile as a JSON string (top-level key "data_drift").
ca_data_drift_profile.json()

'{"data_drift": {"name": "data_drift", "datetime": "2022-01-02 12:22:38.689675", "data": {"utility_columns": {"date": null, "id": null, "target": "MedHouseVal", "prediction": null}, "cat_feature_names": [], "num_feature_names": ["MedInc", "HouseAge", "AveRooms", "AveBedrms", "Population", "AveOccup", "Latitude", "Longitude"], "target_names": null, "options": {"confidence": 0.95, "drift_share": 0.5, "nbinsx": 10, "xbins": null}, "metrics": {"MedInc": {"current_small_hist": [[0.05586129846484877, 0.2206866112191556, 0.2310312961200536, 0.10965365994951791, 0.04137873960359167, 0.01793078716155641, 0.0048275196204190275, 0.0020689369801795833, 0.004137873960359167, 0.0020689369801795833], [0.4999, 1.9499199999999999, 3.39994, 4.849959999999999, 6.29998, 7.75, 9.200019999999999, 10.650039999999999, 12.10006, 13.55008, 15.0001]], "ref_small_hist": [[0.08088992747561204, 0.261336688767362, 0.19358273242026808, 0.097482733111635, 0.03180287746904404, 0.01590143873452204, 0.003456834507504787,

## Target and Data Drift Profile

In [43]:
# JSON profile combining the data-drift and numerical-target-drift sections.
ca_data_and_target_drift_profile = Profile(
    sections=[
        DataDriftProfileSection(),
        NumTargetDriftProfileSection(),
    ],
)

In [44]:
# Compute both sections in one pass over the reference and production samples.
ca_data_and_target_drift_profile.calculate(
    ref_data_sample,
    prod_data_sample,
    column_mapping=column_mapping,
)

In [45]:
# Return the combined profile as a JSON string; the output begins with the
# "data_drift" section.
ca_data_and_target_drift_profile.json()

'{"data_drift": {"name": "data_drift", "datetime": "2022-01-02 12:22:43.196308", "data": {"utility_columns": {"date": null, "id": null, "target": "MedHouseVal", "prediction": null}, "cat_feature_names": [], "num_feature_names": ["MedInc", "HouseAge", "AveRooms", "AveBedrms", "Population", "AveOccup", "Latitude", "Longitude"], "target_names": null, "options": {"confidence": 0.95, "drift_share": 0.5, "nbinsx": 10, "xbins": null}, "metrics": {"MedInc": {"current_small_hist": [[0.05586129846484877, 0.2206866112191556, 0.2310312961200536, 0.10965365994951791, 0.04137873960359167, 0.01793078716155641, 0.0048275196204190275, 0.0020689369801795833, 0.004137873960359167, 0.0020689369801795833], [0.4999, 1.9499199999999999, 3.39994, 4.849959999999999, 6.29998, 7.75, 9.200019999999999, 10.650039999999999, 12.10006, 13.55008, 15.0001]], "ref_small_hist": [[0.08088992747561204, 0.261336688767362, 0.19358273242026808, 0.097482733111635, 0.03180287746904404, 0.01590143873452204, 0.003456834507504787,