In [1]:
import pandas as pd

from sklearn.datasets import fetch_california_housing

from evidently.dashboard import Dashboard
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.tabs import DataDriftTab, NumTargetDriftTab

from evidently.model_profile import Profile
from evidently.profile_sections import DataDriftProfileSection

## California housing data

In [2]:
# Fetch the California housing data with `as_frame=True` and keep its `.frame`
# attribute as `df`; every later cell slices or samples from this DataFrame.
dataset = fetch_california_housing(as_frame=True)
df = dataset.frame

In [3]:
# Column roles used to build the Evidently column mapping.
target = 'MedHouseVal'
prediction = 'prediction'

# All model inputs in this dataset are numerical.
numerical_features = [
    'MedInc',
    'HouseAge',
    'AveRooms',
    'AveBedrms',
    'Population',
    'AveOccup',
    'Latitude',
    'Longitude',
]
categorical_features = list()

# `features` is an alias of the numerical list (same object), since there are
# no categorical inputs to add.
features = numerical_features

In [4]:
# Tell Evidently which columns play which role. The values come from the
# names defined in the configuration cell so that the mapping cannot drift
# out of sync with them (the prediction name was previously a duplicated
# string literal).
column_mapping = ColumnMapping()

column_mapping.target = target
column_mapping.prediction = prediction
column_mapping.numerical_features = numerical_features

In [5]:
# Positional split: the first 15000 rows act as the reference period and the
# remaining rows as the "production" (current) period.
ref_data = df.iloc[:15000]
prod_data = df.iloc[15000:]

## Data and Target Drift Dashboard

In [14]:
# Dashboard with the data drift tab and the numerical target drift tab,
# using Evidently's default options.
data_and_target_drift_dashboard = Dashboard(
    tabs=[DataDriftTab(), NumTargetDriftTab()],
)

# Work on fixed-seed samples of 1000 rows from each period so the cell is
# reproducible on re-run.
reference_sample = ref_data.sample(1000, random_state=0)
current_sample = prod_data.sample(1000, random_state=0)

data_and_target_drift_dashboard.calculate(
    reference_sample,
    current_sample,
    column_mapping=column_mapping,
)

data_and_target_drift_dashboard.show()

## Data and Target Drift Dashboard with defined options

Notes: 
- You can choose the following options:
    confidence: float = 0.95
    drift_share: float = 0.5
    nbinsx: Optional[Dict[str, int]] = None
    xbins: Optional[Dict[str, int]] = None
    stattest_func: Optional[Callable] = None
    feature_stattest_func: Optional[Dict[str, Callable]] = None
    cat_target_stattest_func: Optional[Callable] = None
    num_target_stattest_func: Optional[Callable] = None
- For parameters that take a function, you need to define a function that takes two pd.Series (reference and current data) and returns a number (the p-value)

Usage:
- Dashboard(tabs=[DataDriftTab(), NumTargetDriftTab()], options=[options])

In [15]:
import numpy as np
from scipy.stats import anderson_ksamp
from evidently.options import DataDriftOptions

In [16]:
def anderson_stat_test(reference_data: pd.Series, current_data: pd.Series) -> float:
    """Return the k-sample Anderson-Darling p-value for two samples.

    Intended as a custom stat-test callback: it takes the reference and the
    current values of one column and returns a single number (the p-value).

    Args:
        reference_data: Values of the column in the reference dataset.
        current_data: Values of the same column in the current dataset.

    Returns:
        The third element of the `anderson_ksamp` result, i.e. the approximate
        significance level at which the "same distribution" hypothesis can be
        rejected. SciPy clips this value and warns when it does so.
    """
    # Pass the samples as a list of independent 1-D arrays. Stacking them into
    # a single np.array would require both samples to have the same length and
    # fails on ragged input.
    samples = [np.asarray(reference_data), np.asarray(current_data)]
    return anderson_ksamp(samples)[2]

In [17]:
# Custom drift options:
#   - numerical target drift is tested with the Anderson-Darling callback
#     instead of the built-in test;
#   - confidence is raised from the default 0.95 to 0.99;
#   - nbinsx gives per-feature values for three features (presumably histogram
#     bin counts for the plots; confirm against the Evidently options docs).
options = DataDriftOptions(
    nbinsx={'MedInc': 15, 'HouseAge': 25, 'AveRooms': 20},
    confidence=0.99,
    num_target_stattest_func=anderson_stat_test,
)

In [18]:
# Same two tabs as the default dashboard, this time configured through the
# custom options object.
data_and_target_drift_dashboard = Dashboard(
    tabs=[DataDriftTab(), NumTargetDriftTab()],
    options=[options],
)

# Fixed-seed 1000-row samples, so results are comparable with the default
# dashboard run and reproducible on re-run.
reference_sample = ref_data.sample(1000, random_state=0)
current_sample = prod_data.sample(1000, random_state=0)

data_and_target_drift_dashboard.calculate(
    reference_sample,
    current_sample,
    column_mapping=column_mapping,
)
data_and_target_drift_dashboard.show()


p-value floored: true value smaller than 0.001

