# Data Drift and Model Performance Dashboards for Iris dataset

In [1]:
import pandas as pd
import numpy as np

from sklearn import datasets, model_selection, neighbors

from evidently.dashboard import Dashboard
from evidently.tabs import DataDriftTab, CatTargetDriftTab
from evidently.tabs import ClassificationPerformanceTab, ProbClassificationPerformanceTab

from evidently.model_profile import Profile
from evidently.profile_sections import DataDriftProfileSection, CatTargetDriftProfileSection 
from evidently.profile_sections import ClassificationPerformanceProfileSection, ProbClassificationPerformanceProfileSection

## Iris data

In [2]:
# Load the Iris dataset that ships with scikit-learn. Later cells read its
# .data, .feature_names, .target and .target_names attributes.
iris = datasets.load_iris()

In [3]:
# Wrap the raw feature matrix in a DataFrame whose columns carry the feature names.
iris_frame = pd.DataFrame(data=iris.data, columns=iris.feature_names)

## Data Drift Dashboard

In [4]:
# Build a dashboard containing only the data drift tab.
iris_data_drift_report = Dashboard(tabs=[DataDriftTab])
# The first argument is the reference data and the second the production data.
# The same frame is passed for both, so the report compares the data with itself.
iris_data_drift_report.calculate(iris_frame, iris_frame, column_mapping = None)

In [5]:
# Display the calculated data drift dashboard in the notebook output.
iris_data_drift_report.show()

In [6]:
# Optional: uncomment to write the data drift dashboard to an HTML file.
#iris_data_drift_report.save('iris_data_drift.html')

## Data Drift Profile

In [7]:
# Build a profile with the data drift section only.
iris_data_drift_profile = Profile(sections=[DataDriftProfileSection])
# Reference and production are the same frame here, so the data is compared with itself.
iris_data_drift_profile.calculate(iris_frame, iris_frame, column_mapping = None)

In [8]:
# Serialize the calculated profile; the recorded output shows a JSON string keyed by "data_drift".
drift_profile = iris_data_drift_profile.json() 

In [9]:
# Echo the serialized data drift profile as the cell output.
drift_profile

'{"data_drift": {"name": "data_drift", "datetime": "2021-05-31 22:39:20.204909", "data": {"utility_columns": {"date": null, "id": null, "target": null, "prediction": null}, "cat_feature_names": [], "num_feature_names": ["sepal length (cm)", "petal length (cm)", "petal width (cm)", "sepal width (cm)"], "metrics": {"sepal length (cm)": {"prod_small_hist": [[0.16666666666666652, 0.4259259259259266, 0.259259259259259, 0.4999999999999995, 0.2962962962962968, 0.48148148148148107, 0.33333333333333304, 0.11111111111111129, 0.09259259259259252, 0.11111111111111102], [4.3, 4.66, 5.02, 5.38, 5.74, 6.1, 6.46, 6.82, 7.18, 7.54, 7.9]], "ref_small_hist": [[0.16666666666666652, 0.4259259259259266, 0.259259259259259, 0.4999999999999995, 0.2962962962962968, 0.48148148148148107, 0.33333333333333304, 0.11111111111111129, 0.09259259259259252, 0.11111111111111102], [4.3, 4.66, 5.02, 5.38, 5.74, 6.1, 6.46, 6.82, 7.18, 7.54, 7.9]], "feature_type": "num", "p_value": 1.0}, "petal length (cm)": {"prod_small_hist

## Data and Target Drift Dashboard

In [10]:
# Add the class labels in place as a 'target' column. With column_mapping=None,
# the recorded profile output further down reports "target" as the target column.
iris_frame['target'] = iris.target

In [11]:
# Dashboard with both the data drift tab and the categorical target drift tab.
iris_data_and_target_drift_report = Dashboard(tabs=[DataDriftTab, CatTargetDriftTab])
# Split by row position: the first 75 rows are the reference data and the remaining rows are production.
# NOTE(review): the recorded run printed "divide by zero encountered in true_divide"
# for this call; the cause is not visible from this notebook.
iris_data_and_target_drift_report.calculate(iris_frame[:75], iris_frame[75:], column_mapping = None)


divide by zero encountered in true_divide



In [12]:
# Display the data and target drift dashboard in the notebook output.
iris_data_and_target_drift_report.show()

In [13]:
# Write the dashboard to 'iris_data_and_target_drift.html' (relative path, so it
# lands in the kernel's working directory).
iris_data_and_target_drift_report.save('iris_data_and_target_drift.html')

## Data and Target Drift Profile

In [14]:
# Profile with the data drift and categorical target drift sections.
iris_target_and_data_drift_profile = Profile(sections=[DataDriftProfileSection, CatTargetDriftProfileSection])
# Same positional split as the dashboard above: first 75 rows are the reference, the rest production.
# NOTE(review): the recorded run printed "divide by zero encountered in true_divide" here as well.
iris_target_and_data_drift_profile.calculate(iris_frame[:75], iris_frame[75:], column_mapping = None) 


divide by zero encountered in true_divide



In [15]:
# Serialize the calculated data and target drift profile to a JSON string.
data_and_target_profile = iris_target_and_data_drift_profile.json() 

In [16]:
# Echo the serialized data and target drift profile as the cell output.
data_and_target_profile

'{"data_drift": {"name": "data_drift", "datetime": "2021-05-31 22:39:23.682643", "data": {"utility_columns": {"date": null, "id": null, "target": "target", "prediction": null}, "cat_feature_names": [], "num_feature_names": ["sepal length (cm)", "petal length (cm)", "petal width (cm)", "sepal width (cm)"], "metrics": {"sepal length (cm)": {"prod_small_hist": [[0.13333333333333341, 0.04444444444444447, 0.7555555555555538, 0.40000000000000024, 0.44444444444444464, 0.44444444444444464, 0.5777777777777781, 0.2222222222222217, 0.0444444444444446, 0.26666666666666605], [4.9, 5.2, 5.5, 5.800000000000001, 6.1000000000000005, 6.4, 6.7, 7.0, 7.300000000000001, 7.6, 7.9]], "ref_small_hist": [[0.24691358024691315, 0.5432098765432107, 1.0864197530864215, 0.24691358024691315, 0.543209876543209, 0.34567901234567955, 0.1975308641975312, 0.24691358024691315, 0.14814814814814792, 0.0987654320987656], [4.3, 4.57, 4.84, 5.109999999999999, 5.38, 5.65, 5.92, 6.1899999999999995, 6.46, 6.73, 7.0]], "feature_ty

## Model Performance Dashboard

In [17]:
# Rebuild the frame from the raw features so the 'target' column added in the
# drift section is not carried into the model's inputs.
iris_frame = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [18]:
# Hold out part of the data: the training split plays the role of the reference data,
# the test split the role of production. random_state pins the shuffle.
reference, production, y_train, y_test = model_selection.train_test_split(
    iris_frame, iris.target, random_state=0
)

In [19]:
# Nearest-neighbour classifier using a single neighbour. In the recorded profile
# output further down, every reference (training) metric is 1.0 for this model.
model = neighbors.KNeighborsClassifier(n_neighbors=1)

In [20]:
# Fit on the reference split. This must run before 'target' and 'prediction'
# columns are added to `reference`, otherwise they would be used as features.
model.fit(reference, y_train)

KNeighborsClassifier(n_neighbors=1)

In [21]:
# Predict class codes for both splits in one pass: reference first, then production.
train_predictions, test_predictions = (
    model.predict(features) for features in (reference, production)
)

In [22]:
# Append the true labels and the model output to each split, in that column order.
reference = reference.assign(target=y_train, prediction=train_predictions)
production = production.assign(target=y_test, prediction=test_predictions)

In [23]:
# Replace the integer class codes with their species names from iris.target_names.
# Run this cell once per split: after it the columns hold strings, which can no
# longer be used to index target_names.
reference['target'] = reference['target'].map(lambda code: iris.target_names[code])
reference['prediction'] = reference['prediction'].map(lambda code: iris.target_names[code])

production['target'] = production['target'].map(lambda code: iris.target_names[code])
production['prediction'] = production['prediction'].map(lambda code: iris.target_names[code])

In [24]:
# Tell evidently which columns hold the label, the predicted label and the numeric features.
iris_column_mapping = {
    'target': 'target',
    'prediction': 'prediction',
    'numerical_features': iris.feature_names,
}

In [25]:
# Classification performance dashboard for the label-predicting model.
iris_model_performance = Dashboard(tabs=[ClassificationPerformanceTab])
# Training split is the reference, test split is production; the mapping names
# the 'target' and 'prediction' columns added above.
iris_model_performance.calculate(reference, production, column_mapping = iris_column_mapping)
# Display the dashboard in the notebook output.
iris_model_performance.show()

In [26]:
# Optional: uncomment to write the classification performance dashboard to an HTML file.
#iris_model_performance.save('iris_classification_performance.html')

## Model Performance Profile

In [27]:
# Profile with the classification performance section only.
iris_classification_performance_profile = Profile(sections=[ClassificationPerformanceProfileSection])
# Same inputs and column mapping as the performance dashboard above.
iris_classification_performance_profile.calculate(reference, production, column_mapping = iris_column_mapping)

In [28]:
# Serialize the calculated classification performance profile to a JSON string.
classification_performance_profile = iris_classification_performance_profile.json() 

In [29]:
# Echo the serialized classification performance profile as the cell output.
classification_performance_profile

'{"classification_performance": {"name": "classification_performance", "datetime": "2021-05-31 22:39:29.679458", "data": {"utility_columns": {"date": null, "id": null, "target": "target", "prediction": "prediction"}, "cat_feature_names": [], "num_feature_names": ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"], "metrics": {"reference": {"accuracy": 1.0, "precision": 1.0, "recall": 1.0, "f1": 1.0, "metrics_matrix": {"setosa": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 37}, "versicolor": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 34}, "virginica": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 41}, "accuracy": 1.0, "macro avg": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 112}, "weighted avg": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 112}}, "confusion_matrix": {"labels": ["setosa", "versicolor", "virginica"], "values": [[37, 0, 0], [0, 34, 0], [0, 0, 41]]}}, "curre

## Probabilistic Model Performance Dashboard

In [30]:
# Start this section again from a frame that holds only the feature columns.
iris_frame = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [31]:
# Redo the split with the same random_state as the previous section, giving fresh
# reference/production frames without the columns added there.
reference, production, y_train, y_test = model_selection.train_test_split(
    iris_frame, iris.target, random_state=0
)

In [32]:
# Nearest-neighbour classifier with five neighbours; its predict_proba output is used below.
# NOTE(review): the recorded repr after fitting prints no arguments, which suggests
# 5 is the library default — confirm against the scikit-learn docs.
model = neighbors.KNeighborsClassifier(n_neighbors=5)

In [33]:
# Fit on the reference split while it still contains only the feature columns.
model.fit(reference, y_train)

KNeighborsClassifier()

In [34]:
# Per-class probabilities for each split, one column per species name.
# Both frames get a default integer index starting at 0.
train_probas, test_probas = (
    pd.DataFrame(model.predict_proba(features), columns=iris.target_names)
    for features in (reference, production)
)

In [35]:
# Store the true class of every row as its species name (looked up by class code).
reference['target'] = list(iris.target_names[y_train])
production['target'] = list(iris.target_names[y_test])

In [36]:
# Give each split a fresh 0..n-1 index so its rows line up with the probability
# frames when concatenated column-wise, and add a 'result' column holding the
# species name of the true class.
reference = (
    reference
    .reset_index(drop=True)
    .assign(result=list(iris.target_names[y_train]))
)
merged_reference = pd.concat([reference, train_probas], axis='columns')

production = (
    production
    .reset_index(drop=True)
    .assign(result=list(iris.target_names[y_test]))
)
merged_production = pd.concat([production, test_probas], axis='columns')

In [37]:
# For the probabilistic report, 'prediction' is the list of probability columns
# (one per species name) rather than a single label column.
iris_column_mapping = {
    'target': 'target',
    'prediction': iris.target_names.tolist(),
    'numerical_features': iris.feature_names,
}

In [38]:
# Probabilistic classification performance dashboard (rebinds the name used in the previous section).
iris_model_performance = Dashboard(tabs=[ProbClassificationPerformanceTab])
# Inputs are the merged frames: features, 'target', 'result' and one probability column per class.
iris_model_performance.calculate(merged_reference, merged_production, column_mapping = iris_column_mapping)
# Display the dashboard in the notebook output.
iris_model_performance.show()

## Probabilistic Model Performance Profile

In [39]:
# Profile with the probabilistic classification performance section only.
iris_prob_performance_profile = Profile(sections=[ProbClassificationPerformanceProfileSection])
# Same merged inputs and column mapping as the probabilistic dashboard above.
iris_prob_performance_profile.calculate(merged_reference, merged_production, column_mapping = iris_column_mapping)

In [40]:
# Serialize the calculated probabilistic performance profile to a JSON string.
prob_performance_profile = iris_prob_performance_profile.json() 

In [41]:
# Echo the serialized probabilistic performance profile as the cell output.
prob_performance_profile

'{"probabilistic_classification_performance": {"name": "probabilistic_classification_performance", "datetime": "2021-05-31 22:39:32.339841", "data": {"utility_columns": {"date": null, "id": null, "target": "target", "prediction": ["setosa", "versicolor", "virginica"]}, "cat_feature_names": [], "num_feature_names": ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"], "metrics": {"reference": {"accuracy": 0.9732142857142857, "precision": 0.9740259740259741, "recall": 0.9722620755619321, "f1": 0.9730264340945873, "roc_auc": 0.9972377058707812, "log_loss": 0.071679964138016, "metrics_matrix": {"setosa": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 37}, "versicolor": {"precision": 0.9696969696969697, "recall": 0.9411764705882353, "f1-score": 0.955223880597015, "support": 34}, "virginica": {"precision": 0.9523809523809523, "recall": 0.975609756097561, "f1-score": 0.963855421686747, "support": 41}, "accuracy": 0.9732142857142857, "macro avg": {"