# Probabilistic Classification Performance Dashboard and Profile for the Breast Cancer Dataset

In [18]:
import pandas as pd

from datetime import datetime
from sklearn import datasets, ensemble, model_selection

from evidently.dashboard import Dashboard
from evidently.tabs import ProbClassificationPerformanceTab

from evidently.model_profile import Profile
from evidently.profile_sections import ProbClassificationPerformanceProfileSection

## Breast Cancer Data

In [2]:
# Load scikit-learn's bundled breast cancer dataset. The returned object
# provides the feature matrix (.data), the labels (.target) and the
# feature/target names used throughout the notebook.
bcancer = datasets.load_breast_cancer()

In [3]:
# Wrap the raw feature matrix in a DataFrame whose columns carry the feature names.
bcancer_frame = pd.DataFrame(data=bcancer.data, columns=bcancer.feature_names)

In [4]:
# Preview the first rows of the feature frame.
bcancer_frame.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


## Model Performance Dashboard

In [5]:
# Rebuild the feature frame from the raw dataset (same construction as in the
# "Breast Cancer Data" section) so this section starts from unmodified features.
bcancer_frame = pd.DataFrame(data=bcancer.data, columns=bcancer.feature_names)

In [6]:
# Split features and labels with the default split proportions. The training
# part plays the role of the "reference" dataset and the held-out part the
# "production" dataset. The fixed random_state keeps the split reproducible.
reference, production, y_train, y_test = model_selection.train_test_split(
    bcancer_frame,
    bcancer.target,
    random_state=0,
)

In [7]:
# Random forest classifier with a fixed seed so repeated runs yield the same model.
model = ensemble.RandomForestClassifier(random_state=11)

In [8]:
# Fit on the reference (training) split only; the production split is not used for training.
model.fit(reference, y_train)

RandomForestClassifier(random_state=11)

In [9]:
# Predicted class probabilities for both splits, one column per class, named
# after the corresponding target label.
# Note: the reference predictions are made on the same rows the model was
# fitted on, so reference metrics will look better than production ones.
train_probas = pd.DataFrame(model.predict_proba(reference), columns=bcancer.target_names)
test_probas = pd.DataFrame(model.predict_proba(production), columns=bcancer.target_names)

In [10]:
# Build the frames passed to evidently: features + true label + per-class
# probabilities.
#
# New frames are created instead of modifying `reference` / `production` in
# place. Adding the string `target` column to the frames the model was fitted
# on would make the earlier fit / predict_proba cells fail when re-run, and
# assigning into a train_test_split slice can trigger pandas copy warnings.
#
# The index is reset so the rows line up positionally with the default
# 0..n-1 index of the probability frames when concatenating column-wise.
merged_reference = pd.concat(
    [
        reference.reset_index(drop=True).assign(target=bcancer.target_names[y_train]),
        train_probas,
    ],
    axis=1,
)

merged_production = pd.concat(
    [
        production.reset_index(drop=True).assign(target=bcancer.target_names[y_test]),
        test_probas,
    ],
    axis=1,
)

In [11]:
# Tell evidently which columns hold the true label, the predicted class
# probabilities and the numerical features. Both name collections are passed
# as plain Python lists (the dataset exposes them as NumPy arrays), so the
# mapping is consistent and contains only built-in types.
column_mapping = {
    'target': 'target',
    'prediction': bcancer.target_names.tolist(),
    'numerical_features': bcancer.feature_names.tolist(),
}

In [24]:
# Build the probabilistic-classification performance dashboard from the
# reference and production frames, then render it inline.
model_performance_dashboard = Dashboard(tabs=[ProbClassificationPerformanceTab])
model_performance_dashboard.calculate(
    merged_reference,
    merged_production,
    column_mapping=column_mapping,
)
model_performance_dashboard.show()

In [25]:
# Optional: uncomment the line below to export the dashboard to an HTML file.
#model_performance_dashboard.save('bcancer_prob_classification_performance.html')

## Model Performance Profile

In [19]:
# Profile with the probabilistic-classification performance section; its
# result is read back as JSON further below.
model_performance_profile = Profile(sections=[ProbClassificationPerformanceProfileSection])

In [22]:
# Compute the profile on the same reference/production frames and column
# mapping that were used for the dashboard.
model_performance_profile.calculate(
    merged_reference,
    merged_production,
    column_mapping=column_mapping,
)

In [23]:
# Return the computed profile as a JSON string (displayed as the cell output).
model_performance_profile.json()

'{"probabilistic_classification_performance": {"name": "probabilistic_classification_performance", "datetime": "2021-07-15 19:57:46.012058", "data": {"utility_columns": {"date": null, "id": null, "target": "target", "prediction": ["malignant", "benign"]}, "cat_feature_names": [], "num_feature_names": ["mean radius", "mean texture", "mean perimeter", "mean area", "mean smoothness", "mean compactness", "mean concavity", "mean concave points", "mean symmetry", "mean fractal dimension", "radius error", "texture error", "perimeter error", "area error", "smoothness error", "compactness error", "concavity error", "concave points error", "symmetry error", "fractal dimension error", "worst radius", "worst texture", "worst perimeter", "worst area", "worst smoothness", "worst compactness", "worst concavity", "worst concave points", "worst symmetry", "worst fractal dimension"], "target_names": null, "metrics": {"reference": {"accuracy": 1.0, "precision": 1.0, "recall": 1.0, "f1": 1.0, "roc_auc": 1