# Data drift dashboard in a Jupyter notebook

In [10]:
import pandas as pd
import numpy as np
import requests
import zipfile
import io

from datetime import datetime
from sklearn import datasets, ensemble

from evidently.dashboard import Dashboard
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.tabs import DataDriftTab, NumTargetDriftTab, RegressionPerformanceTab

from evidently.model_profile import Profile
from evidently.profile_sections import DataDriftProfileSection

## Bicycle Demand Data

In [11]:
# Download the Bike Sharing archive and read the hourly table directly from memory.
response = requests.get(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip",
    timeout=60,  # do not hang the notebook forever if the server is unreachable
)
# Fail with a clear HTTP error instead of passing an error page to ZipFile.
response.raise_for_status()
content = response.content
# Both the archive and the extracted member are closed when the block exits.
with zipfile.ZipFile(io.BytesIO(content)) as arc, arc.open("hour.csv") as hourly_csv:
    # 'dteday' becomes the index; as the preview below shows, it carries the date only,
    # so all hourly rows of one day share the same index value.
    raw_data = pd.read_csv(hourly_csv, header=0, sep=',', parse_dates=['dteday'], index_col='dteday')

In [12]:
# Preview the first rows of the loaded hourly table as a sanity check.
raw_data.head()

Unnamed: 0_level_0,instant,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
dteday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2011-01-01,1,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
2011-01-01,2,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2011-01-01,3,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
2011-01-01,4,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
2011-01-01,5,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


## Regression Model

### Model training

In [13]:
# Column roles shared by model training and the evidently column mapping.
target = 'cnt'              # column the regressor is trained to predict
prediction = 'prediction'   # column name under which model outputs are stored

# Columns treated as numerical inputs.
numerical_features = [
    'temp',
    'atemp',
    'hum',
    'windspeed',
    'mnth',
    'weekday',
]

# Columns treated as categorical inputs.
categorical_features = [
    'season',
    'holiday',
    'workingday',
    'weathersit',
]

In [14]:
# Split by calendar date: the first four weeks of January 2011 are the reference
# (training) window, the rest of January plus February is treated as production data.
# .copy() gives each frame its own data: a prediction column is added to both frames
# later, and writing into a bare .loc slice of raw_data triggers SettingWithCopyWarning.
reference = raw_data.loc['2011-01-01 00:00:00':'2011-01-28 23:00:00'].copy()
production = raw_data.loc['2011-01-29 00:00:00':'2011-02-28 23:00:00'].copy()

In [15]:
# Preview the first rows of the reference window.
reference.head()

Unnamed: 0_level_0,instant,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
dteday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2011-01-01,1,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
2011-01-01,2,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2011-01-01,3,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
2011-01-01,4,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
2011-01-01,5,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [16]:
# Random forest with 50 trees; the fixed random_state keeps re-runs reproducible.
regressor = ensemble.RandomForestRegressor(n_estimators=50, random_state=0)

In [17]:
# Train on the reference window only, using numerical then categorical columns as inputs.
regressor.fit(
    reference[numerical_features + categorical_features],
    reference[target],
)

RandomForestRegressor(n_estimators=50, random_state=0)

In [18]:
# Score both windows with the same feature column order that was used for fitting.
ref_prediction, prod_prediction = (
    regressor.predict(frame[numerical_features + categorical_features])
    for frame in (reference, production)
)

In [19]:
# Attach the model outputs under the column name held in `prediction`, the same name
# that is handed to the column mapping, so the two cannot drift apart.
# assign() returns a new frame instead of writing into a slice of raw_data, which
# avoids SettingWithCopyWarning and keeps the cell safe to re-run.
reference = reference.assign(**{prediction: ref_prediction})
production = production.assign(**{prediction: prod_prediction})

### Model Performance

In [20]:
# Tell evidently which columns hold the target, the model output and each feature group.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

In [21]:
# Regression performance report on the training (reference) window alone:
# None is passed in place of a second dataset to compare against.
regression_perfomance_dashboard = Dashboard(
    tabs=[RegressionPerformanceTab()],
)
regression_perfomance_dashboard.calculate(
    reference,
    None,
    column_mapping=column_mapping,
)

In [22]:
# Display the regression performance dashboard as last calculated (reference data only).
regression_perfomance_dashboard.show()

In [23]:
#regression_perfomance_dashboard.save('regression_performance_at_training.html')

##  Week 1

In [24]:
# First production window: rows dated 2011-01-29 through 2011-02-07 (label slicing
# with .loc includes both end points), compared against the reference window.
regression_perfomance_dashboard.calculate(
    reference,
    production.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'],
    column_mapping=column_mapping,
)

In [25]:
# Display the regression performance dashboard as last calculated (first production window).
regression_perfomance_dashboard.show()

In [26]:
#regression_perfomance_dashboard.save('regression_performance_after_week1.html')

## Week 2

In [27]:
# Second production window: 2011-02-08 through 2011-02-14 (seven days).
# The window starts on 02-08 because 2011-02-07 already belongs to the first
# production window; starting on 02-07 would count that day in both reports.
regression_perfomance_dashboard.calculate(
    reference,
    production.loc['2011-02-08 00:00:00':'2011-02-14 23:00:00'],
    column_mapping=column_mapping,
)

In [28]:
# Display the regression performance dashboard as last calculated (second production window).
regression_perfomance_dashboard.show()

In [29]:
#regression_perfomance_dashboard.save('regression_performance_after_week2.html')

In [30]:
# Target drift report for the second production window, 2011-02-08 through 2011-02-14.
# The window starts on 02-08 because 2011-02-07 already belongs to the first
# production window; starting on 02-07 would make the two windows overlap.
target_drift_dashboard = Dashboard(tabs=[NumTargetDriftTab()])
target_drift_dashboard.calculate(
    reference,
    production.loc['2011-02-08 00:00:00':'2011-02-14 23:00:00'],
    column_mapping=column_mapping,
)

In [31]:
# Display the target drift dashboard as last calculated (second production window).
target_drift_dashboard.show()

In [32]:
#target_drift_dashboard.save('target_drift_after_week2.html')

## Week 3

In [33]:
# Third production window: rows dated 2011-02-15 through 2011-02-21,
# compared against the reference window.
regression_perfomance_dashboard.calculate(
    reference,
    production.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'],
    column_mapping=column_mapping,
)

In [34]:
# Display the regression performance dashboard as last calculated (third production window).
regression_perfomance_dashboard.show()

In [35]:
#regression_perfomance_dashboard.save('regression_performance_after_week3.html')

In [36]:
# Recompute the target drift report for the third production window
# (rows dated 2011-02-15 through 2011-02-21) against the reference window.
target_drift_dashboard.calculate(
    reference,
    production.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'],
    column_mapping=column_mapping,
)

In [37]:
# Display the target drift dashboard as last calculated (third production window).
target_drift_dashboard.show()

In [38]:
#target_drift_dashboard.save('target_drift_after_week3.html')

## Data Drift

In [39]:
# Mapping for the feature drift analysis: only the numerical features are declared,
# so the categorical columns are not part of the drift report.
# NOTE(review): this rebinds `column_mapping`, replacing the mapping that carries the
# target/prediction/categorical settings; re-running the regression or target drift
# cells after this one would pick up this reduced mapping.
column_mapping = ColumnMapping(numerical_features=numerical_features)

In [40]:
# Feature drift report: reference window versus the first production window
# (rows dated 2011-01-29 through 2011-02-07).
data_drift_dashboard = Dashboard(
    tabs=[DataDriftTab()],
)
data_drift_dashboard.calculate(
    reference,
    production.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'],
    column_mapping=column_mapping,
)

In [41]:
# Display the data drift dashboard as last calculated (first production window).
data_drift_dashboard.show()

In [42]:
#data_drift_dashboard.save("data_drift_dashboard_after_week1.html")

## Data Drift Profile

In [43]:
# Same drift comparison as the dashboard (reference versus rows dated 2011-01-29
# through 2011-02-07), computed as a profile so the result can be read as data.
data_drift_profile = Profile(
    sections=[DataDriftProfileSection()],
)
data_drift_profile.calculate(
    reference,
    production.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'],
    column_mapping=column_mapping,
)

In [44]:
# Return the calculated drift profile serialized as a JSON string (shown as the cell output).
data_drift_profile.json()

'{"data_drift": {"name": "data_drift", "datetime": "2021-12-09 12:53:57.813166", "data": {"utility_columns": {"date": null, "id": null, "target": null, "prediction": "prediction"}, "cat_feature_names": [], "num_feature_names": ["temp", "atemp", "hum", "windspeed", "mnth", "weekday"], "target_names": null, "metrics": {"temp": {"current_small_hist": [[9.39716312056738, 3.3687943262411357, 4.787234042553187, 7.4468085106382995, 5.673758865248229, 4.964539007092194, 1.9503546099290763, 1.063829787234044, 1.595744680851066, 1.4184397163120555], [0.14, 0.164, 0.188, 0.21200000000000002, 0.23600000000000002, 0.26, 0.28400000000000003, 0.30800000000000005, 0.332, 0.356, 0.38]], "ref_small_hist": [[1.2871432774345397, 1.140041188584878, 3.1259193880553107, 5.0750220653133296, 6.251838776110623, 3.236245954692552, 1.1768167107972938, 0.47808178876140056, 0.4045307443365697, 0.5516328331862307], [0.02, 0.064, 0.108, 0.152, 0.19599999999999998, 0.23999999999999996, 0.28400000000000003, 0.328, 0.37