# Data drift dashboard in a Jupyter notebook

In [None]:
# Install packages that are not present in prod/base/1.1 (e.g. evidently).
import os

# os.system signals failure through a non-zero return value.  Report it here
# so that a failed install is visible immediately instead of surfacing later
# as an ImportError; the notebook still continues in case the package is
# already available.
install_command = 'sudo pip install evidently'
install_status = os.system(install_command)
if install_status != 0:
    print('WARNING: command failed:', install_command,
          '(os.system returned', str(install_status) + ')')
# os.system('pip install ...')

# Note: a similar approach can be used to install other packages.

In [None]:
import pandas as pd
import numpy as np

from datetime import datetime
from sklearn import datasets, ensemble

from evidently.dashboard import Dashboard
from evidently.tabs import DataDriftTab, NumTargetDriftTab, RegressionPerformanceTab

## Bicycle Demand Data

In [None]:
# Load train.csv, parsing the 'datetime' column as timestamps and using it
# as the DataFrame index.
raw_data = pd.read_csv(
    'train.csv',
    sep=',',
    header=0,
    index_col='datetime',
    parse_dates=['datetime'],
)

In [None]:
# Preview the first rows of the loaded data.
raw_data.head()

## Regression Model

### Feature engineering

In [None]:
# Derive calendar features from the datetime index using the vectorized
# DatetimeIndex accessors instead of calling a Python lambda per row.
raw_data['month'] = raw_data.index.month
raw_data['hour'] = raw_data.index.hour
# .weekday is 0 for Monday; shift by one so that Monday == 1.
raw_data['weekday'] = raw_data.index.weekday + 1

In [None]:
# Preview the data again, now including the month/hour/weekday columns.
raw_data.head()

### Model training

In [None]:
# Column roles shared by model training and the dashboard column mapping:
# `target` is the column the regressor is fitted on, `prediction` is the
# column name used for the model output.
target, prediction = 'count', 'prediction'

numerical_features = [
    'temp',
    'atemp',
    'humidity',
    'windspeed',
    'hour',
    'weekday',
]
categorical_features = [
    'season',
    'holiday',
    'workingday',
]

In [None]:
# Split by date: 2011-01-01 .. 2011-01-28 is the reference (training) window,
# 2011-01-29 .. 2011-02-28 is the "current" window.
# .copy() gives each split its own DataFrame, so that adding columns to them
# later does not write into a slice of raw_data (which makes pandas emit a
# SettingWithCopyWarning).
reference = raw_data.loc['2011-01-01 00:00:00':'2011-01-28 23:00:00'].copy()
current = raw_data.loc['2011-01-29 00:00:00':'2011-02-28 23:00:00'].copy()

In [None]:
# Preview the first rows of the reference window.
reference.head()

In [None]:
# Random forest regressor with 50 estimators and a fixed random_state.
regressor = ensemble.RandomForestRegressor(n_estimators=50, random_state=0)

In [None]:
# Fit the model on the reference window: numerical + categorical feature
# columns as inputs, the `target` column as the value to predict.
train_features = reference[numerical_features + categorical_features]
train_target = reference[target]
regressor.fit(train_features, train_target)

In [None]:
# Score both windows with the fitted model, using the same feature columns
# (numerical first, then categorical) that were used for training.
feature_columns = numerical_features + categorical_features
ref_prediction = regressor.predict(reference[feature_columns])
current_prediction = regressor.predict(current[feature_columns])

In [None]:
# Attach the model output under the column name held in `prediction`, so the
# frames always agree with column_mapping['prediction'].  DataFrame.assign
# returns a new frame rather than writing into what may be a slice of
# raw_data, which avoids pandas' SettingWithCopyWarning.
reference = reference.assign(**{prediction: ref_prediction})
current = current.assign(**{prediction: current_prediction})

### Model Performance

In [None]:
# Tell the dashboards which column plays which role.
column_mapping = {
    'target': target,
    'prediction': prediction,
    'numerical_features': numerical_features,
    'categorical_features': categorical_features,
}

In [None]:
# Build the regression-performance dashboard and compute it on the reference
# data alone (None is passed in place of a second dataset).
performance_tabs = [RegressionPerformanceTab]
regression_perfomance_dashboard = Dashboard(tabs=performance_tabs)
regression_perfomance_dashboard.calculate(
    reference,
    None,
    column_mapping=column_mapping,
)

In [None]:
# Display the most recently calculated regression-performance dashboard.
regression_perfomance_dashboard.show()

In [None]:
# Write the training-time performance report to an .html file at a path
# relative to the working directory.
training_report_path = 'regression_performance_at_training.html'
regression_perfomance_dashboard.save(training_report_path)

## Week 1

In [None]:
# Inspect the rows of `current` that make up the week 1 window.
week1_start = '2011-01-29 00:00:00'
week1_end = '2011-02-07 23:00:00'
current.loc[week1_start:week1_end]

In [None]:
# Recompute the performance dashboard with the week 1 slice of `current`
# passed alongside the reference data.
week1_current = current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00']
regression_perfomance_dashboard.calculate(
    reference,
    week1_current,
    column_mapping=column_mapping,
)

In [None]:
# Display the most recently calculated regression-performance dashboard.
regression_perfomance_dashboard.show()

In [None]:
# Nothing earlier in the notebook creates the 'reports' directory, so create
# it here before saving (presumably save() does not create missing
# directories itself -- TODO confirm).  exist_ok=True makes re-running safe.
import os

os.makedirs('reports', exist_ok=True)
regression_perfomance_dashboard.save('reports/regression_performance_after_week1.html')

In [None]:
# Create the numerical-target-drift dashboard and compute it for the
# reference data versus the week 1 slice of `current`.
week1_current = current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00']
target_drift_dashboard = Dashboard(tabs=[NumTargetDriftTab])
target_drift_dashboard.calculate(
    reference,
    week1_current,
    column_mapping=column_mapping,
)

In [None]:
# Display the most recently calculated target-drift dashboard.
target_drift_dashboard.show()

In [None]:
#target_drift_dashboard.save('reports/target_drift_after_week1.html')

## Week 2

In [None]:
regression_perfomance_dashboard.calculate(reference, current.loc['2011-02-07 00:00:00':'2011-02-14 23:00:00'], 
                                            column_mapping=column_mapping)

In [None]:
regression_perfomance_dashboard.show()

In [None]:
#regression_perfomance_dashboard.save('reports/regression_performance_after_week2.html')

In [None]:
target_drift_dashboard.calculate(reference, current.loc['2011-02-07 00:00:00':'2011-02-14 23:00:00'], 
                                   column_mapping=column_mapping)

In [None]:
# Display the most recently calculated target-drift dashboard.
target_drift_dashboard.show()

In [None]:
#target_drift_dashboard.save('reports/target_drift_after_week2.html')

## Week 3

In [None]:
# Recompute the performance dashboard with the week 3 slice of `current`
# passed alongside the reference data.
week3_current = current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00']
regression_perfomance_dashboard.calculate(
    reference,
    week3_current,
    column_mapping=column_mapping,
)

In [None]:
# Display the most recently calculated regression-performance dashboard.
regression_perfomance_dashboard.show()

In [None]:
#regression_perfomance_dashboard.save('reports/regression_performance_after_week3.html')

In [None]:
# Recompute the target-drift dashboard for the reference data versus the
# week 3 slice of `current`.
week3_current = current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00']
target_drift_dashboard.calculate(
    reference,
    week3_current,
    column_mapping=column_mapping,
)

In [None]:
# Display the most recently calculated target-drift dashboard.
target_drift_dashboard.show()

In [None]:
#target_drift_dashboard.save('reports/target_drift_after_week3.html')

## Data Drift

In [None]:
# Rebind column_mapping for the data-drift section: only the numerical
# feature columns are mapped here (no target, prediction or categorical
# entries).
column_mapping = {'numerical_features': numerical_features}

In [None]:
# Create the data-drift dashboard and compute it for the reference data
# versus the week 1 slice of `current`.
week1_current = current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00']
data_drift_dashboard = Dashboard(tabs=[DataDriftTab])
data_drift_dashboard.calculate(
    reference,
    week1_current,
    column_mapping=column_mapping,
)

In [None]:
# Display the calculated data-drift dashboard.
data_drift_dashboard.show()

In [None]:
#data_drift_dashboard.save("reports/data_drift_dashboard_after_week1.html")