In [None]:
try:
    import evidently
except:
    !pip install git+https://github.com/evidentlyai/evidently.git

In [None]:
import pandas as pd
import numpy as np
import requests
import zipfile
import io

from datetime import datetime, time
from sklearn import datasets, ensemble

from evidently import ColumnMapping
from evidently.test_suite import TestSuite
from evidently.report import Report
from evidently.metrics import DataDriftTable
from evidently.tests import TestShareOfDriftedColumns

In [None]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Prepare Datasets

In [None]:
# Download the UCI Bike Sharing archive. TLS certificate verification is left at
# the requests default (enabled) so the data cannot be silently tampered with in
# transit, and the timeout keeps a stalled connection from hanging the notebook.
response = requests.get(
    "https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip",
    timeout=60,
)
# Fail here with a clear HTTP error instead of later with an opaque BadZipFile.
response.raise_for_status()
content = response.content

# Read hour.csv straight from the in-memory archive, indexed by the parsed
# 'dteday' column; the archive and the member handle are both closed on exit.
with zipfile.ZipFile(io.BytesIO(content)) as arc, arc.open("hour.csv") as hourly_csv:
    raw_data = pd.read_csv(hourly_csv, header=0, sep=',', parse_dates=['dteday'], index_col='dteday')


In [None]:
# Fold the 'hr' column into the date index so each row's timestamp carries both
# the date and the hour. normalize() resets any existing time-of-day to midnight
# first, so re-running the cell yields the same index. This is a vectorized
# replacement for combining date and hour row by row with DataFrame.apply.
raw_data.index = raw_data.index.normalize() + pd.to_timedelta(raw_data['hr'].to_numpy(), unit='h')

In [None]:
# Column roles shared by the regression model and the drift reports.
target, prediction = 'cnt', 'prediction'

# Model inputs, split by how the column mapping should treat them.
numerical_features = [
    'temp',
    'atemp',
    'hum',
    'windspeed',
    'hr',
    'weekday',
]
categorical_features = [
    'season',
    'holiday',
    'workingday',
]

In [None]:
# Time-based split: the first four weeks of January 2011 are the reference
# window, everything from 29 January through February is the current window.
# .copy() makes each window an independent frame, so adding columns to it later
# does not write into a slice of raw_data (SettingWithCopyWarning).
reference = raw_data.loc['2011-01-01 00:00:00':'2011-01-28 23:00:00'].copy()
current = raw_data.loc['2011-01-29 00:00:00':'2011-02-28 23:00:00'].copy()

## Regression Model

In [None]:
# Random forest with 50 trees; the fixed random_state keeps runs repeatable.
regressor = ensemble.RandomForestRegressor(
    n_estimators=50,
    random_state=42,
)

In [None]:
# Train on the reference window only, using numerical then categorical columns.
reference_features = reference[numerical_features + categorical_features]
reference_target = reference[target]
regressor.fit(reference_features, reference_target)

In [None]:
# Score both windows with the fitted model, using the same column order as in training.
feature_columns = numerical_features + categorical_features
ref_prediction = regressor.predict(reference[feature_columns])
current_prediction = regressor.predict(current[feature_columns])

In [None]:
# Attach the model output under the column named by `prediction`, the same name
# the column mapping uses. assign() returns a new frame instead of writing into
# the existing one, so the cell is safe to re-run and never mutates a slice of
# raw_data.
reference = reference.assign(**{prediction: ref_prediction})
current = current.assign(**{prediction: current_prediction})

In [None]:
# Show the model's importances labelled by feature name, largest first. The
# index order matches the column order the regressor was fitted with
# (numerical features followed by categorical features).
pd.Series(
    regressor.feature_importances_,
    index=numerical_features + categorical_features,
    name='importance',
).sort_values(ascending=False)

## Drift analysis with importances: default importances estimation

In [None]:
# Tell evidently which columns hold the target, the prediction and each feature type.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

In [None]:
# Compare the reference window against the first ten days of the current window.
# No importances are supplied here, so the drift table uses its own estimation.
current_window = current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00']

report = Report(metrics=[DataDriftTable(feature_importance=True)])
report.run(
    reference_data=reference,
    current_data=current_window,
    column_mapping=column_mapping,
)

In [None]:
# Display the drift report built with the default importances estimation.
report.show()

## Drift analysis with importances: custom importances

In [None]:
# Fresh column mapping for the custom-importances run: same target, prediction
# and feature lists as before.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

In [None]:
# Same drift table as before, but with importances supplied explicitly instead
# of estimated: each feature name is paired with the fitted regressor's
# importance. The name order (numerical, then categorical) matches the column
# order the regressor was trained on, so zip() lines the two sequences up.
report = Report(metrics = [
    DataDriftTable(feature_importance=True)
])
report.run(reference_data=reference,
           current_data=current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'],
           column_mapping=column_mapping,
           additional_data = {
               'current_feature_importance': dict(
                   zip(numerical_features + categorical_features, regressor.feature_importances_)
               )
           }
           )

In [None]:
# Display the drift report built with the regressor's own feature importances.
report.show()