# Regression Performance Dashboard for Bike Sharing Dataset

In [81]:
import pandas as pd
import numpy as np
import requests
import zipfile
import io

from sklearn.ensemble import RandomForestRegressor

from evidently.dashboard import Dashboard
from evidently.tabs import RegressionPerformanceTab

from evidently.model_profile import Profile
from evidently.profile_sections import RegressionPerformanceProfileSection

## Bike Sharing Demand Data

More information about the dataset can be found in the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset

Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg

In [82]:
# Download the zipped dataset. The timeout stops the cell from hanging on a stalled
# connection, and raise_for_status() surfaces an HTTP error here rather than as a
# confusing BadZipFile when the error page is handed to zipfile below.
response = requests.get(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip",
    timeout=30,
)
response.raise_for_status()
content = response.content

# Read day.csv straight from the in-memory archive; both the archive and the member
# file handle are closed when the with-block exits.
with zipfile.ZipFile(io.BytesIO(content)) as arc, arc.open("day.csv") as day_csv:
    # dteday is parsed as a date and becomes the index of the frame.
    raw_data = pd.read_csv(day_csv, header=0, sep=',', parse_dates=['dteday'], index_col='dteday')

In [83]:
# Reference window: the first 120 rows; production window: the next 30 rows.
# .iloc makes the positional slicing explicit, and .copy() detaches each window from
# raw_data so that columns added to these frames later are written to independent
# frames instead of slices of raw_data (which triggers SettingWithCopyWarning).
ref_data = raw_data.iloc[:120].copy()
prod_data = raw_data.iloc[120:150].copy()

In [84]:
# Preview the reference window (bare last expression -> rich DataFrame display).
ref_data

Unnamed: 0_level_0,instant,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
dteday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2011-01-01,1,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
2011-01-02,2,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2011-01-03,3,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
2011-01-04,4,1,0,1,0,2,1,1,0.200000,0.212122,0.590435,0.160296,108,1454,1562
2011-01-05,5,1,0,1,0,3,1,1,0.226957,0.229270,0.436957,0.186900,82,1518,1600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011-04-26,116,2,0,4,0,2,1,1,0.631667,0.594083,0.729167,0.326500,678,3722,4400
2011-04-27,117,2,0,4,0,3,1,2,0.620000,0.575142,0.835417,0.312200,547,3325,3872
2011-04-28,118,2,0,4,0,4,1,2,0.617500,0.578929,0.700833,0.320908,569,3489,4058
2011-04-29,119,2,0,4,0,5,1,1,0.510000,0.497463,0.457083,0.240063,878,3717,4595


## Regression Model

### Model training

In [85]:
# Column names: the label, the model-output column, and the date column.
target, prediction, datetime = 'cnt', 'prediction', 'dteday'

# Feature groups; each list is later passed under its own key in the column mapping.
numerical_features = [
    'temp',
    'atemp',
    'hum',
    'windspeed',
    'weekday',
]
categorical_features = [
    'season',
    'holiday',
    'workingday',
    'weathersit',
]

# Full model input: numerical columns first, then categorical ones.
features = [*numerical_features, *categorical_features]

In [86]:
# Random forest regressor with a fixed seed (random_state=0).
model = RandomForestRegressor(random_state=0)

In [87]:
# Train on the reference window only; the fitted estimator is the cell's output.
model.fit(X=ref_data[features], y=ref_data[target])

RandomForestRegressor(random_state=0)

In [88]:
# Attach the model output to both windows under the column name held in `prediction`
# (the same name the column mapping reports), instead of repeating the literal.
# .assign returns a new frame, so the column is never written into a slice of
# raw_data (the cause of pandas' SettingWithCopyWarning). Re-running the cell is
# safe: the model only reads the `features` columns, and the prediction column is
# simply overwritten.
ref_data = ref_data.assign(**{prediction: model.predict(ref_data[features])})
prod_data = prod_data.assign(**{prediction: model.predict(prod_data[features])})

## Regression Performance Report

In [89]:
# Column roles handed to evidently's calculate() calls: which column is the target,
# which holds the model output, and which columns belong to each feature group.
# Built as one literal, reusing the `prediction` name defined with the other column
# names so the mapping cannot drift from the column actually written.
column_mapping = {
    'target': target,
    'prediction': prediction,
    'numerical_features': numerical_features,
    'categorical_features': categorical_features,
}

In [90]:
# Dashboard with a single tab: regression performance.
dashboard = Dashboard(tabs=[RegressionPerformanceTab])

In [91]:
# Compute the dashboard contents: reference frame first, production frame second.
dashboard.calculate(ref_data, prod_data, column_mapping=column_mapping)

In [92]:
# Display the computed dashboard.
dashboard.show()

In [93]:
# Uncomment to save the dashboard to an HTML file:
# dashboard.save('bike_sharing_demand_model_performance.html')

## Regression Model Profile

In [94]:
# Profile with a single section: regression performance.
bike_regression_performance_profile = Profile(sections=[RegressionPerformanceProfileSection])

In [95]:
# Compute the profile on the same inputs as the dashboard: reference frame first,
# production frame second, with the shared column mapping.
bike_regression_performance_profile.calculate(ref_data, prod_data, column_mapping=column_mapping)

In [96]:
# Serialize the computed profile through its .json() method.
regression_profile = bike_regression_performance_profile.json()

In [97]:
# Inspect the serialized profile (rendered below as one long JSON string).
regression_profile

'{"regression_performance": {"name": "regression_performance", "datetime": "2021-11-19 15:08:40.424806", "data": {"utility_columns": {"date": null, "id": null, "target": "cnt", "prediction": "prediction", "drift_conf_level": 0.95, "drift_features_share": 0.5, "nbinsx": null, "xbins": null}, "cat_feature_names": ["season", "holiday", "workingday", "weathersit"], "num_feature_names": ["temp", "atemp", "hum", "windspeed", "weekday"], "target_names": null, "metrics": {"reference": {"mean_error": 3.885499999999998, "mean_abs_error": 136.9645, "mean_abs_perc_error": 8.978901053926855, "error_std": 196.46870323969944, "abs_error_std": 140.35049023424034, "abs_perc_error_std": 15.150141883587574, "error_normality": {"order_statistic_medians_x": [-2.526542275665766, -2.197894402213753, -2.0086641993623844, -1.8721280960359694, -1.7635663945500484, -1.672523510822877, -1.5935482122864373, -1.5234210952712135, -1.4600748144725424, -1.40209915455854, -1.3484871031580363, -1.2984932585000837, -1.25