# Regression Performance Dashboard for Bike Sharing Dataset

In [1]:
import pandas as pd
import requests
import zipfile
import io

from sklearn.ensemble import RandomForestRegressor

from evidently import ColumnMapping
from evidently.dashboard import Dashboard
from evidently.tabs import RegressionPerformanceTab, DataDriftTab

from evidently.model_profile import Profile
from evidently.profile_sections import RegressionPerformanceProfileSection

In [2]:
import warnings

# Suppress every warning for the rest of the session (presumably to keep the
# notebook output readable). A single simplefilter('ignore') registers the
# same catch-all filter that filterwarnings('ignore') would, so one call is
# enough.
warnings.simplefilter('ignore')

## Bike Sharing Demand Data

More information about the dataset can be found in the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset

Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg

In [None]:
# Download the UCI Bike Sharing archive and load the daily table from it.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than letting an error
# page reach the zip reader.
response = requests.get(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip",
    timeout=60,
)
response.raise_for_status()
content = response.content
with zipfile.ZipFile(io.BytesIO(content)) as arc:
    # Open the archive member in its own context so the handle is closed
    # once pandas has read it.
    with arc.open("day.csv") as day_csv:
        # 'dteday' is parsed as a date and used as the index.
        raw_data = pd.read_csv(day_csv, header=0, sep=',', parse_dates=['dteday'], index_col='dteday')

In [None]:
# Split by date: January-April 2011 is the reference period and May 2011 is
# treated as production data.
# NOTE(review): the production upper bound is 2011-05-30, so 31 May is left
# out - confirm this is intended.
# .copy() makes each split an independent frame, so adding a column to it
# later does not write into a slice of raw_data (which triggers pandas'
# SettingWithCopyWarning).
ref_data = raw_data.loc['2011-01-01 00:00:00':'2011-04-30 23:00:00'].copy()
prod_data = raw_data.loc['2011-05-01 00:00:00':'2011-05-30 23:00:00'].copy()

In [None]:
# Echo the reference frame as the cell output for a quick visual check.
ref_data

## Regression Model

### Model training

In [None]:
# Names of the columns that play a special role in the data.
target = 'cnt'
prediction = 'prediction'
# NOTE: this variable would shadow the stdlib `datetime` module if that
# module were imported under its own name in this notebook.
datetime = 'dteday'

# Model inputs, grouped by type.
numerical_features = [
    'temp',
    'atemp',
    'hum',
    'windspeed',
]
categorical_features = [
    'season',
    'holiday',
    'workingday',
    'weathersit',
]

# Full feature list: numerical columns first, then the categorical ones.
features = [*numerical_features, *categorical_features]

In [None]:
# Random forest regressor; only the seed is set, so repeated runs are reproducible.
model = RandomForestRegressor(random_state=0)

In [None]:
# Train on the reference period only, using the selected feature columns.
model.fit(ref_data[features], ref_data[target])

In [None]:
# Store the model output under the column name held in `prediction`, so the
# column always matches the name declared next to `target`.
ref_data[prediction] = model.predict(ref_data[features])
prod_data[prediction] = model.predict(prod_data[features])

## Regression Performance Report

## verbose_level
Usage:
- verbose_level=0 for the short reports
- verbose_level=1 for the full reports

In [None]:
# Tell Evidently which columns hold the target, the prediction and the
# features. Keyword arguments are used so the mapping does not depend on the
# positional order of ColumnMapping's fields.
column_mapping = ColumnMapping(target=target,
                               prediction=prediction,
                               numerical_features=numerical_features,
                               categorical_features=categorical_features)

In [None]:
# Dashboard with a single regression-performance tab; verbose_level=0 selects the short report.
dashboard = Dashboard(tabs=[RegressionPerformanceTab(verbose_level=0)])

In [None]:
# Compute the dashboard from the reference and production frames using the column mapping.
dashboard.calculate(ref_data, prod_data, column_mapping=column_mapping)

In [None]:
# Display the calculated dashboard.
dashboard.show()

## Options

**Notes**: 
You can choose among the following DataDriftOptions
* confidence: float = 0.95
* drift_share: float = 0.5
* nbinsx: Optional[Dict[str, int]] = None
* xbins: Optional[Dict[str, int]] = None
* stattest_func: Optional[Callable] = None
* feature_stattest_func: Optional[Dict[str, Callable]] = None
* cat_target_stattest_func: Optional[Callable] = None
* num_target_stattest_func: Optional[Callable] = None

For parameters that take a function, you need to define a function that takes two pd.Series (reference and current data) and returns a number (p_value).

**Usage**:
- Dashboard(tabs=[DataDriftTab(), NumTargetDriftTab()], options=[options])

More options are coming soon!

In [None]:
from evidently.options import DataDriftOptions

In [None]:
# Data-drift options: nbinsx of 20 for three numerical features and a 0.99 confidence level.
opt = DataDriftOptions(
    nbinsx={'temp': 20, 'atemp': 20, 'hum': 20},
    confidence=0.99,
)

In [None]:
# Rebuild the dashboard with an additional data-drift tab and hand it the
# options object through `options`.
dashboard = Dashboard(
    tabs=[
        RegressionPerformanceTab(verbose_level=0),
        DataDriftTab(),
    ],
    options=[opt],
)
dashboard.calculate(ref_data, prod_data, column_mapping=column_mapping)
dashboard.show()

## include_widgets
Notes: 
- include_widgets overwrites the verbose_level parameter
- in order to quickly see a list of available widgets, run the list_widgets() method of the tab object

Usage:
- include_widgets=["Regression Model Performance Report.", "Reference: Predicted vs Actual"]

In [None]:
# List the widget names that can be passed to include_widgets.
RegressionPerformanceTab.list_widgets()

In [None]:
# Pick individual widgets by name instead of relying on verbose_level.
dashboard = Dashboard(tabs=[RegressionPerformanceTab(include_widgets=[
    "Regression Model Performance Report.",
    "Reference: Error Distribution",
    "Current: Error Distribution",
])])

In [None]:
# Compute the widget-selected dashboard from the reference and production frames.
dashboard.calculate(ref_data, prod_data, column_mapping=column_mapping)

In [None]:
# Display the calculated dashboard.
dashboard.show()

## Custom Widget

### Draft a Custom Widget with Plotly

In [None]:
import plotly.express as px

In [None]:
# Prototype of the custom chart: the 'cnt' column plotted against the index of the reference frame.
figure = px.line(ref_data, x=ref_data.index, y='cnt', title='Time Series with Rangeslider')
# Enable the range slider on the x axis.
figure.update_xaxes(rangeslider_visible=True)
figure.show()

### Create a Class for the Custom Widget

In [None]:
import json
from typing import Optional

#from evidently.analyzers.regression_performance_analyzer import RegressionPerformanceAnalyzer
from evidently.model.widget import BaseWidgetInfo #, AlertStats
from evidently.widgets.widget import Widget, RED, GREY

In [None]:
class TSRangesliderWidget(Widget):
    """Custom widget that draws one column as a line chart with a range slider.

    The x axis is the index of the selected frame, and the y axis is the
    configured column.
    """

    def __init__(self, title: str, column: str, use_current=False, fig_size=1):
        """Store the widget configuration.

        Args:
            title: Title placed on the resulting widget.
            column: Name of the column plotted on the y axis.
            use_current: When true, plot the current data; otherwise plot the
                reference data.
            fig_size: Value passed through as the widget ``size``.
        """
        self.title = title
        self.column = column
        self.use_current = use_current
        self.fig_size = fig_size

    def calculate(self,
                  reference_data: pd.DataFrame,
                  current_data: pd.DataFrame,
                  column_mapping,
                  analyzers_results) -> Optional[BaseWidgetInfo]:
        """Build the plotly figure and wrap it in a ``BaseWidgetInfo``.

        ``column_mapping`` and ``analyzers_results`` are part of the call
        signature but are not used by this widget.

        Args:
            reference_data: Frame plotted when ``use_current`` is false.
            current_data: Frame plotted when ``use_current`` is true.
            column_mapping: Unused.
            analyzers_results: Unused.

        Returns:
            A ``big_graph`` widget description carrying the figure's ``data``
            and ``layout``.

        Raises:
            ValueError: If the frame selected by ``use_current`` is ``None``.
        """
        frame = current_data if self.use_current else reference_data
        if frame is None:
            # Fail with a clear message instead of an AttributeError on
            # `frame.index` below.
            missing = 'current_data' if self.use_current else 'reference_data'
            raise ValueError(f"{missing} should be present")

        figure = px.line(frame, x=frame.index, y=self.column, color_discrete_sequence=[RED, GREY])
        figure.update_xaxes(rangeslider_visible=True)

        # Round-trip through JSON so the figure becomes plain dicts and lists.
        figure_json = json.loads(figure.to_json())

        return BaseWidgetInfo(
            title=self.title,
            type="big_graph",
            size=self.fig_size,
            params={
                "data": figure_json['data'],
                "layout": figure_json['layout']
            },
        )

In [None]:
# include_widgets mixes built-in widgets (referenced by name) with an instance of the custom widget.
dashboard = Dashboard(tabs=[RegressionPerformanceTab(include_widgets=[
    "Regression Model Performance Report.",
    TSRangesliderWidget('Reference: Target with Rangeslider', 'cnt', fig_size=2),
    "Reference: Error Distribution",
    "Current: Error Distribution",
])])

In [None]:
# Compute the dashboard, including the custom widget, from the reference and production frames.
dashboard.calculate(ref_data, prod_data, column_mapping=column_mapping)

In [None]:
# Display the calculated dashboard.
dashboard.show()

## Custom Tab

In [None]:
from evidently.tabs.base_tab import Tab, Verbose

class TimeSeriesTab(Tab):
    """Custom tab made of range-slider time-series widgets.

    Each entry pairs a widget with a ``Verbose`` level. Presumably
    ``Verbose.ALWAYS`` widgets appear in every report and ``Verbose.FULL``
    widgets only in full reports - confirm against the ``Tab`` implementation.
    """

    widgets = [
        (TSRangesliderWidget('Reference: Target with Rangeslider', 'cnt', fig_size=2), Verbose.ALWAYS),
        (TSRangesliderWidget('Current: Target with Rangeslider', 'cnt', use_current=True, fig_size=2), Verbose.ALWAYS),
        (TSRangesliderWidget('Reference: Temp with Rangeslider', 'temp', fig_size=1), Verbose.FULL),
        # This widget plots the current data (use_current=True), so its title says "Current".
        (TSRangesliderWidget('Current: Temp with Rangeslider', 'temp', use_current=True, fig_size=1), Verbose.FULL),
    ]

In [None]:
# Dashboard consisting only of the custom time-series tab.
dashboard = Dashboard(tabs=[TimeSeriesTab()])

In [None]:
# Compute the custom-tab dashboard from the reference and production frames, then display it.
dashboard.calculate(ref_data, prod_data, column_mapping=column_mapping)
dashboard.show()

In [None]:
#dashboard.save('bike_sharing_demand_model_perfomance.html')