# Data Quality Dashboard for Bike Sharing Dataset

In [None]:
import pandas as pd
import requests
import zipfile
import io

from sklearn.ensemble import RandomForestRegressor

from evidently import ColumnMapping
from evidently.dashboard import Dashboard
from evidently.dashboard.tabs import DataQualityTab

from evidently.model_profile import Profile
from evidently.model_profile.sections import DataQualityProfileSection

## Bike Sharing Demand Data

More information about the dataset can be found in the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset

Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg

In [None]:
# Download the Bike Sharing archive and load "day.csv" from the in-memory zip.
response = requests.get(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip",
    timeout=60,  # fail instead of hanging indefinitely if the server stalls
)
# Surface HTTP errors here rather than as a confusing zip error further down.
response.raise_for_status()
content = response.content
with zipfile.ZipFile(io.BytesIO(content)) as arc:
    # Close the archive member explicitly once pandas has finished reading it.
    with arc.open("day.csv") as day_csv:
        raw_data = pd.read_csv(day_csv, header=0, sep=',', parse_dates=['dteday'])

In [None]:
# Reference window: the first 120 rows; production window: the following 30.
# Take explicit copies so that columns added to these frames later are written
# to independent DataFrames instead of slices of raw_data (this avoids pandas'
# SettingWithCopyWarning).
ref_data = raw_data[:120].copy()
prod_data = raw_data[120:150].copy()

In [None]:
# Show the reference frame as the cell output for a quick visual check.
ref_data

## Regression Model

### Model training

In [None]:
# Column roles used by the model and by the Evidently column mapping.
target = "cnt"
prediction = "prediction"
datetime = "dteday"

numerical_features = ["temp", "atemp", "hum", "windspeed", "weekday"]
categorical_features = ["season", "holiday", "workingday", "weathersit"]

# Model inputs: numerical columns first, followed by the categorical ones.
features = [*numerical_features, *categorical_features]

In [None]:
# Random forest regressor with a fixed seed so repeated runs give the same model.
model = RandomForestRegressor(random_state=0)

In [None]:
# Train on the reference window only: feature columns as inputs, `target` as label.
model.fit(
    ref_data[features],
    ref_data[target],
)

In [None]:
# Store model output under the column named by `prediction`, so the column
# name is defined in one place rather than repeated as a string literal.
ref_data[prediction] = model.predict(ref_data[features])
prod_data[prediction] = model.predict(prod_data[features])

## Data Quality Dashboard

In [None]:
# Tell Evidently which role each column plays. Every argument is passed by
# keyword and the target/prediction names come from the shared constants, so
# the mapping cannot drift from the columns actually written to the frames.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    datetime=datetime,
    task='regression',
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

In [None]:
# Dashboard made up of a single Data Quality tab.
dashboard = Dashboard(
    tabs=[DataQualityTab()],
)

In [None]:
# Compute the dashboard contents: reference frame first, production frame second.
dashboard.calculate(
    ref_data,
    prod_data,
    column_mapping=column_mapping,
)

In [None]:
# Display the calculated dashboard.
dashboard.show()

In [None]:
#dashboard.save('reports/bike_sharing_demand_data_quality.html')

## Data Quality Profile

In [None]:
# Profile made up of a single Data Quality section.
profile = Profile(
    sections=[DataQualityProfileSection()],
)

In [None]:
# Compute the profile: reference frame first, production frame second.
profile.calculate(
    ref_data,
    prod_data,
    column_mapping=column_mapping,
)

In [None]:
# Replace the profile object with its serialized form (presumably a JSON
# string), keeping the name `profile` for the result.
# The guard makes this cell safe to re-run: once `profile` has been replaced,
# the serialized value no longer has a .json() method to call.
if hasattr(profile, "json"):
    profile = profile.json()

In [None]:
# Show the current value of `profile` as the cell output.
profile