# Test Suites and Reports for Bicycle Demand Prediction

In [1]:
import pandas as pd
import numpy as np
import requests
import zipfile
import io

from datetime import datetime, time
from sklearn import datasets, ensemble

from evidently import ColumnMapping

from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, RegressionPreset

from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset, RegressionTestPreset
from evidently.tests import TestValueMeanError, TestValueMAE, TestValueRMSE

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


In [2]:
import warnings
# Silence every Python warning for the rest of the session. Both calls install
# a blanket 'ignore' filter, so the second one is redundant with the first.
# NOTE(review): this also hides warnings from pandas / scikit-learn that may
# point at real problems in the cells below — confirm this is intended.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Bicycle Demand Data

In [3]:
# Download the UCI bike-sharing archive and read the hourly table from it.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error page into an immediate, readable
# failure instead of a confusing "not a zip file" error further down.
response = requests.get(
    "https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip",
    timeout=60,
)
response.raise_for_status()
content = response.content

# 'dteday' (the calendar date) is parsed as a datetime and used as the index.
with zipfile.ZipFile(io.BytesIO(content)) as arc, arc.open("hour.csv") as hour_csv:
    raw_data = pd.read_csv(hour_csv, header=0, sep=',', parse_dates=['dteday'], index_col='dteday')

In [4]:
# Replace the date-only index with a full hourly timestamp: the row's calendar
# date plus its hour-of-day from the 'hr' column.
# This is done vectorised instead of with a row-wise apply: normalize() keeps
# only the date part (so re-running the cell is harmless), the 'hr' values are
# added as an hour offset, and rename(None) leaves the index unnamed.
raw_data.index = (
    raw_data.index.normalize()
    + pd.to_timedelta(raw_data['hr'].to_numpy(), unit='h')
).rename(None)

In [5]:
# Preview the first rows to check the parsed columns and the hourly index.
raw_data.head()

Unnamed: 0,instant,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
2011-01-01 00:00:00,1,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
2011-01-01 01:00:00,2,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2011-01-01 02:00:00,3,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
2011-01-01 03:00:00,4,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
2011-01-01 04:00:00,5,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


## Regression Model

### Model training

In [6]:
# Column the model is trained to predict.
target = 'cnt'
# Name of the column that will hold the model's predictions.
prediction = 'prediction'
# Input columns, split into the two groups passed to the column mapping below.
numerical_features = ['temp', 'atemp', 'hum', 'windspeed', 'hr', 'weekday']
categorical_features = ['season', 'holiday', 'workingday']

In [7]:
# Split by time: 'reference' (Jan 1-28, 2011) is used to train the model,
# 'current' (Jan 29 - Feb 28, 2011) is the later period it is evaluated on.
# .copy() makes each frame independent of raw_data, so adding the prediction
# column later writes to these frames unambiguously rather than to a slice.
reference = raw_data.loc['2011-01-01 00:00:00':'2011-01-28 23:00:00'].copy()
current = raw_data.loc['2011-01-29 00:00:00':'2011-02-28 23:00:00'].copy()

In [8]:
# Preview the first rows of the reference (training) window.
reference.head()

Unnamed: 0,instant,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
2011-01-01 00:00:00,1,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
2011-01-01 01:00:00,2,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2011-01-01 02:00:00,3,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
2011-01-01 03:00:00,4,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
2011-01-01 04:00:00,5,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [9]:
# Random forest with 50 trees; the fixed seed keeps runs reproducible.
regressor = ensemble.RandomForestRegressor(n_estimators=50, random_state=0)

In [10]:
# Train on the reference window only, using the numerical and categorical
# feature columns as inputs and the target column as the label.
regressor.fit(
    reference[numerical_features + categorical_features],
    reference[target],
)

In [11]:
# Score both windows with the fitted model, using the same feature columns
# (in the same order) that were used for training.
ref_prediction = regressor.predict(
    reference[numerical_features + categorical_features]
)
current_prediction = regressor.predict(
    current[numerical_features + categorical_features]
)

In [12]:
# Store the predictions under the column name held in `prediction`, so the
# column written here is always the one the column mapping points at, even if
# that name is changed later.
reference[prediction] = ref_prediction
current[prediction] = current_prediction

In [13]:
# Preview the reference window again, now including the prediction column.
reference.head()

Unnamed: 0,instant,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt,prediction
2011-01-01 00:00:00,1,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16,23.24
2011-01-01 01:00:00,2,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40,34.62
2011-01-01 02:00:00,3,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32,26.66
2011-01-01 03:00:00,4,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13,11.66
2011-01-01 04:00:00,5,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1,2.7


### Model Performance

In [14]:
# Tell Evidently which columns hold the target, the prediction, and the
# numerical / categorical features.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

In [15]:
# Regression report computed on the reference window itself (the data the
# model was trained on); no separate reference dataset is supplied.
regression_perfomance_report = Report(metrics=[RegressionPreset()])
regression_perfomance_report.run(
    reference_data=None,
    current_data=reference,
    column_mapping=column_mapping,
)

In [16]:
# Render the regression report computed in the previous cell.
regression_perfomance_report.show()

## Week 1

In [17]:
# Week 1 check on the window 2011-01-29 .. 2011-02-07: the suite passes when
# the mean error lies within [-10, 10] and the mean absolute error is at most
# 15. Thresholds are given explicitly, so no reference data is passed.
regression_performance_test = TestSuite(
    tests=[
        TestValueMeanError(gte=-10, lte=10),
        TestValueMAE(lte=15),
    ]
)
regression_performance_test.run(
    reference_data=None,
    current_data=current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'],
    column_mapping=column_mapping,
)
regression_performance_test

In [18]:
# Get the test results (per-test status and values plus a summary) as a JSON string.
regression_performance_test.json()

'{"version": "0.4.0", "tests": [{"name": "Mean Error (ME)", "description": "The ME is -6.14. The test threshold is gte=-10 and lte=10.", "status": "SUCCESS", "group": "regression", "parameters": {"condition": {"gte": -10, "lte": 10}, "value": -6.141446808510638}}, {"name": "Mean Absolute Error (MAE)", "description": "The MAE is 13.4. The test threshold is lte=15", "status": "SUCCESS", "group": "regression", "parameters": {"condition": {"lte": 15}, "value": 13.381276595744682}}], "summary": {"all_passed": true, "total_tests": 2, "success_tests": 2, "failed_tests": 0, "by_status": {"SUCCESS": 2}}, "timestamp": "2023-07-27 17:17:35.509207"}'

In [19]:
# Write the test suite to 'test_suite.html' in the working directory.
regression_performance_test.save_html('test_suite.html')

In [20]:
# Write the test suite to 'test_suite.json' in the working directory.
regression_performance_test.save_json('test_suite.json')

In [21]:
# Get the same test results as a Python dict ('tests' and 'summary' keys).
regression_performance_test.as_dict()

{'tests': [{'name': 'Mean Error (ME)',
   'description': 'The ME is -6.14. The test threshold is gte=-10 and lte=10.',
   'status': 'SUCCESS',
   'group': 'regression',
   'parameters': {'condition': {'gte': -10, 'lte': 10},
    'value': -6.141446808510638}},
  {'name': 'Mean Absolute Error (MAE)',
   'description': 'The MAE is 13.4. The test threshold is lte=15',
   'status': 'SUCCESS',
   'group': 'regression',
   'parameters': {'condition': {'lte': 15}, 'value': 13.381276595744682}}],
 'summary': {'all_passed': True,
  'total_tests': 2,
  'success_tests': 2,
  'failed_tests': 0,
  'by_status': {'SUCCESS': 2}}}

In [22]:
# Save the test suite to 'snapshot.json' so it can be reloaded later.
# NOTE(review): _save is underscore-prefixed (private by convention) — check
# whether the installed Evidently version offers a public equivalent.
regression_performance_test._save('snapshot.json')

In [23]:
# Load a test suite back from 'snapshot.json'.
# NOTE(review): _load is underscore-prefixed (private by convention) — check
# whether the installed Evidently version offers a public equivalent.
loader_test_suite = TestSuite._load('snapshot.json')

In [24]:
# Render the test suite that was loaded from the snapshot file.
loader_test_suite.show()

## Week 2

In [None]:
# Week 2 check, with the same thresholds as Week 1: mean error within
# [-10, 10] and mean absolute error at most 15.
regression_performance_test = TestSuite(tests=[
    TestValueMeanError(lte=10, gte=-10),
    TestValueMAE(lte=15),
])

# The Week 1 window already runs through 2011-02-07 23:00, so this window
# starts on 2011-02-08 to avoid scoring Feb 7 twice. That gives a seven-day
# window (Feb 8-14) that lines up with Week 3 (Feb 15-21).
regression_performance_test.run(reference_data=None,
                                current_data=current.loc['2011-02-08 00:00:00':'2011-02-14 23:00:00'],
                                column_mapping=column_mapping)
regression_performance_test

## Week 3

In [None]:
# Week 3 check on the window 2011-02-15 .. 2011-02-21, with the same
# thresholds as before: mean error within [-10, 10], MAE at most 15.
regression_performance_test = TestSuite(
    tests=[
        TestValueMeanError(gte=-10, lte=10),
        TestValueMAE(lte=15),
    ]
)
regression_performance_test.run(
    reference_data=None,
    current_data=current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'],
    column_mapping=column_mapping,
)
regression_performance_test

## What has happened?

In [None]:
# Regression report for the Week 3 window, this time with the reference
# (training) window supplied alongside it.
regression_report = Report(metrics=[RegressionPreset()])
regression_report.run(
    reference_data=reference,
    current_data=current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'],
    column_mapping=column_mapping,
)
regression_report

In [None]:
# Data drift report over the same two windows: the reference (training)
# window and the Week 3 window.
drift_report = Report(metrics=[DataDriftPreset()])
drift_report.run(
    reference_data=reference,
    current_data=current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'],
    column_mapping=column_mapping,
)
drift_report

# Support Evidently
Enjoyed the tutorial? Star Evidently on GitHub to contribute back! This helps us continue creating free open-source tools for the community. https://github.com/evidentlyai/evidently