In [2]:
# Step up to the parent directory; later cells save reports under the relative ./reports/ path.
# NOTE(review): this magic is not idempotent — re-running the cell moves up one more level.
%cd ..

c:\Users\HuyLQ15_CTV\Desktop\model_monitoring


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [3]:
import io
import zipfile
from datetime import datetime, time
from pathlib import Path

import numpy as np
import pandas as pd
import requests
from sklearn import datasets, ensemble

from evidently import ColumnMapping
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, RegressionPreset
from evidently.report import Report

In [4]:
import warnings

# Silence every warning category for the remainder of the session.
warnings.simplefilter("ignore")

In [5]:
# Download the UCI Bike Sharing archive and load its hourly table, indexed by the `dteday` date column.
DATASET_URL = "https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip"

# NOTE(review): verify=False disables TLS certificate validation. It is kept here because it may be
# an environment workaround, but it should be removed wherever the certificate chain can be verified.
response = requests.get(DATASET_URL, verify=False, timeout=60)
# Fail with a clear HTTP error instead of an opaque BadZipFile when the download did not succeed.
response.raise_for_status()
content = response.content

# Open the archive member in a context manager so its handle is closed once the CSV is parsed.
with zipfile.ZipFile(io.BytesIO(content)) as arc, arc.open("hour.csv") as hourly_csv:
    raw_data = pd.read_csv(hourly_csv, header=0, sep=',', parse_dates=['dteday'], index_col='dteday')

In [6]:
from datetime import time, datetime

def _hourly_timestamp(row):
    """Combine the row's index label (its date) with the hour stored in the `hr` column."""
    hour_of_day = time(hour=int(row['hr']))
    return datetime.combine(row.name, hour_of_day)


# Replace the date-only index with one timestamp per hourly record.
raw_data.index = raw_data.apply(_hourly_timestamp, axis=1)

In [7]:
# Preview the first five rows after the index has been rebuilt.
raw_data.head(n=5)

Unnamed: 0,instant,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
2011-01-01 00:00:00,1,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
2011-01-01 01:00:00,2,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2011-01-01 02:00:00,3,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
2011-01-01 03:00:00,4,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
2011-01-01 04:00:00,5,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


# Regression Quality

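* Step 1: Prepare the data and fit a baseline regressor on the reference period.
* Step 2: Build the default regression performance report with `RegressionPreset`.
* Step 3: Build a custom regression performance metric and report.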

## Prepare data

In [8]:
# Column roles shared by the model-training cells and the evidently reports.
target = 'cnt'
prediction = 'prediction'
numerical_features = ['temp', 'atemp', 'hum', 'windspeed', 'hr', 'weekday']
categorical_features = ['season', 'holiday', 'workingday']

# Describe those roles to evidently in a single constructor call.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

In [9]:
# Split by time window: the first four weeks of January 2011 form the reference period,
# and 29 January through the end of February forms the current period.
# Take explicit copies so that adding a column to either frame later operates on an
# independent DataFrame rather than on a slice of `raw_data` (avoids SettingWithCopyWarning).
reference = raw_data.loc['2011-01-01 00:00:00':'2011-01-28 23:00:00'].copy()
current = raw_data.loc['2011-01-29 00:00:00':'2011-02-28 23:00:00'].copy()

In [10]:
# Model inputs: numerical features followed by categorical features, built once and reused.
feature_columns = numerical_features + categorical_features

# Fit the random forest on the reference period only.
regressor = ensemble.RandomForestRegressor(random_state=0, n_estimators=50)
regressor.fit(reference[feature_columns], reference[target])

# Score both periods with the fitted model.
ref_prediction = regressor.predict(reference[feature_columns])
current_prediction = regressor.predict(current[feature_columns])

# Store predictions under the column name held in `prediction` — the same name the
# column mapping uses — so the two cannot drift apart.
reference[prediction] = ref_prediction
current[prediction] = current_prediction

# Regression Performance Default

In [None]:
from evidently.metric_preset import RegressionPreset
from evidently.report import Report

In [26]:
# NOTE(review): `data_quality_report` is never run or saved in the cells visible here; it is kept
# only in case a later cell relies on the name.
data_quality_report = Report(metrics=[
    RegressionPreset()
])

REGRESSION_REPORT_PATH = './reports/Regression Performance.json'
# Create the output folder up front so saving does not fail on a fresh checkout.
Path(REGRESSION_REPORT_PATH).parent.mkdir(parents=True, exist_ok=True)

# The reference window is passed as `current_data` and no reference dataset is supplied,
# so the report is computed on the reference window alone.
regression_performance = Report(metrics=[RegressionPreset()], options={"render": {"raw_data": True}})
regression_performance.run(current_data=reference, reference_data=None, column_mapping=column_mapping)
regression_performance.save_json(REGRESSION_REPORT_PATH)

# Regression Performance Custom

In [35]:
from typing import Optional, List, Union, Dict
from evidently.base_metric import MetricResult
from evidently.base_metric import Metric
from evidently.base_metric import InputData

from sklearn.metrics import mean_absolute_error, mean_squared_error, root_mean_squared_error
from sklearn.metrics import mean_squared_log_error, root_mean_squared_log_error
from sklearn.metrics import r2_score

class RegressionResults(MetricResult):
    """Regression error metrics computed for a single dataset, stored as plain floats."""
    class Config:
        # Type alias string for this result class (how evidently consumes it is not shown in this file).
        type_alias = "evidently:metric_result:RegressionResults"
        
    mae: float  # mean absolute error
    mse: float  # mean squared error
    rmse: float  # root mean squared error
    mean_square_log_error: float  # mean squared logarithmic error
    root_mean_square_log_error: float  # root mean squared logarithmic error
    r2: float  # R^2 score


def calculate_regression_metrics(data: pd.DataFrame, true_label_col: str, prediction_col: str) -> RegressionResults:
    """Compute the regression error metrics for one dataset.

    Args:
        data: Frame holding both the ground-truth and the predicted values.
        true_label_col: Name of the column with the ground-truth values.
        prediction_col: Name of the column with the model predictions.

    Returns:
        A ``RegressionResults`` with MAE, MSE, RMSE, MSLE, RMSLE and R^2, each cast to ``float``.
    """
    actual = data[true_label_col]
    predicted = data[prediction_col]

    # Evaluated in the same order as the fields are reported: MAE, MSE, RMSE, MSLE, RMSLE, R^2.
    scores = {
        "mae": mean_absolute_error(actual, predicted),
        "mse": mean_squared_error(actual, predicted),
        "rmse": root_mean_squared_error(actual, predicted),
        "mean_square_log_error": mean_squared_log_error(actual, predicted),
        "root_mean_square_log_error": root_mean_squared_log_error(actual, predicted),
        "r2": r2_score(actual, predicted),
    }

    return RegressionResults(**{name: float(value) for name, value in scores.items()})

RegressionResults(type='evidently:metric_result:RegressionResults', mae=4.104142394822006, mse=40.476731391585766, rmse=6.36213261348628, mean_square_log_error=0.03299255448524422, root_mean_square_log_error=0.18163852698489993, r2=0.9832837012532686)

In [75]:
class RegressionPerformanceResults(MetricResult):
    """Regression metrics for the current dataset and, optionally, for the reference dataset."""
    class Config:
        # Type alias string for this result class (how evidently consumes it is not shown in this file).
        type_alias = "evidently:metric_result:RegressionPerformanceResults"

    # Metrics for the reference dataset; None when no reference data was provided.
    reference: Optional[RegressionResults]
    # Metrics for the current dataset.
    current: RegressionResults

class RegressionPerformanceMetrics(Metric[RegressionPerformanceResults]):
    """Custom evidently metric that scores predictions against the target column.

    The target and prediction column names come from the ``ColumnMapping`` given to the
    constructor. Metrics are always computed for the current dataset and, when one is
    supplied, for the reference dataset as well.
    """

    class Config:
        type_alias = "evidently:metric:RegressionPerformanceMetrics"

    _column_map: ColumnMapping

    def __init__(self, column_map: ColumnMapping):
        # The mapping is stored before the base initialiser runs; keep this order.
        self._column_map = column_map
        super().__init__()

    def calculate(self, data: InputData) -> RegressionPerformanceResults:
        """Compute regression metrics for the reference (if present) and current datasets.

        Raises:
            ValueError: If ``data.current_data`` is None.
        """
        mapping = self._column_map

        # The reference dataset is optional; its metrics stay None when it is absent.
        reference_metrics = None
        if data.reference_data is not None:
            reference_metrics = calculate_regression_metrics(
                data.reference_data,
                true_label_col=mapping.target,
                prediction_col=mapping.prediction,
            )

        if data.current_data is None:
            raise ValueError("The value cannot be None")

        current_metrics = calculate_regression_metrics(
            data.current_data,
            true_label_col=mapping.target,
            prediction_col=mapping.prediction,
        )

        return RegressionPerformanceResults(reference=reference_metrics, current=current_metrics)

In [77]:
from evidently.renderers.base_renderer import MetricRenderer, default_renderer

@default_renderer(wrap_type=RegressionPerformanceMetrics)
class RegressionRenders(MetricRenderer):
    """Renderer registered as the default for ``RegressionPerformanceMetrics``."""

    def render_json(
        self,
        obj: RegressionPerformanceMetrics,
        include_render: bool = False,
        include: "IncludeOptions" = None,
        exclude: "IncludeOptions" = None,
    ) -> dict:
        """Return the metric's computed result as a dictionary, forwarding the include/exclude options."""
        metric_result = obj.get_result()
        return metric_result.get_dict(include_render, include, exclude)

In [78]:
CUSTOM_REPORT_PATH = './reports/custom/Regression Performance Custom.json'
# Create the output folder up front so saving does not fail on a fresh checkout.
Path(CUSTOM_REPORT_PATH).parent.mkdir(parents=True, exist_ok=True)

# The custom metric carries its own column mapping, so none is passed to `run`.
report = Report(metrics=[
    RegressionPerformanceMetrics(column_map=column_mapping)
])

# Only the current window is evaluated; no reference dataset is supplied.
report.run(current_data=current, reference_data=None)
report.save_json(CUSTOM_REPORT_PATH)