# 5_evidently_experiment

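A notebook for experimenting with the `evidently` library: it builds a data drift report and regression performance reports for the electrical consumption model.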

In [9]:
import os
import pickle

import pandas as pd
import wandb

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

from evidently.dashboard import Dashboard
from evidently.tabs import DataDriftTab
from evidently.tabs import RegressionPerformanceTab
from evidently.profile_sections import RegressionPerformanceProfileSection
from evidently.model_profile import Profile

# Every city contributes the same four weather columns, named '<metric>_<city>'.
_city_metrics = ('t2m_min', 't2m', 't2m_max', 'prectot')
_cities = (
    'bordeaux', 'lille', 'paris', 'rennes', 'nantes', 'toulouse',
    'marseille', 'lyon', 'nice', 'strasbourg', 'montpellier',
)

# Per-city columns first (city by city), then the four 'weighted_*' aggregates.
columns_weather = [f'{metric}_{city}' for city in _cities for metric in _city_metrics]
columns_weather += [
    'weighted_t2m', 'weighted_t2m_min', 'weighted_t2m_max', 'weighted_prectot',
]

# Model inputs: three calendar columns followed by all weather columns.
columns_features = ['weekday', 'month', 'week_number'] + columns_weather

In [6]:
def _read_with_dates(csv_path):
    """Read a CSV file and convert its 'date' column to pandas datetimes."""
    frame = pd.read_csv(csv_path)
    frame['date'] = pd.to_datetime(frame['date'])
    return frame


# Train and test sets are put in chronological order; the 2020 data keeps its file order.
dfp_train = _read_with_dates('./data/rtu/model_train_data.csv').sort_values(['date'])
dfp_test = _read_with_dates('./data/rtu/model_test_data.csv').sort_values(['date'])
dfp_2020 = _read_with_dates('./data/rtu/2020_data.csv')

In [7]:
# Split the datasets into model inputs (calendar + weather columns) and the target.
column_target = 'daily_electrical_consumption'

X_train = dfp_train[columns_features]
y_train = dfp_train[column_target]

X_test = dfp_test[columns_features]
y_test = dfp_test[column_target]

# Only the feature columns are extracted from the 2020 data in this cell.
X_2020 = dfp_2020[columns_features]

In [10]:
# Download the 'best_model_hyperopt:latest' artifact from Weights & Biases.
run = wandb.init(project='french_electrical_consumption', entity='jmdaignan')
model_at = run.use_artifact('best_model_hyperopt:latest')
model_dir = model_at.download()
# Presumably an offline alternative pointing at a local copy of model.pkl:
#model_dir = './data'

# NOTE: pickle.load can execute arbitrary code; only load artifacts from a trusted source.
with open(f'{model_dir}/model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Score the loaded model on the test set; squared=False makes the metric an RMSE.
predictions = model.predict(X_test)
rmse = mean_squared_error(y_test, predictions, squared=False)
rmse

[34m[1mwandb[0m: wandb version 0.10.32 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


450678.70573988283

In [11]:
# Data drift report on the feature columns: the training data is passed first
# and the 2020 data second (evidently's reference / current order).
# A fresh checkout may not contain the output folder yet, so create it before saving.
os.makedirs("evidently_reports", exist_ok=True)

data_drift_report = Dashboard(tabs=[DataDriftTab])
data_drift_report.calculate(
    dfp_train[columns_features],
    dfp_2020[columns_features],
    column_mapping=None,
)
data_drift_report.save("evidently_reports/data_report.html")

In [12]:
# Attach the model predictions to each dataset as a 'prediction' column.
for frame, features in ((dfp_2020, X_2020), (dfp_test, X_test), (dfp_train, X_train)):
    frame['prediction'] = model.predict(features)

# Tell evidently which columns hold the target, prediction, timestamp and features.
column_mapping = {
    'target': column_target,
    'prediction': 'prediction',
    'datetime': 'date',
    'numerical_features': columns_weather,
    'categorical_features': ['weekday', 'month', 'week_number'],
}

In [13]:
# Regression performance report: the training data is passed first and the
# 2020 data second (evidently's reference / current order).
# A fresh checkout may not contain the output folder yet, so create it before saving.
os.makedirs("evidently_reports", exist_ok=True)

regression_model_performance = Dashboard(tabs=[RegressionPerformanceTab])
regression_model_performance.calculate(
    dfp_train,
    dfp_2020,
    column_mapping=column_mapping,
)
regression_model_performance.save("evidently_reports/model_report.html")

In [14]:
# Profile object with a single regression performance section.
regression_performance_profile = Profile(
    sections=[RegressionPerformanceProfileSection],
)

In [15]:
# Compute the profile with the 2020 data as first argument and the test data as second.
# NOTE(review): the dashboards in this notebook pass dfp_train first and dfp_2020
# second; confirm that using dfp_2020 / dfp_test in this order is intended.
regression_performance_profile.calculate(
    dfp_2020,
    dfp_test,
    column_mapping=column_mapping,
)