# Get started with Metrics Tracking and Monitoring

This notebook demonstrates how to use Evidently to:
* Generate a model performance report and calculate the associated metrics.
* Log model metrics to MLflow.
* Store the model in MLflow as an artifact.
* Store the model performance report in MLflow as an artifact.

In [None]:
%load_ext autoreload
%autoreload 2

import joblib
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
import pandas as pd

from evidently.metric_preset import RegressionPreset
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.report import Report
from pathlib import Path
from sklearn import ensemble, model_selection

from config import MLFLOW_TRACKING_URI, DATA_DIR, FILENAME, REPORTS_DIR

In [None]:
# Silence all Python warnings for the rest of the notebook so library
# deprecation noise does not clutter the cell outputs.
# NOTE(review): both calls below install a blanket 'ignore' filter, so one of
# them is redundant; this also hides warnings that may point at real problems.
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Load Data

More information about the dataset can be found in the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset

Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg

In [None]:
# The CSV must already exist locally; fetch it first with:
#   python src/pipelines/load_data.py
# The path is resolved relative to the notebook's parent directory.
raw_data_path = f"../{DATA_DIR}/{FILENAME}"
raw_data = pd.read_csv(raw_data_path)

# Preview the first rows as the cell output.
raw_data.head()

## Define column mapping

In [None]:
# Name of the label column in the raw data and of the model-output column.
target = 'cnt'
prediction = 'prediction'

# Model inputs, grouped by how the report should treat them.
numerical_features = [
    'temp',
    'atemp',
    'hum',
    'windspeed',
    'mnth',
    'hr',
    'weekday',
]
categorical_features = [
    'season',
    'holiday',
    'workingday',
]

In [None]:
# Tell Evidently which columns hold the label, the model output and the features.
# 'target' and 'prediction' are the column names added to X_train / X_test
# further down in this notebook (not the raw 'cnt' column name).
column_mapping = ColumnMapping(
    target='target',
    prediction='prediction',
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

## Train a Model

In [None]:
# Keep only the rows whose 'dteday' label falls inside the training window.
# .loc label slicing includes both end points.
window_start, window_end = '2011-01-01 00:00:00', '2011-01-28 23:00:00'
indexed_by_date = raw_data.set_index('dteday')
sample_data = indexed_by_date.loc[window_start:window_end].reset_index()

print(sample_data.shape)

In [None]:
# Split the sample into train (70%) and test (30%) sets.
# random_state is fixed so the split, and therefore every metric logged below,
# is reproducible across "Restart Kernel -> Run All" executions.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    sample_data[numerical_features + categorical_features],
    sample_data[target],
    test_size=0.3,
    random_state=0,
)

In [None]:
# Fit a random forest on the training split (seeded for reproducibility).
regressor = ensemble.RandomForestRegressor(random_state = 0, n_estimators = 50)
regressor.fit(X_train, y_train)

# Persist the fitted model so it can be logged to MLflow as an artifact later.
# Create the target directory first: joblib.dump does not create missing
# parent folders and would otherwise fail on a fresh checkout.
model_path = Path('../models/model.joblib')
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(regressor, model_path)

## Build the model validation report

In [None]:
# Calculate predictions 
preds_train = regressor.predict(X_train)
preds_test = regressor.predict(X_test)

In [None]:
# Attach the label and the model output to each split under the column names
# expected by column_mapping ('target' and 'prediction').
# DataFrame.assign returns a new frame instead of writing into the objects
# returned by train_test_split, which avoids the chained-assignment pattern
# (its warning is silenced at the top of this notebook). y_train / y_test are
# aligned on the index; the prediction arrays are assigned positionally.
X_train = X_train.assign(target=y_train, prediction=preds_train)
X_test = X_test.assign(target=y_test, prediction=preds_test)

In [None]:
# Preview the training frame, which now includes the 'target' and 'prediction' columns.
X_train.head()

In [None]:
# Build the regression report: the training split is the reference data set,
# the held-out test split is the current data set.
regression_performance_report = Report(metrics=[RegressionPreset()])
regression_performance_report.run(
    reference_data=X_train,
    current_data=X_test,
    column_mapping=column_mapping,
)

# Render the report inline as the cell output.
regression_performance_report

In [None]:
# Extract Model Train Metrics from the report

train_report_metrics = regression_performance_report.as_dict()
me = train_report_metrics['metrics'][0]['result']['current']['mean_error']
mae = train_report_metrics['metrics'][0]['result']['current']["mean_abs_error"]

print(me, mae)

## Log Model Training and Validation Metrics

### Set up MLFlow

In [None]:
# Set up MLFlow Client
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
client = MlflowClient()

print(f"Client tracking uri: {client.tracking_uri}")

# Set experiment name
mlflow.set_experiment('Train Model')

In [None]:
with mlflow.start_run() as run:

    # Show the metadata of the run that was just created
    run_info = run.info
    print(f"Experiment id: {run_info.experiment_id}")
    print(f"Run id: {run_info.run_id}")
    print(f"Run name: {run_info.run_name}")
    print('MLFlow tracking uri:', mlflow.get_tracking_uri())
    print('MLFlow artifact uri:', mlflow.get_artifact_uri())

    # Keep the run id so a later cell can resume this run
    run_id = run_info.run_id

    # Log the metrics, rounded to three decimals
    for metric_name, metric_value in (('me', me), ('mae', mae)):
        mlflow.log_metric(metric_name, round(metric_value, 3))

    # Log the serialized model file as a run artifact
    mlflow.log_artifact(model_path)

## Log Model Validation reports to MLFlow 

In [None]:
# Save the report as a standalone HTML file.
# Create the reports directory first so save_html does not fail on a fresh
# checkout where the folder does not exist yet.
report_path = f"../{REPORTS_DIR}/train_report.html"
Path(report_path).parent.mkdir(parents=True, exist_ok=True)
regression_performance_report.save_html(report_path)

# Resume the run created earlier (identified by run_id) and bind it here, so
# the metadata printed below comes from the resumed run rather than from a
# variable left over from the previous cell.
with mlflow.start_run(run_id=run_id) as run:

    # Show the run metadata info
    print("Experiment id: {}".format(run.info.experiment_id))
    print("Run id: {}".format(run.info.run_id))
    print("Run name: {}".format(run.info.run_name))

    # Log the regression_performance_report as an artifact
    mlflow.log_artifact(report_path)