In [None]:
import pandas as pd

from evidently.pipeline.column_mapping import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset

import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

In [None]:
# Load the dataset from bike.csv (path is relative to the current working directory)
df = pd.read_csv(filepath_or_buffer='bike.csv')
df

In [None]:
# Remove the columns that are not listed in the column mapping used further down.
# errors='ignore' keeps this cell re-runnable: on a second execution the columns
# are already gone and a plain drop() would raise KeyError.
df = df.drop(
    columns=['instant','dteday','yr','mnth','registered','cnt'],
    errors='ignore',
)
df

In [None]:
#set column mapping for the Evidently report
data_columns = ColumnMapping(
    numerical_features=['temp', 'atemp', 'hum', 'windspeed', 'casual'],
    categorical_features=['season', 'holiday', 'weekday', 'workingday', 'weathersit'],
)

In [None]:
#evaluate data drift with an Evidently Report
def eval_drift(reference, production, column_mapping):
    """
    Compute per-feature drift scores between a reference and a production dataset.

    Runs an Evidently ``Report`` built from ``DataDriftPreset`` and reads the
    per-column results out of the report's dictionary form.

    Parameters
    ----------
    reference
        Data handed to the report as ``reference_data``.
    production
        Data handed to the report as ``current_data``.
    column_mapping
        Object exposing ``numerical_features`` and ``categorical_features``.
        Either attribute may be ``None``; it is then treated as an empty list.

    Returns
    -------
    list
        Pairs ``(feature_name, drift_score)``, numerical features first, then
        categorical ones. The drift score depends on the selected statistical
        test or distance and the threshold.

    Raises
    ------
    KeyError
        If features were requested but the report contains no per-column drift
        results, or a requested feature is missing from those results.
    """
    data_drift_report = Report(metrics=[DataDriftPreset()])
    data_drift_report.run(reference_data=reference, current_data=production, column_mapping=column_mapping)
    report = data_drift_report.as_dict()

    # A mapping may leave one of the feature lists unset (None); treat that as empty
    # instead of failing on `None + list`.
    features = list(column_mapping.numerical_features or []) + list(column_mapping.categorical_features or [])
    if not features:
        return []

    # Find the per-column table by its key rather than by a fixed position in the
    # metric list, so the lookup does not depend on the order of the preset's metrics.
    drift_by_columns = next(
        (
            metric["result"]["drift_by_columns"]
            for metric in report["metrics"]
            if "drift_by_columns" in metric.get("result", {})
        ),
        None,
    )
    if drift_by_columns is None:
        raise KeyError("no metric in the report exposes 'drift_by_columns'")

    return [(feature, drift_by_columns[feature]["drift_score"]) for feature in features]

In [None]:
# Reference window: every row from position 60 to the end of the frame.
# NOTE(review): this window also contains all of the experiment batches below;
# confirm that comparing a batch against a reference that includes it is intended.
reference = df.iloc[60:]

# Experiment batches: four consecutive 30-row (begin, end) windows over rows 60-180
experiment_batches = [(begin, begin + 30) for begin in range(60, 180, 30)]

In [None]:
# MLflow tracking client
client = MlflowClient()

# Every run started below is recorded under this experiment
mlflow.set_experiment('Data Drift Evaluation with Evidently')

# One MLflow run per experiment batch
for exp in experiment_batches:
    with mlflow.start_run() as run:  # pass run_name='...' to start_run to label the run
        begin, end = exp

        # Parameters: the row window this run evaluates
        mlflow.log_param("begin", begin)
        mlflow.log_param("end", end)

        # Metrics: one drift score per mapped feature, rounded to 3 decimals
        metrics = eval_drift(reference, df.iloc[begin:end], column_mapping=data_columns)
        for feature_name, drift_score in metrics:
            mlflow.log_metric(feature_name, round(drift_score, 3))

        print(run.info)

In [None]:
# Inline drift report: rows 60-90 as current data against rows 60-end as reference
data_drift_report = Report(metrics=[DataDriftPreset()])
data_drift_report.run(
    reference_data=df.iloc[60:],
    current_data=df.iloc[60:90],
    column_mapping=data_columns,
)
data_drift_report.show(mode='inline')

In [None]:
# Inline drift report: rows 365-395 as current data against rows 60-end as reference
data_drift_report = Report(metrics=[DataDriftPreset()])
data_drift_report.run(
    reference_data=df.iloc[60:],
    current_data=df.iloc[365:395],
    column_mapping=data_columns,
)
data_drift_report.show(mode='inline')