# Debugging with Evidently Test Suites and Reports

In [10]:
import datetime
import pandas as pd

from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset

from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset

from joblib import dump, load

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

## Load data and model

In [2]:
# Baseline dataset; passed as `reference_data` to the test suite and report below.
ref_data = pd.read_parquet(path='data/reference.parquet')

In [3]:
# Data to be debugged (the file name indicates green-taxi trips for 2022-02).
current_data = pd.read_parquet(path='data/green_tripdata_2022-02.parquet')

In [12]:
# Load the previously trained model from disk.
# NOTE(review): joblib.load unpickles the file, so only load model files
# from a trusted source.
with open('models/lin_reg.bin', mode='rb') as model_file:
    model = load(model_file)

In [13]:
# Model input columns, split by how they are declared in the column mapping.
num_features = [
    "passenger_count",
    "trip_distance",
    "fare_amount",
    "total_amount",
]
cat_features = [
    "PULocationID",
    "DOLocationID",
]

## Generate Test Suite and Report

In [6]:
# Select the single day under investigation: pickups from 2022-02-20 00:00
# (inclusive) up to 2022-02-21 00:00 (exclusive).
# The explicit .copy() makes the subset an independent DataFrame, so adding
# the 'prediction' column to it later does not raise SettingWithCopyWarning
# and can never write through to `current_data`.
problematic_data = current_data.loc[
    (current_data.lpep_pickup_datetime >= datetime.datetime(2022, 2, 20, 0, 0))
    & (current_data.lpep_pickup_datetime < datetime.datetime(2022, 2, 21, 0, 0))
].copy()

In [19]:
# Score the selected rows with the loaded model; missing feature values are
# replaced with 0 before prediction.
# assign() returns a new DataFrame instead of writing a column into what may
# be a slice of `current_data`, which avoids SettingWithCopyWarning.
problematic_data = problematic_data.assign(
    prediction=model.predict(
        problematic_data[num_features + cat_features].fillna(0)
    )
)

In [26]:
# Tell Evidently which columns play which role. No target column is declared
# (target=None); only the model output and the input features are mapped.
column_mapping = ColumnMapping(
    target=None,
    prediction='prediction',
    numerical_features=num_features,
    categorical_features=cat_features,
)

In [29]:
# Test suite built from Evidently's data-drift test preset.
testsuite = TestSuite(tests=[DataDriftTestPreset()])

In [30]:
# Run the test suite on the selected day against the reference data,
# then render the result.
testsuite.run(
    current_data=problematic_data,
    reference_data=ref_data,
    column_mapping=column_mapping,
)

testsuite.show()

In [24]:
# Report built from Evidently's data-drift metric preset.
report = Report(metrics=[DataDriftPreset()])

In [27]:
# Compute the report for the selected day against the reference data,
# then render the result.
report.run(
    current_data=problematic_data,
    reference_data=ref_data,
    column_mapping=column_mapping,
)

report.show()