# Debugging with Evidently Test Suites and Reports

In [None]:
import datetime
import pandas as pd

from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset

from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset, DataQualityTestPreset

from joblib import dump, load
from optbinning import Scorecard

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

In [None]:
# Load the HELOC dataset CSV. It is bound to `tt` because the following cells
# (target encoding and record export) read the frame under that name; binding
# it to `t` left `tt` undefined and those cells failed with NameError.
tt = pd.read_csv("/home/fini/github-projects/mlops/data/heloc_dataset_v1.csv")

In [None]:
# Encode the target as an int8 flag: 1 where the label equals "Bad", else 0.
tt["RiskPerformance"] = tt["RiskPerformance"].eq("Bad").astype("int8")

In [None]:
# Map "record-<i>" to each row of the frame (column names lower-cased),
# in row order.
r = {
    f"record-{idx}": row
    for idx, row in enumerate(
        tt.rename(columns=str.lower).to_dict(orient="records")
    )
}

r

## Load data and model

In [None]:
# ref_data is used as the reference dataset in the Evidently runs below;
# current_data is the frame the problematic date window is cut from.
ref_data = pd.read_parquet("data/reference.parquet")
current_data = pd.read_parquet("data/raw.parquet")

In [None]:
# Preview current_data with missing values replaced by 0 and every column cast
# to int16. The result is not assigned, so current_data itself is unchanged.
# NOTE(review): the cast may fail if any column is non-numeric (e.g. the
# operation_date column filtered on below) — confirm this cell still runs.
current_data.fillna(0).astype("int16")

In [None]:
# Load the scorecard model saved at models/scorecard-model.pkl.
# NOTE(review): the .pkl extension suggests a pickle file — only load it from
# a trusted source.
model = Scorecard.load('models/scorecard-model.pkl')

In [None]:
# Column roles: target label, numerical features, categorical features.
TARGET = "RiskPerformance"

num_features = [
    "AverageMInFile",
    "MSinceMostRecentInqexcl7days",
    "PercentTradesNeverDelq",
    "ExternalRiskEstimate",
    "NetFractionRevolvingBurden",
    "NumSatisfactoryTrades",
    "PercentInstallTrades",
]

# No categorical features are declared.
cat_features = []

In [None]:
# Select the columns listed in num_features from the reference data.
t = ref_data[num_features]

In [None]:
# Show the first row of `t` as a {column: value} dict.
t.head(1).to_dict(orient="records")[0]

In [None]:
# Hand-written sample record with one value for each of the seven
# numerical feature names.
rc = {
    "AverageMInFile": 76,
    "MSinceMostRecentInqexcl7days": 0,
    "PercentTradesNeverDelq": 100,
    "ExternalRiskEstimate": 69,
    "NetFractionRevolvingBurden": 12,
    "NumSatisfactoryTrades": 36,
    "PercentInstallTrades": 25,
}

The problematic data we created is dated October 1 to 12. We can pretend that we discovered this date range through monitoring.

In [None]:
# Rows with operation_date from 2023-10-01 up to (but excluding) 2023-10-13,
# with the column order reversed. Rows and columns are selected in a single
# .loc call and the result is copied, so adding a column to problematic_data
# later does not write into a chained-indexing slice of current_data (which
# triggers pandas' SettingWithCopyWarning).
problematic_data = current_data.loc[
    (current_data["operation_date"] >= datetime.date(2023, 10, 1))
    & (current_data["operation_date"] < datetime.date(2023, 10, 13)),
    current_data.columns[::-1],
].copy()

In [None]:
# Preview the first rows of the filtered date window.
problematic_data.head()

## Generate Test Suite and Report

In [None]:
# Tell Evidently which columns are numerical/categorical features and which
# one holds the prediction; no target column is mapped.
column_mapping = ColumnMapping(
    target=None,
    prediction="prediction",
    numerical_features=num_features,
    categorical_features=cat_features,
)

In [None]:
# Score both frames with the scorecard model (missing feature values filled
# with 0) and store the result in a 'prediction' column on each frame.
for frame in (problematic_data, ref_data):
    frame['prediction'] = model.score(
        frame[num_features + cat_features].fillna(0)
    )

In [None]:
# Run the data drift test preset on the problematic window against the
# reference data.
test_suite = TestSuite(tests=[DataDriftTestPreset()])
test_suite.run(
    reference_data=ref_data,
    current_data=problematic_data,
    column_mapping=column_mapping,
)

In [None]:
# Render the test suite results inline in the notebook output.
test_suite.show(mode='inline')

Use a report to support the analysis. Instead of using the metrics individually, we use `DataDriftPreset`.

In [None]:
# Build a data drift report for the same reference/current pair.
report = Report(metrics=[DataDriftPreset()])
report.run(
    reference_data=ref_data,
    current_data=problematic_data,
    column_mapping=column_mapping,
)

In [None]:
# Render the report inline in the notebook output.
report.show(mode='inline')

In [None]:
# Run the data quality test preset on the problematic window against the
# reference data, then display the suite as the cell output.
data_quality_test_suite = TestSuite(tests=[DataQualityTestPreset()])
data_quality_test_suite.run(
    reference_data=ref_data,
    current_data=problematic_data,
    column_mapping=column_mapping,
)
data_quality_test_suite

Check multiple tests

In [None]:
from evidently.tests import TestColumnDrift, TestShareOfDriftedColumns

Stat test types and the feature types they apply to:
- z: categorical
- g_test: categorical

In [None]:
# One drift stat test name per numerical feature, paired by position with
# num_features (zip stops at the shorter of the two lists).
test_types = [
    "jensenshannon",
    "kl_div",
    "jensenshannon",
    "cramer_von_mises",
    "psi",
    "anderson",
    "anderson",
]
per_column_stattest = dict(zip(num_features, test_types))

In [None]:
# Test the share of drifted columns, using the per-column stat test mapping.
# NOTE(review): unlike the other runs in this notebook, no column_mapping is
# passed to run() here — confirm that is intended.
data_drift_dataset_tests = TestSuite(
    tests=[TestShareOfDriftedColumns(per_column_stattest=per_column_stattest)]
)
data_drift_dataset_tests.run(
    reference_data=ref_data,
    current_data=problematic_data,
)
data_drift_dataset_tests

In [None]:
# multiple_report = Report(metrics = [DataDriftPreset(), DataQualityTestPreset()])
# TestSuite(tests = [DataDriftTestPreset()])
# multiple_report.run(reference_data=ref_data, current_data=problematic_data, column_mapping=column_mapping)

Another way to set tests

In [None]:

# Same share-of-drifted-columns test, but choosing the stat test by feature
# type: 'psi' for numerical and 'jensenshannon' for categorical columns.
# NOTE(review): unlike the other runs in this notebook, no column_mapping is
# passed to run() here — confirm that is intended.
data_drift_dataset_tests = TestSuite(
    tests=[
        TestShareOfDriftedColumns(
            num_stattest='psi',
            cat_stattest='jensenshannon',
        )
    ]
)
data_drift_dataset_tests.run(
    reference_data=ref_data,
    current_data=problematic_data,
)
data_drift_dataset_tests



In [None]:
# Combine the data drift and data quality test presets in a single suite.
multiple_test = TestSuite(
    tests=[
        DataDriftTestPreset(),
        DataQualityTestPreset(),
    ]
)
multiple_test.run(
    reference_data=ref_data,
    current_data=problematic_data,
    column_mapping=column_mapping,
)

In [None]:
# Display the combined test suite as the cell output.
multiple_test