In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

import pickle

from evidently.report import Report
from evidently.metric_preset import DataQualityPreset, DataDriftPreset, ClassificationPreset, TargetDriftPreset

In [20]:
import os
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# Connection settings are read from the environment so that no credentials
# are stored in the notebook itself.
db_params = {
    'host': os.getenv('db_host'),
    'port': os.getenv('db_port'),
    'database': os.getenv('db_name'),
    'user': os.getenv('db_username'),
    'password': os.getenv('db_password')
}

# Fail early with a readable message instead of letting an unset variable be
# formatted into the connection URL as the literal text "None".
required_env = {'host': 'db_host', 'port': 'db_port', 'database': 'db_name', 'user': 'db_username'}
missing_env = [env_name for key, env_name in required_env.items() if not db_params[key]]
if missing_env:
    raise RuntimeError(f'Missing database environment variables: {", ".join(missing_env)}')

# URL.create escapes special characters in the user name / password, which a
# hand-built f-string URL does not (e.g. a password containing "@" or "/").
engine = create_engine(URL.create(
    'postgresql+psycopg2',
    username=db_params['user'],
    password=db_params['password'],
    host=db_params['host'],
    port=int(db_params['port']),
    database=db_params['database'],
))

try:
    # NOTE(review): neither query has an ORDER BY. Later cells attach the
    # predictions computed on ready_df to the split of meaningful_df by index,
    # which presumes both tables come back in the same row order - confirm.
    ready_df = pd.read_sql_query('SELECT * FROM model_data_w_dummy', engine)
    meaningful_df = pd.read_sql_query('SELECT * FROM meaningful_features', engine)
finally:
    # Release pooled connections even when one of the queries fails.
    engine.dispose()

# Fixed random_state keeps the 80/20 reference/current split reproducible.
reference_data, current_data = train_test_split(ready_df, test_size=0.2, random_state=42)

In [21]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model artifacts that come from a trusted source.
with open('../model/balanced_rf_model/model.pkl', 'rb') as f:
    model = pickle.load(f)


def _model_inputs(frame):
    """Return `frame` without the label column and without any previously added prediction.

    The label 'FraudFound_P' must be present (a KeyError is raised otherwise, as
    before). A 'prediction' column is dropped only if it exists, so re-running
    this cell on the same kernel does not feed earlier predictions to the model.
    """
    return frame.drop(columns=['FraudFound_P']).drop(columns=['prediction'], errors='ignore')


# .assign returns a new frame instead of writing a column into the
# train_test_split output in place.
reference_data = reference_data.assign(prediction=model.predict(_model_inputs(reference_data)))
current_data = current_data.assign(prediction=model.predict(_model_inputs(current_data)))

In [22]:
# Same test_size / random_state as the split of ready_df, applied to meaningful_df.
meaningful_reference_data, meaningful_current_data = train_test_split(meaningful_df, test_size=0.2, random_state=42)

# The predictions are attached by index alignment. Any row label that exists in
# the meaningful split but not in the scored split would silently receive a NaN
# prediction, so verify the labels up front and fail loudly instead.
for split_name, meaningful_part, scored_part in (
    ('reference', meaningful_reference_data, reference_data),
    ('current', meaningful_current_data, current_data),
):
    unmatched = meaningful_part.index.difference(scored_part.index)
    if len(unmatched) > 0:
        raise ValueError(
            f'{len(unmatched)} rows of the {split_name} split of meaningful_df have no '
            f'matching prediction; the two source tables are not row-aligned'
        )

# Build new frames (no in-place column assignment / inplace rename) carrying the
# prediction column and exposing the label 'FraudFound_P' under the name 'target'.
meaningful_reference_data = (
    meaningful_reference_data
    .assign(prediction=reference_data['prediction'])
    .rename(columns={'FraudFound_P': 'target'})
)
meaningful_current_data = (
    meaningful_current_data
    .assign(prediction=current_data['prediction'])
    .rename(columns={'FraudFound_P': 'target'})
)

## Evidently tests

In [31]:
from evidently.test_suite import TestSuite
from evidently.test_preset import (
    BinaryClassificationTestPreset,
    DataDriftTestPreset,
    DataQualityTestPreset,
    DataStabilityTestPreset,
    NoTargetPerformanceTestPreset,
)

# Presets executed by the suite, in the order they are passed to TestSuite.
# Although the variable below is called `data_stability`, the suite bundles
# five presets, not only the data-stability one.
test_presets = [
    DataStabilityTestPreset(),
    NoTargetPerformanceTestPreset(),
    DataQualityTestPreset(),
    DataDriftTestPreset(),
    BinaryClassificationTestPreset(),
]
data_stability = TestSuite(tests=test_presets)

# Run the suite with the reference split against the current split.
data_stability.run(
    reference_data=meaningful_reference_data,
    current_data=meaningful_current_data,
)

# Write the suite results as an HTML file next to the notebook.
data_stability.save_html('data_stability.html')

In [27]:
# NOTE(review): target_column is not referenced anywhere in this cell, and the
# frames passed to the report below had 'FraudFound_P' renamed to 'target'
# earlier in the notebook - confirm whether this variable is still needed.
target_column = 'FraudFound_P'

# Metric presets included in the report, in the order they are passed to Report.
report_presets = [
    DataQualityPreset(),
    DataDriftPreset(),
    ClassificationPreset(),
    TargetDriftPreset(),
]
report = Report(metrics=report_presets)

# Run the report with the reference split against the current split.
report.run(
    reference_data=meaningful_reference_data,
    current_data=meaningful_current_data,
)

# Write the report as an HTML file next to the notebook.
report.save_html('evidently_dashboard.html')