In [1]:
from driftdb.alerting import run_new_data_evaluator, DetectOutlierHandlerFactory, run_drift_evaluator
from dotenv import load_dotenv
from github import Github
import os

load_dotenv("../../.env")
gh_token = os.getenv("GH_TOKEN")
gh_client = Github(gh_token)
repo = os.getenv("REPO") or "gh_org/repo"
commit_sha = "221dd8f4473ee14eb65958923cb74f2ff8d6463e"
new_data_handler = DetectOutlierHandlerFactory(numerical_cols=["age"], categorical_cols=[])

# drift = run_new_data_evaluator(gh_client=gh_client, repo_name=repo, commit_sha=commit_sha, new_data_handler=new_data_handler)
# drift.message

In [5]:
from driftdb.alerting import DriftEvaluatorContext, DriftEvaluation
import pandas as pd

def compute_drift_evaluation(data_drift_context: DriftEvaluatorContext)-> DriftEvaluation:
    new_dataframe =  data_drift_context.after
    old_dataframe =  data_drift_context.before
    print("summary", data_drift_context.summary)
    if data_drift_context.summary == None:
        return DriftEvaluation(should_alert= False, message="No summary")
    additions = data_drift_context.summary["added_rows"]
    deletions = data_drift_context.summary["deleted_rows"]
    diff = data_drift_context.summary["modified_patterns"]
    diff_column = diff.columns[0][0]
    diff = diff.apply(pd.to_numeric)  # Convert the values in the diff dataframe to numeric values
    
    print("computing the diff")
    monthly_booking_drift = diff[(diff_column, "other")] - diff[(diff_column, "self")]
    print("monthly_booking_drift", monthly_booking_drift)
    if(monthly_booking_drift > 1000).any():
        return DriftEvaluation(should_alert=True, message= "The monthly booking drift is too high")
    return DriftEvaluation(should_alert= False, message= "Small drift")




drift = run_drift_evaluator(gh_client=gh_client, repo_name=repo, commit_sha=commit_sha, drift_handler=compute_drift_evaluation)
print("large_drift", drift)

summary {'added_rows':                     name        date age
unique_key                              
2023-03-Clement  Clement  2023-03-01  45
2023-03-Cyril      Cyril  2023-03-01  45
2023-03-Victor    Victor  2023-03-01  46, 'deleted_rows': Empty DataFrame
Columns: [name, date, age]
Index: [], 'modified_rows_unique_keys': Index([], dtype='string', name='unique_key'), 'modified_patterns': Empty DataFrame
Columns: []
Index: []}


IndexError: index 0 is out of bounds for axis 0 with size 0

In [4]:
from driftdb.alerting import TresholdDriftHandlerFactory

treshold_handler = TresholdDriftHandlerFactory(treshold=0.01, numerical_cols=["age"])

drift = run_drift_evaluator(gh_client=gh_client, repo_name=repo, commit_sha=commit_sha, drift_handler=treshold_handler)
drift.message

"Found 2 outliers\n |    | unique_keys         | column   |   old_value |   new_value |           pattern_id |\n|---:|:--------------------|:---------|------------:|------------:|---------------------:|\n|  0 | ['2023-01-Charlie'] | age      |          35 |          36 |  6067053025090607954 |\n|  1 | ['2023-02-Philipe'] | age      |          40 |          42 | -1500446248922248707 |"