In [1]:
import sys
from driftdb.connectors.github_connector import GithubConnector
from dotenv import load_dotenv
from github import Github
import os

# Load GH_TOKEN (and optionally REPO) from the .env file one directory above the notebook.
load_dotenv("../.env")

gh_token = os.getenv("GH_TOKEN")
if not gh_token:
    # Raise instead of calling exit(): exit() is an interactive-shell helper and is not
    # meant for aborting program flow. An exception stops this cell with a clear
    # traceback. `not gh_token` also rejects an empty `GH_TOKEN=` entry, which would
    # otherwise be handed to the GitHub client as a blank credential.
    raise RuntimeError("GitHub token not found! Create a .env file at the root with a GH_TOKEN variable.")

# Authenticated GitHub client; timeout=60 is passed straight to the Github constructor.
github_client = Github(gh_token, timeout=60)
# Target repository: REPO from the environment, falling back to the placeholder "gh_org/repo".
repo_name = os.getenv("REPO") or "gh_org/repo"
# Connector used by every snapshot cell below.
github_connector = GithubConnector(github_client=github_client, github_repository_name=repo_name, assignees=["Sammy"])


In [2]:
# Development cell: extend the import path, force-reload the driftdb modules that are
# being edited, then run a first snapshot call.
import sys
# Add the parent directory to the module search path.
# NOTE(review): the first cell already imports driftdb before this line runs, so on a
# fresh kernel this path tweak comes too late for that import — confirm how driftdb
# is expected to be importable.
sys.path.append('..')
import importlib
import driftdb.drift_evaluator.drift_evaluators
# Re-execute the module so the kernel uses its current source.
importlib.reload(driftdb.drift_evaluator.drift_evaluators)

import driftdb.connectors.github_connector
# NOTE(review): `github_connector` was instantiated in the first cell, before this
# reload; an existing instance keeps referencing the class object it was created from,
# so it will not pick up the reloaded GithubConnector — re-create it if that matters.
importlib.reload(driftdb.connectors.github_connector)
from driftdb.connectors.workflow import snapshot_table
import pandas as pd

# Calls the workflow-level snapshot_table with only a connector and a table name
# ("croute"); no dataframe is passed here.
# NOTE(review): presumably a smoke test — confirm the function supports being called
# without table data.
snapshot_table(connector=github_connector, table_name="croute")

## Test with file already existing and splitting new data and historical data

def formatDF(dict):
    """Build a DataFrame from column data and put a ``unique_key`` column first.

    Args:
        dict: Column-oriented data accepted by ``pd.DataFrame`` (e.g. a mapping of
            column name to list of values). It must provide string-valued ``date``
            and ``name`` columns. The parameter name shadows the builtin but is kept
            so existing callers are unaffected.

    Returns:
        A new DataFrame whose first column, ``unique_key``, is ``"<date>-<name>"``
        for each row, followed by the input columns in their original order.

    Raises:
        KeyError: If the ``date`` or ``name`` column is missing.
    """
    df = pd.DataFrame(dict)
    # Column-wise concatenation instead of a row-wise apply(axis=1): it avoids a Python
    # call per row and also works on a zero-row frame, where apply returns a DataFrame
    # rather than a Series and the single-column assignment fails.
    df['unique_key'] = df['date'] + '-' + df['name']
    # Move the key to the front while preserving the relative order of the other columns.
    column_order = ['unique_key'] + [col for col in df.columns if col != 'unique_key']
    return df.reindex(columns=column_order)


# Path of the CSV, inside the target repository, that the snapshot cells in this notebook write to.
table_name = "path/to/metric_name_15.csv"


# Store metric for the first time: three rows spanning 2022-12 and 2023-01.
dataMonth1 = {"name": ["Alice", "Bob", "Charlie"], "date": ["2022-12","2023-01","2023-01"], "age": [25, 30, 35]}
# The stray, half-typed `dr` argument was removed: a positional argument after keyword
# arguments is a SyntaxError, so the cell could not run at all.
github_connector.snapshot_table(
    table_dataframe=formatDF(dataMonth1),
    table_name=table_name,
)

driftdb.connectors.github_connector - INFO - Table found. Updating it
driftdb.connectors.github_connector - INFO - Change detected
driftdb.connectors.github_connector - INFO - Update: DRIFT
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/31b800d2c8dd7ddcdfe6e40e968825a32224c2c4


In [None]:
# ## Second snapshot: same three rows as dataMonth1, plus three new rows dated 2023-02
dataMonth2 = {
    "name": ["Alice", "Bob", "Charlie", "Didier", "Philipe", "Antoine"],
    "date": ["2022-12", "2023-01", "2023-01", "2023-02", "2023-02", "2023-02"],
    "age": [25, 30, 35, 40, 40, 40],
}

# NOTE(review): `connector=` is passed to a method that is already bound to
# `github_connector` — confirm whether the workflow-level `snapshot_table` function
# was intended here.
github_connector.snapshot_table(
    table_name=table_name,
    table_dataframe=formatDF(dataMonth2),
    connector=github_connector,
)

driftdb.connectors.workflow - INFO - Table found. Updating it
driftdb.connectors.workflow - INFO - Change detected
driftdb.connectors.github_connector - INFO - Update: NEW DATA
driftdb.connectors.github_connector - INFO - Update: NEW DATA
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/a64820edac1e30625954f6896e0708f8d8ada350
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/a64820edac1e30625954f6896e0708f8d8ada350


In [None]:
# ## Third snapshot: three new rows dated 2023-03, plus changed ages on existing rows
# Compared with dataMonth2: Charlie (2023-01) goes 35 -> 36 and Philipe (2023-02) goes 40 -> 42.
dataMonth3 = {
    "name": ["Alice", "Bob", "Charlie", "Didier", "Philipe", "Antoine", "Clement", "Cyril", "Victor"],
    "date": ["2022-12", "2023-01", "2023-01", "2023-02", "2023-02", "2023-02", "2023-03", "2023-03", "2023-03"],
    "age": [25, 30, 36, 40, 42, 40, 45, 45, 46],
}

# NOTE(review): `connector=` is passed to a method that is already bound to
# `github_connector` — confirm whether the workflow-level `snapshot_table` function
# was intended here.
github_connector.snapshot_table(
    table_name=table_name,
    table_dataframe=formatDF(dataMonth3),
    connector=github_connector,
)

driftdb.connectors.workflow - INFO - Table found. Updating it
driftdb.connectors.workflow - INFO - Change detected
driftdb.connectors.github_connector - INFO - Update: NEW DATA
driftdb.connectors.github_connector - INFO - Update: NEW DATA
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/9aebfe5ff13730a3ed331aa73dadbdd3649ad28f
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/9aebfe5ff13730a3ed331aa73dadbdd3649ad28f
driftdb.connectors.github_connector - INFO - Update: DRIFT
driftdb.connectors.github_connector - INFO - Update: DRIFT
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/13c2ab1d1e4fb86a0d9f339c5ca9324b8696e4d0
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/13c2ab1d1e4fb86a0d9f339c5ca9324b8696e4d0


In [None]:
# ## Fourth snapshot: no new rows; submitted with an AlertDriftEvaluator
# Compared with dataMonth3, only Charlie's age changes (36 -> 35, back to its dataMonth2
# value); Philipe's age stays at 42.
from driftdb.drift_evaluator.drift_evaluators import AlertDriftEvaluator


dataMonth3And1Day = {
    "name": ["Alice", "Bob", "Charlie", "Didier", "Philipe", "Antoine", "Clement", "Cyril", "Victor"],
    "date": ["2022-12", "2023-01", "2023-01", "2023-02", "2023-02", "2023-02", "2023-03", "2023-03", "2023-03"],
    "age": [25, 30, 35, 40, 42, 40, 45, 45, 46],
}
# NOTE(review): `connector=` is passed to a method that is already bound to
# `github_connector` — confirm whether the workflow-level `snapshot_table` function
# was intended here.
github_connector.snapshot_table(
    table_name=table_name,
    table_dataframe=formatDF(dataMonth3And1Day),
    connector=github_connector,
    drift_evaluator=AlertDriftEvaluator(),
)

driftdb.connectors.workflow - INFO - Table found. Updating it
driftdb.connectors.workflow - INFO - Change detected
driftdb.connectors.github_connector - INFO - Update: DRIFT
driftdb.connectors.github_connector - INFO - Update: DRIFT
driftdb.connectors.github_connector - INFO - Branch drift/2023-11-20-12-02-31/path-to-metric-name-15-csv doesn't exist. Creating it...
driftdb.connectors.github_connector - INFO - Branch drift/2023-11-20-12-02-31/path-to-metric-name-15-csv doesn't exist. Creating it...
driftdb.connectors.github_connector - INFO - Checkout branch: drift/2023-11-20-12-02-31/path-to-metric-name-15-csv from branch: main
driftdb.connectors.github_connector - INFO - Checkout branch: drift/2023-11-20-12-02-31/path-to-metric-name-15-csv from branch: main
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/ff0cec0073d6b8a25f4cf41985d05788fb2128e1
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/ff0cec0073

In [None]:
# ## Fifth snapshot: one extra row on an already-recorded date; AlertDriftEvaluator again
# Compared with dataMonth3And1Day, the only difference is the added "Alixe" row
# (date 2022-12, age 25); every other row is identical.
dataMonth3And2Day = {
    "name": ["Alice", "Alixe", "Bob", "Charlie", "Didier", "Philipe", "Antoine", "Clement", "Cyril", "Victor"],
    "date": ["2022-12", "2022-12", "2023-01", "2023-01", "2023-02", "2023-02", "2023-02", "2023-03", "2023-03", "2023-03"],
    "age": [25, 25, 30, 35, 40, 42, 40, 45, 45, 46],
}

# NOTE(review): `connector=` is passed to a method that is already bound to
# `github_connector` — confirm whether the workflow-level `snapshot_table` function
# was intended here.
github_connector.snapshot_table(
    table_name=table_name,
    table_dataframe=formatDF(dataMonth3And2Day),
    connector=github_connector,
    drift_evaluator=AlertDriftEvaluator(),
)


driftdb.connectors.workflow - INFO - Table found. Updating it
driftdb.connectors.workflow - INFO - Change detected
driftdb.connectors.github_connector - INFO - Update: DRIFT
driftdb.connectors.github_connector - INFO - Update: DRIFT
driftdb.connectors.github_connector - INFO - Branch drift/2023-11-20-12-02-39/path-to-metric-name-15-csv doesn't exist. Creating it...
driftdb.connectors.github_connector - INFO - Branch drift/2023-11-20-12-02-39/path-to-metric-name-15-csv doesn't exist. Creating it...
driftdb.connectors.github_connector - INFO - Checkout branch: drift/2023-11-20-12-02-39/path-to-metric-name-15-csv from branch: main
driftdb.connectors.github_connector - INFO - Checkout branch: drift/2023-11-20-12-02-39/path-to-metric-name-15-csv from branch: main
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/5e4004dbd8286a1e90b32e86d18b327248f73669
driftdb.connectors.github_connector - INFO - https://github.com/Samox/data-history/commit/5e4004dbd8