In [None]:
import os

import pandas as pd
import numpy as np

from dotenv import load_dotenv
from nixtlats import TimeGPT
from IPython.display import Markdown

# The project helpers are imported by temporarily switching the working
# directory to their folders (this relies on the current directory being on the
# import path, as is presumably the case in the notebook kernel — confirm).
# The try/finally guarantees the original working directory is restored even
# when one of the imports fails; otherwise a re-run of this cell would resolve
# "../data" relative to the wrong directory.
currDir = os.getcwd()
try:
    os.chdir("../data")
    from dataUtils import generateQuery, movePlusRename, dataPrep  # noqa: E402

    # Relative to "../data", so this is the sibling "usage" directory.
    os.chdir("../usage")
    from usageUtils import getUsage  # noqa: E402
finally:
    os.chdir(currDir)

# Load environment variables from a .env file, if one is found.
load_dotenv()

In [None]:
# Build the Kusto query that pulls the sample data set (last 180 days) from
# Azure Data Explorer.

# TODO: add tenantId param

tenantId, daysAgo = "", 180

generateQuery(tenantId=tenantId, daysAgo=daysAgo)

NOTE: Manually paste the query (copied to the clipboard using pyperclip) into Azure Data Explorer (ADE), run it, and export the result to CSV.

In [None]:
# Set the inputs for the 180-day sample billing data and run the data prep
# function. movePlusRename moves and renames the ADE query export file into this
# project directory and returns the new file name, which is handed to dataPrep
# together with the time column and the columns to drop.

# TODO: add new name of ADE query export file

timeCol, dropCols = "TimePeriod", ["Tenant"]
df = "df"
filename = movePlusRename(name="")

bd180 = dataPrep(
    df=df,
    filename=filename,
    timeCol=timeCol,
    dropCols=dropCols,
)

# Preview the first rows of the prepared data.
display(bd180.head())

In [None]:
# Create the TimeGPT client used by the detection, forecast, and plot calls in
# the cells below. No token is passed explicitly — presumably it is picked up
# from the environment populated by load_dotenv(); confirm against nixtlats.
timegpt = TimeGPT()

In [None]:
# Get current API token usage data before running the model calls below.
# getUsage comes from usageUtils (not shown here); a later cell calls it again
# with update=True after the 180-day analysis.

getUsage()

In [None]:
# Detect anomalies in the 180-day series and plot the result against the
# original data. No `level` is passed, so the default prediction interval
# level (99) applies.

bd180Dtct = timegpt.detect_anomalies(
    bd180,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
    freq="D",
)

# Keep only the rows flagged as anomalies.
bd180Anms = bd180Dtct.loc[bd180Dtct["anomaly"].eq(1)]

bd180Plt = timegpt.plot(
    bd180,
    bd180Dtct,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
)

# Show each result under its own heading.
display(
    Markdown("### Anomalies Detection Result (Default 99% Prediction Interval Level)"),
    bd180Dtct,
)
display(
    Markdown("### Detected Anomalies (Default 99% Prediction Interval Level)"),
    bd180Anms,
)
display(
    Markdown("### Results Comparison Plot (Default 99% Prediction Interval Level)"),
    bd180Plt,
)

In [None]:
# Request historical prediction intervals (add_history=True) at every level
# that is checked against the detected anomalies in the cells below.

# One definition of the levels, shared by the forecast call and the loop.
fcstLevels = (99, 90, 99.99)

bd180Fcst = timegpt.forecast(
    bd180,
    h=1,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
    freq="D",
    add_history=True,
    level=list(fcstLevels),
)

# Bring the observed values back in (merge on the columns the frames share).
bd180Fcst = bd180Fcst.merge(bd180)

for lv in fcstLevels:
    # An observation is flagged when it lies strictly above the upper bound
    # or strictly below the lower bound of the interval for this level.
    isAbove = bd180Fcst["ProratedQuantity"] > bd180Fcst[f"TimeGPT-hi-{lv}"]
    isBelow = bd180Fcst["ProratedQuantity"] < bd180Fcst[f"TimeGPT-lo-{lv}"]
    bd180Fcst[f"anomaly_{lv}"] = (isAbove | isBelow).astype(np.int64)

In [None]:
# Check that the anomalies detected at the default level (99) match the flags
# derived from the historical 99% prediction interval.

pd.testing.assert_series_equal(
    left=bd180Fcst["anomaly_99"],
    right=bd180Dtct.merge(bd180)["anomaly"],
    check_names=False,
    check_index=False,
)

In [None]:
# Detect anomalies in the 180-day series at prediction interval level 90 and
# plot the result against the original data. The narrower interval should
# identify more anomalies than the default level.

bd180Dtct90 = timegpt.detect_anomalies(
    bd180,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
    freq="D",
    level=90,
)

# Keep only the rows flagged as anomalies.
bd180Anms90 = bd180Dtct90.loc[bd180Dtct90["anomaly"].eq(1)]

bd180Plt90 = timegpt.plot(
    bd180,
    bd180Dtct90,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
)

# Show each result under its own heading.
display(
    Markdown("### Anomalies Detection Result (90% Prediction Interval Level)"),
    bd180Dtct90,
)
display(
    Markdown("### Detected Anomalies (90% Prediction Interval Level)"),
    bd180Anms90,
)
display(
    Markdown("### Results Comparison Plot (90% Prediction Interval Level)"),
    bd180Plt90,
)

In [None]:
# Check that the anomalies detected at level 90 match the flags derived from
# the historical 90% prediction interval.

pd.testing.assert_series_equal(
    left=bd180Fcst["anomaly_90"],
    right=bd180Dtct90.merge(bd180)["anomaly"],
    check_names=False,
    check_index=False,
)

In [None]:
# Detect anomalies in the 180-day series at prediction interval level 99.99 and
# plot the result against the original data. The wider interval should
# identify fewer anomalies than the default level.

bd180Dtct99_99 = timegpt.detect_anomalies(
    bd180,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
    freq="D",
    level=99.99,
)

# Keep only the rows flagged as anomalies.
bd180Anms99_99 = bd180Dtct99_99.loc[bd180Dtct99_99["anomaly"].eq(1)]

bd180Plt99_99 = timegpt.plot(
    bd180,
    bd180Dtct99_99,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
)

# Show each result under its own heading.
display(
    Markdown("### Anomalies Detection Result (99.99% Prediction Interval Level)"),
    bd180Dtct99_99,
)
display(
    Markdown("### Detected Anomalies (99.99% Prediction Interval Level)"),
    bd180Anms99_99,
)
display(
    Markdown("### Results Comparison Plot (99.99% Prediction Interval Level)"),
    bd180Plt99_99,
)

In [None]:
# Check that the anomalies detected at level 99.99 match the flags derived from
# the historical 99.99% prediction interval.

pd.testing.assert_series_equal(
    left=bd180Fcst["anomaly_99.99"],
    right=bd180Dtct99_99.merge(bd180)["anomaly"],
    check_names=False,
    check_index=False,
)

In [None]:
# Get updated API token usage data after the 180-day model calls above.
# The meaning of update=True is defined in usageUtils (not shown here).

getUsage(update=True)

In [None]:
# Build the Kusto query that pulls the sample data set (last 90 days) from
# Azure Data Explorer.

# TODO: add tenantId param

tenantId, daysAgo = "", 90

generateQuery(tenantId=tenantId, daysAgo=daysAgo)

NOTE: Manually paste the query (copied to the clipboard using pyperclip) into Azure Data Explorer (ADE), run it, and export the result to CSV.

In [None]:
# Set the inputs for the 90-day sample billing data and run the data prep
# function. movePlusRename moves and renames the ADE query export file into this
# project directory and returns the new file name, which is handed to dataPrep
# together with the time column and the columns to drop.

# TODO: add new name of ADE query export file

timeCol, dropCols = "TimePeriod", ["Tenant"]
df = "df"
filename = movePlusRename(name="")

bd90 = dataPrep(
    df=df,
    filename=filename,
    timeCol=timeCol,
    dropCols=dropCols,
)

# Preview the first rows of the prepared data.
display(bd90.head())

In [None]:
# Detect anomalies in the 90-day series and plot the result against the
# original data. No `level` is passed, so the default prediction interval
# level (99) applies.

bd90Dtct = timegpt.detect_anomalies(
    bd90,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
    freq="D",
)

# Keep only the rows flagged as anomalies.
bd90Anms = bd90Dtct.loc[bd90Dtct["anomaly"].eq(1)]

bd90Plt = timegpt.plot(
    bd90,
    bd90Dtct,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
)

# Show each result under its own heading.
display(
    Markdown("### Anomalies Detection Result (Default 99% Prediction Interval Level)"),
    bd90Dtct,
)
display(
    Markdown("### Detected Anomalies (Default 99% Prediction Interval Level)"),
    bd90Anms,
)
display(
    Markdown("### Results Comparison Plot (Default 99% Prediction Interval Level)"),
    bd90Plt,
)

In [None]:
# Request historical prediction intervals (add_history=True) at every level
# that is checked against the detected anomalies in the cells below.

# One definition of the levels, shared by the forecast call and the loop.
fcstLevels = (99, 90, 99.99)

bd90Fcst = timegpt.forecast(
    bd90,
    h=1,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
    freq="D",
    add_history=True,
    level=list(fcstLevels),
)

# Bring the observed values back in (merge on the columns the frames share).
bd90Fcst = bd90Fcst.merge(bd90)

for lv in fcstLevels:
    # An observation is flagged when it lies strictly above the upper bound
    # or strictly below the lower bound of the interval for this level.
    isAbove = bd90Fcst["ProratedQuantity"] > bd90Fcst[f"TimeGPT-hi-{lv}"]
    isBelow = bd90Fcst["ProratedQuantity"] < bd90Fcst[f"TimeGPT-lo-{lv}"]
    bd90Fcst[f"anomaly_{lv}"] = (isAbove | isBelow).astype(np.int64)

In [None]:
# Check that the anomalies detected at the default level (99) match the flags
# derived from the historical 99% prediction interval.

pd.testing.assert_series_equal(
    left=bd90Fcst["anomaly_99"],
    right=bd90Dtct.merge(bd90)["anomaly"],
    check_names=False,
    check_index=False,
)

In [None]:
# Detect anomalies in the 90-day series at prediction interval level 90 and
# plot the result against the original data. The narrower interval should
# identify more anomalies than the default level.

bd90Dtct90 = timegpt.detect_anomalies(
    bd90,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
    freq="D",
    level=90,
)

# Keep only the rows flagged as anomalies.
bd90Anms90 = bd90Dtct90.loc[bd90Dtct90["anomaly"].eq(1)]

bd90Plt90 = timegpt.plot(
    bd90,
    bd90Dtct90,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
)

# Show each result under its own heading.
display(
    Markdown("### Anomalies Detection Result (90% Prediction Interval Level)"),
    bd90Dtct90,
)
display(
    Markdown("### Detected Anomalies (90% Prediction Interval Level)"),
    bd90Anms90,
)
display(
    Markdown("### Results Comparison Plot (90% Prediction Interval Level)"),
    bd90Plt90,
)

In [None]:
# Check that the anomalies detected at level 90 match the flags derived from
# the historical 90% prediction interval.

pd.testing.assert_series_equal(
    left=bd90Fcst["anomaly_90"],
    right=bd90Dtct90.merge(bd90)["anomaly"],
    check_names=False,
    check_index=False,
)

In [None]:
# Detect anomalies in the 90-day series at prediction interval level 99.99 and
# plot the result against the original data. The wider interval should
# identify fewer anomalies than the default level.

bd90Dtct99_99 = timegpt.detect_anomalies(
    bd90,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
    freq="D",
    level=99.99,
)

# Keep only the rows flagged as anomalies.
bd90Anms99_99 = bd90Dtct99_99.loc[bd90Dtct99_99["anomaly"].eq(1)]

bd90Plt99_99 = timegpt.plot(
    bd90,
    bd90Dtct99_99,
    time_col="TimePeriod",
    target_col="ProratedQuantity",
)

# Show each result under its own heading.
display(
    Markdown("### Anomalies Detection Result (99.99% Prediction Interval Level)"),
    bd90Dtct99_99,
)
display(
    Markdown("### Detected Anomalies (99.99% Prediction Interval Level)"),
    bd90Anms99_99,
)
display(
    Markdown("### Results Comparison Plot (99.99% Prediction Interval Level)"),
    bd90Plt99_99,
)

In [None]:
# Check that the anomalies detected at level 99.99 match the flags derived from
# the historical 99.99% prediction interval.

pd.testing.assert_series_equal(
    left=bd90Fcst["anomaly_99.99"],
    right=bd90Dtct99_99.merge(bd90)["anomaly"],
    check_names=False,
    check_index=False,
)