# Read generated daily profile

In [None]:
import pandas as pd

# Load the previously generated daily profile from its parquet file
# (path is relative to the notebook's working directory).
daily_profile_df = pd.read_parquet(
    path="addemo23/daily_profile.parquet",
)

# Define anomaly detectors of interest

In [None]:
from ad_demo import MSTLDetector

# Detectors to run; every later cell iterates over this list.
# NOTE(review): the arguments are presumably (metric column, seasonal
# periods, percentile threshold) -- confirm against ad_demo.MSTLDetector.
detectors = [MSTLDetector("distribution/mean", [7], 99.9)]

# Preview the last rows of the profile loaded earlier.
daily_profile_df.tail(n=5)

# Extract Predictions and Features profiles

In [None]:
from ad_demo import extract_profiles

# Split the daily profile into two frames: one for the predictions and
# one for the features.
(preds_df, features_df) = extract_profiles(
    daily_profile_df,
)

In [None]:
# Preview the last five rows of the predictions profile.
preds_df.tail(n=5)

In [None]:
# Preview the last five rows of the features profile.
# NOTE(review): the original comment describes these as the daily mean
# values of all the features -- confirm against extract_profiles.
features_df.tail(n=5)

# Generate Forecasts 

In [None]:
# Let's examine a single detector: take the first one, score the
# predictions profile with it, and preview the tail of the result.
d = detectors[0]
forecast_df = d.score(preds_df)
forecast_df.tail(n=5)

# Find anomalies

In [None]:
import datetime
from typing import Dict, List

# Generate forecasts for every detector and collect the anomalies each one
# finds, keyed by the detector's name.
anomalies: Dict[str, List[datetime.datetime]] = {}
for detector in detectors:
    scored = detector.score(preds_df)
    anomalies[detector.name] = detector.find_anomalies(scored)

# Display the mapping as the cell output.
anomalies

# Find anomalies with Fugue

In [None]:
from fugue import transform
from ad_demo import fugue_find_anomalies

# Run fugue_find_anomalies over the daily profile on the Dask engine.
# The frame is partitioned by "model_name" and each partition is presorted
# by "date" before the function is applied; the detectors list is passed
# through as a parameter, and the declared output schema is one row per
# model name with a list of anomaly strings.
res = transform(
    daily_profile_df,
    fugue_find_anomalies,
    schema="model_name:str, anomalies:[str]",
    params=dict(detectors=detectors),
    partition=dict(by=["model_name"], presort="date"),
    engine="dask",
    engine_conf={"fugue.dask.default.partitions": 5},
)

In [None]:
# Preview the first rows of the Fugue result.
# `res` is expected to be a Dask DataFrame here because the transform ran
# on the "dask" engine. Passing npartitions=-1 makes head() draw from all
# partitions; a hard-coded count raises ValueError whenever the result has
# fewer partitions than requested.
res.head(npartitions=-1)

# View prediction anomalies

In [None]:
from IPython.display import display, HTML
from ad_demo import plot_series_graph

# Render a heading, then plot the prediction mean series together with the
# anomalies collected earlier. The heading is a plain string: it has no
# placeholders, so an f-string prefix is unnecessary.
display(HTML('<h1>Prediction</h1>'))
fig = plot_series_graph(preds_df["distribution/mean"], anomalies)
fig.show()

# Feature importance with respect to prediction change

In [None]:
from IPython.display import display, HTML
from ad_demo import plot_feature_drift_stack, get_feature_drift_stack

# The heading is a plain string: it has no placeholders, so an f-string
# prefix is unnecessary.
display(HTML('<h1>Feature drift - stacked importance</h1>'))

# Attach the prediction mean to the features frame as the "_pred" column.
# Note that this mutates features_df in place.
# NOTE(review): presumably get_feature_drift_stack reads "_pred" as the
# target series -- confirm against ad_demo.
features_df["_pred"] = preds_df["distribution/mean"]

# Compute the per-feature importance and plot it as a stacked chart.
importance = get_feature_drift_stack(features_df)
fig = plot_feature_drift_stack(importance)
fig.show()

# Feature values

In [None]:
import html

# Show every feature listed in `importance`, iterating its names in
# reverse order, with a heading followed by the feature's series plotted
# against the detected anomalies.
# NOTE(review): the original comment says this is drift-importance rank
# order -- confirm the ordering produced by get_feature_drift_stack.
# Relies on display, HTML and plot_series_graph imported in earlier cells.
for feature in reversed(importance.name.values):
    series = features_df[feature]
    # The heading must close with the same tag it opens with; the feature
    # name is escaped so characters such as "<" or "&" render literally.
    display(HTML(f'<h3>{html.escape(str(feature))}</h3>'))
    fig = plot_series_graph(series, anomalies)
    fig.show()