# Part(3): Поиск аномалий


## 0. Utils

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

# Default figure size applied to every matplotlib figure created afterwards.
plt.rcParams["figure.figsize"] = (23, 9)

import warnings

# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore")

# For adtk: whenever the "seaborn-v0_8-whitegrid" style is available, register
# it in the style library under the name "seaborn-whitegrid" as well.
# NOTE(review): presumably adtk's plotting requests the old style name — confirm.
import matplotlib.style as mplstyle

if "seaborn-v0_8-whitegrid" in mplstyle.library:
    mplstyle.library["seaborn-whitegrid"] = mplstyle.library["seaborn-v0_8-whitegrid"]

# Seed NumPy's global random generator.
np.random.seed(42)

In [None]:
from etna.datasets import TSDataset
from etna.analysis import plot_anomalies as plot_anomalies_etna
from adtk.visualization import plot
from adtk.metrics import recall, precision, f1_score

In [None]:
def convert_to_etna(df, segments=None):
    """Reshape a wide frame into the long ``timestamp / segment / target`` layout.

    Args:
        df: Wide data (anything ``pd.DataFrame`` accepts) with one column per
            segment. After ``reset_index()`` it must expose a ``timestamp``
            column, i.e. the index is expected to be named ``timestamp``.
        segments: Names of the columns to melt. When omitted, the columns
            ``segment_1`` .. ``segment_7`` are used.

    Returns:
        A long ``pd.DataFrame`` with the columns ``timestamp``, ``segment``
        and ``target``; rows are grouped by segment in the order given by
        ``segments``.
    """
    if segments is None:
        # Historical default: the seven segments of the seminar dataset.
        segments = [f"segment_{i}" for i in range(1, 8)]

    return pd.melt(
        pd.DataFrame(df).reset_index(),
        id_vars="timestamp",
        value_vars=list(segments),
        var_name="segment",
        value_name="target",
    )


def anomaly_dict_to_mask(df, anomaly_dict):
    """Turn a ``{segment: index labels}`` mapping into a 0/1 frame shaped like ``df``.

    Args:
        df: Frame whose index and columns define the layout of the mask.
            It is copied, not modified.
        anomaly_dict: Mapping from a column name of ``df`` to the index labels
            that should be flagged in that column.

    Returns:
        A copy of ``df`` with every cell set to 0 except the flagged
        ``(label, column)`` cells, which are set to 1.
    """
    mask = df.copy()
    mask[:] = 0
    for column in anomaly_dict:
        flagged_labels = anomaly_dict[column]
        mask.loc[flagged_labels, column] = 1
    return mask


def eval_etna(df, anomaly_dict):
    """Plot etna anomalies and score them with precision, recall and F1.

    Besides its arguments, this helper reads two module-level names: ``ts``
    (the dataset passed to the plotting call) and ``df_anomaly`` (passed to
    ``eval_metrics`` as ``y_true``).

    Args:
        df: Frame that defines the index/columns of the predicted mask.
        anomaly_dict: Mapping ``{segment: index labels}`` of detected anomalies.

    Returns:
        Tuple ``(metrics, metrics_agg, anomalies_mask)``: the two dicts
        returned by ``eval_metrics`` and the 0/1 mask built from
        ``anomaly_dict``.
    """
    predicted_mask = anomaly_dict_to_mask(df, anomaly_dict)

    plot_anomalies_etna(ts, anomaly_dict)

    scorers = [precision, recall, f1_score]
    scores, scores_agg = eval_metrics(df_anomaly, predicted_mask, metrics_list=scorers)
    print(scores_agg)
    return scores, scores_agg, predicted_mask


def eval_metrics(y_true, y_pred, metrics_list):
    """Evaluate every metric once and also report its mean.

    Args:
        y_true: Ground truth, forwarded unchanged as the first argument of
            each metric.
        y_pred: Predictions, forwarded unchanged as the second argument.
        metrics_list: Iterable of callables ``metric(y_true, y_pred)``; each
            result is stored under the callable's ``__name__``.

    Returns:
        Tuple ``(metrics, metrics_agg)`` of dicts keyed by metric name:
        ``metrics`` holds the raw result of each metric, ``metrics_agg`` holds
        ``pd.Series(result).mean()``.
    """
    metrics = {}
    metrics_agg = {}
    for metric in metrics_list:
        name = metric.__name__
        # Call the metric a single time and reuse the value for both outputs.
        value = metric(y_true, y_pred)
        metrics[name] = value
        metrics_agg[name] = pd.Series(value).mean()

    return metrics, metrics_agg


def plot_anomalies(series, detector):
    """Run a detector on ``series``, score the result and plot it.

    The detector's ``fit_detect`` is tried first; if that fails, ``detect``
    is used instead. The detected anomalies are scored against the
    module-level ``df_anomaly`` with precision, recall and F1.

    Args:
        series: Data passed to the detector and to the plotting call.
        detector: Object exposing ``fit_detect(series)`` and/or
            ``detect(series)``.

    Returns:
        Tuple ``(metrics, metrics_agg, anomalies)``: the two dicts returned by
        ``eval_metrics`` and the detector output.
    """
    try:
        anomalies = detector.fit_detect(series)
    except Exception:
        # Best-effort fallback to detect(). Unlike a bare ``except``, this
        # lets KeyboardInterrupt and SystemExit propagate.
        anomalies = detector.detect(series)

    metrics, metrics_agg = eval_metrics(df_anomaly, anomalies, metrics_list=[precision, recall, f1_score])
    plot(
        series,
        anomaly=anomalies,
        ts_linewidth=1,
        ts_markersize=3,
        anomaly_markersize=5,
        anomaly_color="red",
        anomaly_tag="marker",
    )
    print(metrics_agg)
    return metrics, metrics_agg, anomalies

## 1. Данные

Будем использовать данные с семинара

In [None]:
# Read the series and the anomaly data (used as y_true by the evaluation
# helpers); in both files the "timestamp" column is parsed to datetimes and
# becomes the index.
df = (
    pd.read_csv("data/data.csv")
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .set_index("timestamp")
)

df_anomaly = (
    pd.read_csv("data/anomaly.csv")
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .set_index("timestamp")
)

# Long-format copy of the series wrapped in an etna TSDataset with daily frequency.
df_etna = convert_to_etna(df)
ts = TSDataset(df=df_etna, freq="D")
ts.plot()

## 2. Дефолтный вариант (с семинара)

In [None]:
from etna.analysis import get_anomalies_isolation_forest

# Baseline: run the isolation-forest detector on ts with its default
# arguments, then plot and score the result with eval_etna.
anomaly_dict = get_anomalies_isolation_forest(ts)
metrics_if, metrics_agg_if, anomalies_if = eval_etna(df, anomaly_dict)

## 3. Убираем тренд

Попробуем убрать тренд из рядов

In [None]:
from etna.transforms import LinearTrendTransform

In [None]:
# Exercise placeholder: replace the Ellipsis with a transform instance
# (LinearTrendTransform is imported for this step).
trend_transform = ...  # <your code here>
# Fit the transform on ts and apply it.
ts.fit_transform([trend_transform])

In [None]:
# Detect anomalies on the transformed dataset using only the "target" feature.
anomaly_dict = get_anomalies_isolation_forest(ts, features_to_use=["target"])
# Undo the trend transform before evaluating: eval_etna plots the global ts.
ts.inverse_transform([trend_transform])

metrics_if_det, metrics_agg_if_det, anomalies_if_det = eval_etna(df, anomaly_dict)

## 4. Добавляем признаки

На первом семинаре мы научились доставать компоненты из модели HoltWinters — попробуем использовать их в качестве признаков.

**Замечание:** в идеале нужно разбить датасет на несколько частей по периоду сезонности, чтобы оценить сезонную компоненту более корректно, однако сейчас мы на это забьём.

In [None]:
from etna.models import HoltWintersModel
from etna.transforms import ModelDecomposeTransform

In [None]:
# Exercise placeholder: replace the Ellipsis with a transform instance
# (HoltWintersModel and ModelDecomposeTransform are imported for this step).
model_decompose = ...  # <your code here>
# Fit the transform on ts and apply it.
ts.fit_transform([model_decompose])

In [None]:
# Show the first rows of ts after the transform above has been applied.
ts.head()

In [None]:
# Exercise placeholders: choose the feature columns and compute the anomaly
# dictionary from them.
features = ...  # <your code here>
anomaly_dict = ...  # <your code here>
# NOTE(review): these names are the same as in the detrending section, so the
# earlier results are overwritten — confirm this is intended.
metrics_if_det, metrics_agg_if_det, anomalies_if_det = eval_etna(df, anomaly_dict)