In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
# print(plt.style.available)

# служебные функции
from adtk.data import validate_series
from adtk.visualization import plot
# Статистические методы детектирования точечных аномалий
from adtk.detector import ThresholdAD
from adtk.detector import QuantileAD
from adtk.detector import InterQuartileRangeAD
from adtk.detector import GeneralizedESDTestAD
# Статистические методы детектирования групповых аномалий
from adtk.detector import PersistAD
from adtk.detector import LevelShiftAD
from adtk.detector import VolatilityShiftAD
# методы на основе декомпозиции временного ряда и авторегрессии
from adtk.detector import SeasonalAD
from adtk.detector import AutoregressionAD
# Методы на основе кластеризации - неконтролируемое обучение
from adtk.detector import MinClusterDetector
from sklearn.cluster import KMeans
# Методы на основе плотности
from adtk.detector import OutlierDetector
from sklearn.neighbors import LocalOutlierFactor
# Методы на основе регрессии - контролируемое обучение
from adtk.detector import RegressionAD
from sklearn.linear_model import LinearRegression
# Методы на основе понижения размерности
from adtk.detector import PcaAD
# кастомизация
from adtk.detector import CustomizedDetectorHD


In [None]:
def detect_ThresholdAD(df, high, low):
    """Run ADTK's ``ThresholdAD`` detector on a time series.

    The input is passed through ``validate_series`` first; the detector is
    then applied with ``detect`` (this wrapper performs no fitting step).

    Args:
        df: Time series data accepted by ``validate_series``.
        high: Value forwarded to ``ThresholdAD(high=...)``.
        low: Value forwarded to ``ThresholdAD(low=...)``.

    Returns:
        Whatever ``ThresholdAD.detect`` returns for the validated input.
    """
    validated = validate_series(df)
    return ThresholdAD(low=low, high=high).detect(validated)

In [None]:
def detect_QuantileAD(df, high, low):
    """Run ADTK's ``QuantileAD`` detector on a time series.

    The input is validated with ``validate_series`` and the detector is
    fitted and applied to that same data in one ``fit_detect`` call.

    Args:
        df: Time series data accepted by ``validate_series``.
        high: Value forwarded to ``QuantileAD(high=...)``.
        low: Value forwarded to ``QuantileAD(low=...)``.

    Returns:
        Whatever ``QuantileAD.fit_detect`` returns for the validated input.
    """
    validated = validate_series(df)
    detector = QuantileAD(low=low, high=high)
    flags = detector.fit_detect(validated)
    return flags

In [None]:
def detect_InterQuartileRangeAD(df, c):
    """Run ADTK's ``InterQuartileRangeAD`` detector on a time series.

    The input is validated with ``validate_series`` and the detector is
    fitted and applied to that same data in one ``fit_detect`` call.

    Args:
        df: Time series data accepted by ``validate_series``.
        c: Value forwarded to ``InterQuartileRangeAD(c=...)``.

    Returns:
        Whatever ``InterQuartileRangeAD.fit_detect`` returns for the
        validated input.
    """
    validated = validate_series(df)
    return InterQuartileRangeAD(c=c).fit_detect(validated)

In [None]:
def detect_SeasonalAD(df, c=3.0, side="both"):
    """Run ADTK's ``SeasonalAD`` detector on a time series.

    The input is validated with ``validate_series`` and the detector is
    fitted and applied to that same data in one ``fit_detect`` call.

    Args:
        df: Time series data accepted by ``validate_series``.
        c: Value forwarded to ``SeasonalAD(c=...)``. Defaults to 3.0.
        side: Value forwarded to ``SeasonalAD(side=...)``. Defaults to
            ``"both"``.

    Returns:
        Whatever ``SeasonalAD.fit_detect`` returns for the validated input.
    """
    validated = validate_series(df)
    options = {"c": c, "side": side}
    return SeasonalAD(**options).fit_detect(validated)

In [None]:
def detect_AutoregressionAD(df, n_steps=7*2, step_size=24, c=3.0):
    """Run ADTK's ``AutoregressionAD`` detector on a time series.

    The input is validated with ``validate_series`` and the detector is
    fitted and applied to that same data in one ``fit_detect`` call.

    Args:
        df: Time series data accepted by ``validate_series``.
        n_steps: Value forwarded to ``AutoregressionAD(n_steps=...)``.
            Defaults to 14 (written as ``7*2``).
        step_size: Value forwarded to ``AutoregressionAD(step_size=...)``.
            Defaults to 24.
        c: Value forwarded to ``AutoregressionAD(c=...)``. Defaults to 3.0.

    Returns:
        Whatever ``AutoregressionAD.fit_detect`` returns for the validated
        input.
    """
    validated = validate_series(df)
    options = dict(n_steps=n_steps, step_size=step_size, c=c)
    detector = AutoregressionAD(**options)
    return detector.fit_detect(validated)

In [None]:
def detect_MinClusterDetector(df, n_clusters=3, random_state=None):
    """Run ADTK's ``MinClusterDetector`` backed by a scikit-learn ``KMeans``.

    The input is validated with ``validate_series``; the detector is then
    fitted and applied to that same data in one ``fit_detect`` call.

    Args:
        df: Time series data accepted by ``validate_series``. A DataFrame is
            passed through unchanged; a Series is wrapped into a one-column
            DataFrame because this detector only accepts DataFrame input.
        n_clusters: Number of clusters forwarded to ``KMeans``. Defaults to 3.
        random_state: Seed forwarded to ``KMeans`` so that repeated runs give
            the same clustering. The default ``None`` keeps scikit-learn's
            unseeded behaviour.

    Returns:
        Whatever ``MinClusterDetector.fit_detect`` returns for the validated
        input.
    """
    data = validate_series(df)
    if isinstance(data, pd.Series):
        # The detector is multivariate and rejects a bare Series, so promote
        # a single column to a one-column frame instead of failing.
        data = data.to_frame()
    clustering = KMeans(n_clusters=n_clusters, random_state=random_state)
    return MinClusterDetector(clustering).fit_detect(data)


In [None]:
def detect_OutlierDetector(df, contamination=0.05):
    """Run ADTK's ``OutlierDetector`` backed by ``LocalOutlierFactor``.

    The input is validated with ``validate_series``; the detector is then
    fitted and applied to that same data in one ``fit_detect`` call.

    Args:
        df: Time series data accepted by ``validate_series``. A DataFrame is
            passed through unchanged; a Series is wrapped into a one-column
            DataFrame because this detector only accepts DataFrame input.
        contamination: Value forwarded to
            ``LocalOutlierFactor(contamination=...)``. Defaults to 0.05.

    Returns:
        Whatever ``OutlierDetector.fit_detect`` returns for the validated
        input.
    """
    data = validate_series(df)
    if isinstance(data, pd.Series):
        # The detector is multivariate and rejects a bare Series, so promote
        # a single column to a one-column frame instead of failing.
        data = data.to_frame()
    model = LocalOutlierFactor(contamination=contamination)
    return OutlierDetector(model).fit_detect(data)

In [None]:
def detect_RegressionAD(df, target='Close', c=3.0):
    """Run ADTK's ``RegressionAD`` detector with a ``LinearRegression`` model.

    The input is validated with ``validate_series``; the detector is then
    fitted and applied to that same data in one ``fit_detect`` call.

    Args:
        df: Time series data accepted by ``validate_series``. Presumably a
            DataFrame that contains the ``target`` column plus the columns
            used as regressors (confirm against the ADTK documentation).
        target: Value forwarded to ``RegressionAD(target=...)``. Defaults to
            ``'Close'``.
        c: Value forwarded to ``RegressionAD(c=...)``. Defaults to 3.0.

    Returns:
        Whatever ``RegressionAD.fit_detect`` returns for the validated input.
    """
    validated = validate_series(df)
    model = LinearRegression()
    detector = RegressionAD(regressor=model, target=target, c=c)
    return detector.fit_detect(validated)

### Testing

In [None]:
# import os
# import import_ipynb
# from file_loader import loading_v3

# dir = "crypto_data"
# symbol = 'BTC-USD'
# df = pd.DataFrame()
# try:
#     f = os.path.join(dir, symbol+'.csv')
#     if os.path.isfile(f):
#         df = loading_v3(f)
#     df = df.dropna()
#     df.set_index('Date', inplace=True)
# except Exception as e:
#     print(f"Error loading file {symbol}.csv: {e}")

# close_df = df['Close'].copy()
# close_df

In [None]:

# anomalies = detect_InterQuartileRangeAD(close_df, 1.5)
# anomalies = detect_ThresholdAD(close_df, 30, 15)
# anomalies = detect_InterQuartileRangeAD(close_df, 1.5)
# anomalies = detect_AutoregressionAD(close_df, 7*2, 6, 3.0)

# anomalies = detect_SeasonalAD(df['Volume'], 3.0, 'both')
# plot(df['Volume'], anomaly=anomalies, ts_markersize=1, anomaly_color='red', anomaly_tag="marker", anomaly_markersize=2);

# anomalies = detect_MinClusterDetector(df, 3) # must be pandas Dataframe
# anomalies = detect_OutlierDetector(df, 0.05) # ERROR
# anomalies = detect_RegressionAD(df, 'Volume', 3.0) # ERROR
# plot(df, anomaly=anomalies, ts_linewidth=1, ts_markersize=3, anomaly_color='red', anomaly_alpha=0.3, curve_group='all');

# print(anomalies.value_counts())
# plot(close_df, anomaly=anomalies, ts_linewidth=1, ts_markersize=3, anomaly_markersize=5, ts_color='g', anomaly_color='red', anomaly_tag="marker");
