In [None]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from fedot_ind.core.operation.filtration.quantile_filtration import quantile_filter
from fedot_ind.core.architecture.pipelines.anomaly_detection import AnomalyDetectionPipelines

Load and filter the SKAB data

In [None]:
# Sensor channels kept as model features by smooth_data().
# 'Voltage' is left out of the feature set, but filter_data() still reads it,
# so rows have to be filtered before a frame is narrowed to these columns.
# The label columns ('anomaly', 'changepoint') are read separately by get_target().
selected_col = [
    'Accelerometer1RMS',
    'Accelerometer2RMS',
    'Current',
    'Pressure',
    'Temperature',
    'Thermocouple',
    # 'Voltage',
    'Volume Flow RateRMS'
    # ,'anomaly', 'changepoint'
]
def download_skab_data(data_dir=os.path.join('.', 'SKAB')):
    """Load every SKAB csv file found under ``data_dir`` from the local disk.

    Files whose path contains ``'anomaly-free'`` are treated as the clean
    reference recording; every other csv file is treated as a recording that
    contains anomalies.

    Args:
        data_dir: root folder that is walked recursively for ``.csv`` files.
            Defaults to the ``SKAB`` folder next to the notebook, built with
            ``os.path.join`` so the default also resolves on non-Windows systems.

    Returns:
        A tuple ``(list_of_df, anomaly_free_df)``: the list of frames with
        anomalies (in sorted path order) and the first anomaly-free frame.
        All frames are indexed by the parsed ``datetime`` column.

    Raises:
        FileNotFoundError: if no csv file, or no anomaly-free csv file, is
            found under ``data_dir``.
    """
    # Sort the paths: os.walk gives no ordering guarantee, and the order of
    # list_of_df decides which prediction belongs to which recording.
    csv_paths = sorted(
        os.path.join(root, file_name)
        for root, _, file_names in os.walk(data_dir)
        for file_name in file_names
        if file_name.endswith('.csv')
    )
    if not csv_paths:
        raise FileNotFoundError(
            f'No .csv files found under {os.path.abspath(data_dir)!r}')

    anomaly_free_paths = [path for path in csv_paths if 'anomaly-free' in path]
    if not anomaly_free_paths:
        raise FileNotFoundError(
            f'No anomaly-free .csv file found under {os.path.abspath(data_dir)!r}')

    def _read(path):
        # All SKAB files share the same layout: ';' separated, 'datetime' index.
        return pd.read_csv(path, sep=';', index_col='datetime', parse_dates=True)

    # datasets with anomalies loading
    list_of_df = [_read(path) for path in csv_paths if 'anomaly-free' not in path]
    # anomaly-free df loading
    anomaly_free_df = _read(anomaly_free_paths[0])
    return list_of_df, anomaly_free_df

def filter_data(data):
    """Drop the rows whose sensor readings fail any of the sanity checks.

    A row is kept only when both accelerometer RMS values and the volume flow
    rate are strictly positive, the current is below 100 and the voltage is
    above 100. Rows with a missing value in any checked column are dropped,
    because comparisons against NaN evaluate to False.

    Args:
        data: frame that contains the columns 'Accelerometer1RMS',
            'Accelerometer2RMS', 'Volume Flow RateRMS', 'Current' and 'Voltage'.

    Returns:
        The rows of ``data`` that pass every check, with all columns kept.

    Raises:
        KeyError: if one of the checked columns is missing.
    """
    accelerometer_1_ok = data['Accelerometer1RMS'] > 0
    accelerometer_2_ok = data['Accelerometer2RMS'] > 0
    flow_rate_ok = data['Volume Flow RateRMS'] > 0
    current_ok = data['Current'] < 100
    voltage_ok = data['Voltage'] > 100

    rows_to_keep = (accelerometer_1_ok & accelerometer_2_ok
                    & flow_rate_ok & current_ok & voltage_ok)
    return data[rows_to_keep]


def get_target(list_of_df, type: str = 'anomaly'):
    """Extract one label column from every frame as a flat array.

    Args:
        list_of_df: iterable of frames that contain the column named by ``type``.
        type: name of the label column to extract ('anomaly' by default).

    Returns:
        A list with one flattened array of label values per input frame,
        in the same order as ``list_of_df``.
    """
    return [frame[type].values.flatten() for frame in list_of_df]

def smooth_data(list_of_df, anomaly_free_df, window_size=10):
    """Smooth and filter the SKAB frames and keep only the feature columns.

    Each frame in ``list_of_df`` is smoothed with a rolling mean, back-filled,
    row-filtered with ``filter_data`` and then narrowed to ``selected_col``.
    The anomaly-free frame is row-filtered first and smoothed afterwards.

    Args:
        list_of_df: frames with anomalies, as returned by download_skab_data().
        anomaly_free_df: the anomaly-free reference frame.
        window_size: length of the rolling-mean window, in rows.

    Returns:
        A tuple ``(filter_data_list, anomaly_free_df)``: the list of processed
        frames with anomalies and the processed anomaly-free frame, each
        restricted to the ``selected_col`` columns.
    """
    filter_data_list = []
    for raw_df in list_of_df:
        # The first window_size - 1 rows of a rolling mean are NaN; bfill()
        # replaces them with the first complete window.
        smoothed = raw_df.rolling(window=window_size).mean().bfill()
        # filter_data() reads 'Voltage', which is not part of selected_col, so
        # the row filter has to run before the columns are selected; selecting
        # first raises KeyError: 'Voltage'.
        filter_data_list.append(filter_data(smoothed)[selected_col])

    anomaly_free_df = filter_data(anomaly_free_df)
    anomaly_free_df = anomaly_free_df.rolling(window=window_size).mean().bfill()
    anomaly_free_df = anomaly_free_df[selected_col]
    return filter_data_list, anomaly_free_df

Prepare data for experiment

In [None]:
# Load the raw frames, then smooth and filter them. smooth_data() returns the
# frames with anomalies first (test set) and the anomaly-free frame second
# (train set).
list_of_df, anomaly_free_df = download_skab_data()
test_features, train_features = smooth_data(list_of_df, anomaly_free_df)
# filter_data() can drop rows from the test frames, so read the labels only at
# the timestamps that survived the filter; otherwise features and targets may
# end up with different lengths.
test_target = get_target([raw_df.loc[features.index]
                          for raw_df, features in zip(list_of_df, test_features)])
assert all(len(target) == len(features)
           for target, features in zip(test_target, test_features)), \
    'test targets are not aligned with the filtered test features'
# Train on the first 1200 rows only; transposed so that every row of the array
# is one sensor channel and every column is one time step.
train_features = train_features.iloc[:1200, :].T.values
train, test = (train_features, None), (test_features, test_target)
window_length = 20

Initialize Fedot and Industrial model hyperparameters

In [None]:
# Passed as `feature_hyperparams` to every detection pipeline call below.
feature_hyperparams = {
    'window_mode': True,
    'window_size': 10
}
# Passed as `model_hyperparams` to the 'Kalman' pipeline call below.
# NOTE(review): 'timeout' is presumably in minutes and 'logging_level': 20
# presumably corresponds to logging.INFO — confirm against the Fedot API.
fedot_model_hyperparams = {
    'problem': 'regression',
    'seed': 42,
    'metric': 'rmse',
    'timeout': 10,
    'max_depth': 6,
    'max_arity': 3,
    'cv_folds': 3,
    'logging_level': 20,
    'n_jobs': 4
}
# NOTE(review): not referenced by any cell visible in this notebook.
dict_result = {}

Initialize Industrial anomaly detection pipelines

In [None]:
# `train` is (feature array, None) and `test` is (list of feature frames,
# list of target arrays), as built in the data-preparation cell.
detection_model = AnomalyDetectionPipelines(train_data=train, test_data=test)

Experiment with Singular Transformation pipeline

In [None]:
# Hyperparameters passed as `model_hyperparams` to the 'SST' pipeline.
# NOTE(review): 'window_length' here (60) differs from the module-level
# window_length (20) that is also passed to the call below — confirm that the
# two settings are meant to be independent.
sst_model_hyperparams = {
    'n_components': 3,
    'window_length': 60,
    'trajectory_window_length': 15,
    'dynamic_mode': True,
    'delay_lag': 60
}
# detection_model('SST') returns a callable; its result is unpacked into the
# fitted model and its prediction.
model_sst, prediction_sst = detection_model('SST')(model_hyperparams=sst_model_hyperparams,
                                                   feature_hyperparams=feature_hyperparams,
                                                   window_length=window_length,
                                                   mode='multits')

Experiment with Fedot Unscented Kalman Filter

In [None]:
# Run the 'Kalman' pipeline with the Fedot hyperparameters; the result is
# unpacked into the fitted model and its prediction. prediction_kalman is
# consumed by the plotting cell at the end of the notebook.
model_kalman, prediction_kalman = detection_model('Kalman')(model_hyperparams=fedot_model_hyperparams,
                                                    feature_hyperparams=feature_hyperparams,
                                                    window_length=window_length)

Experiment with Fedot Functional PCA Filter

In [None]:
# Hyperparameters passed as `model_hyperparams` to the 'FunctionalPCA' pipeline.
pca_model_hyperparams = {
    'n_components': 2,
    'regularization': None,
    'basis_function': None
}

# The result is unpacked into the fitted model and its prediction;
# prediction_functional is consumed by the plotting cell below.
# NOTE(review): the meaning of component=[[1], [1], [1]] is not visible from
# this notebook — confirm against the AnomalyDetectionPipelines API.
model_functional, prediction_functional = detection_model('FunctionalPCA')(component=[[1], [1], [1]],
                                           model_hyperparams=pca_model_hyperparams,
                                           feature_hyperparams=feature_hyperparams)

In [None]:
# Per-series diagnostics: plot the Functional PCA residual and the Kalman
# reconstruction, then run the quantile filter on the Functional PCA output.
# The quantile_filter result of every series is collected here.
outlier_idx_per_series = []
for i in range(len(prediction_functional)):
    recover_func = prediction_functional[i][0].T
    recover_kalman = np.concatenate(prediction_kalman[i][0], axis=1)
    # Loop-local name, so the module-level test_features / test_target lists
    # are not overwritten by a single series.
    series_features = test[0][i]
    residual = pd.DataFrame(series_features - recover_func).T
    kalman_recovered = pd.DataFrame(recover_kalman).T

    # plt.subplots(2, 1) returns a 1-D array of two Axes: index it with a
    # single subscript and draw on each Axes individually.
    fig, ax = plt.subplots(2, 1)
    ax[0].plot(residual.values)
    ax[0].set_title('Functional PCA residual')
    ax[1].plot(kalman_recovered.values)
    ax[1].set_title('Kalman reconstruction')
    # NOTE(review): the original called vlines twice on the Axes array;
    # presumably one marker per subplot was intended — confirm.
    ax[0].vlines(0.5, 0, 3)
    ax[1].vlines(0.5, 0, 3)
    fig.tight_layout()
    plt.show()

    residual.plot()
    plt.show()
    kalman_recovered.plot()
    plt.show()
    outlier_idx = quantile_filter(input_data=series_features, predicted_data=recover_func)
    outlier_idx_per_series.append(outlier_idx)

