In [1]:
# Switch the kernel's working directory to the project checkout. Presumably
# needed because later cells import the `src` package by name and refer to
# 'requirements.txt' by relative path -- TODO confirm, and consider replacing
# this absolute path with a configurable project root.
%cd /notebooks/anomaly-detector

/notebooks/anomaly-detector


In [2]:
import os
import subprocess
import sys

# Import the project modules; if anything is missing, install the pinned
# requirements once and try again. A single import list is used for both
# attempts so the two can never drift apart.
for _attempt in range(2):
    try:
        from src.utils import utils
        from src.utils import params
        from src.utils import thresholds as th
        from src.models import usad
        from src.models import usad_utils
        from src.data import columns
        from src.data import preprocessing
        from src.visualization import plotter
        break
    except ModuleNotFoundError:
        if _attempt:
            # Installing did not help: surface the real missing module.
            raise
        print("installing requirements..")
        # Run pip through the kernel's own interpreter so the packages land in
        # the environment this notebook is executing in, not in whichever
        # `pip` happens to be first on PATH.
        _pip = subprocess.run(
            [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
        if _pip.returncode != 0:
            # Best effort, as before: report the failure and let the retry
            # raise the ModuleNotFoundError that names the missing package.
            print(f"pip install exited with status {_pip.returncode}")
            print(_pip.stdout)

In [3]:
# Fetch every configuration group with a single call and unpack them, one
# name per line, in the order get_params() returns them.
(
    PREPROCESSING_PARAMS,
    TRAINING_PARAMS,
    INTERVALS_PARAMS,
    TH_ALGORITHM,
    PLOT_PARAMS,
    df_path,
) = params.get_params()

PREPROCESSING_PARAMS: {'downsamplig_rate': 5, 'window_size': 12, 'normalization': 'all', 'metrics': 'columns_6', 'multi_file': False}
TRAINING_PARAMS: {'batch_size': 32, 'epochs': 3, 'hidden_size': 10, 'alpha': 0.5, 'beta': 0.5}
INTERVALS_PARAMS: {'train_start': 40320, 'train_end': 70560, 'test_start': 0, 'test_end': 30240}
TH_ALGORITHM: iqr
DF_PATH: /notebooks/anomaly-detector/data/raw/V_GV_SYSMETRIC_INCTANCE_2.csv
PLOT_PARAMS: {'history_static': True, 'history_html': False, 'db_time_static': True, 'db_time_html': False, 'labels_static': True, 'labels_html': False, 'thresholds_static': True, 'thresholds_html': True}


In [7]:
class multivariate_ad:
    """Multivariate anomaly detector built on the project's USAD pipeline.

    The constructor only stores configuration. Loading data, training the
    model, scoring, thresholding and (optionally) plotting all happen in
    ``fit_predict``. After a run the results are available as
    ``decision_scores_``, ``labels_`` and ``anomalies_intervals_``.
    """

    def __init__(self, PREPROCESSING_PARAMS, TRAINING_PARAMS, INTERVALS_PARAMS,
                 TH_ALGORITHM, PLOT_PARAMS, df_path):
        """Store the configuration and initialise the result placeholders.

        Args:
            PREPROCESSING_PARAMS: dict; the keys 'multi_file', 'normalization'
                and 'metrics' are read here, and the whole dict is forwarded
                to the preprocessing helpers.
            TRAINING_PARAMS: dict with 'batch_size', 'epochs', 'hidden_size',
                'alpha' and 'beta'.
            INTERVALS_PARAMS: forwarded unchanged to the preprocessing helpers.
            TH_ALGORITHM: forwarded to ``th.get_labels``.
            PLOT_PARAMS: dict of ``<plot>_static`` / ``<plot>_html`` flags
                used when ``fit_predict`` plots.
            df_path: path of the data file. In multi-file mode a
                ``(train_path, test_path)`` pair is also accepted
                (NOTE(review): assumed convention -- confirm with callers).
        """
        self.PREPROCESSING_PARAMS = PREPROCESSING_PARAMS
        self.TRAINING_PARAMS = TRAINING_PARAMS
        self.INTERVALS_PARAMS = INTERVALS_PARAMS
        self.TH_ALGORITHM = TH_ALGORITHM
        self.PLOT_PARAMS = PLOT_PARAMS
        self.df_path = df_path

        self.batch_size = TRAINING_PARAMS['batch_size']
        self.epochs = TRAINING_PARAMS['epochs']
        self.hidden_size = TRAINING_PARAMS['hidden_size']
        self.alpha = TRAINING_PARAMS['alpha']
        self.beta = TRAINING_PARAMS['beta']

        # 'multi_file' is the opposite of "single file", so it must be negated.
        self.single_file = not PREPROCESSING_PARAMS['multi_file']
        self.scaler = PREPROCESSING_PARAMS['normalization']
        self.columns_name = PREPROCESSING_PARAMS['metrics']

        # Multi-file mode reads separate train/test files. They can be given
        # as a pair in df_path or assigned on the instance before get_data().
        self.df_train_path = None
        self.df_test_path = None
        if (not self.single_file and isinstance(df_path, (list, tuple))
                and len(df_path) == 2):
            self.df_train_path, self.df_test_path = df_path

        self.df = None
        self.train_df = None
        self.test_df = None

        # The model sizes depend on the shape of the training windows, which
        # only exist after the data has been loaded; fit_predict fills them in.
        self.w_size = None
        self.z_size = None
        self.device = utils.get_default_device()

        self.labels_ = None
        self.decision_scores_ = None
        self.anomalies_intervals_ = None

    def get_data(self, preprocessing=True):
        """Load the configured data and run it through the preprocessing step.

        Args:
            preprocessing: when true the configured normalization is applied;
                when false ``scaler=None`` is passed so the data stays
                unscaled.

        Returns:
            Whatever ``preprocessing.data_preprocessing`` returns;
            ``fit_predict`` unpacks it as six values.

        Raises:
            ValueError: in multi-file mode when the train/test paths are unset.
        """
        # The flag above shadows the `preprocessing` module inside this
        # method, so every call into the module lives in the helper below.
        scaler = self.scaler if preprocessing else None
        return self._load_and_window(scaler)

    def _load_and_window(self, scaler):
        """Read the data frame(s) and hand them to ``data_preprocessing``."""
        # NOTE(review): the original passed single_file=False in the
        # single-file branch and single_file=True (with df=None) in the
        # multi-file branch -- the opposite of get_db_time. The flags below
        # follow get_db_time; confirm against preprocessing.data_preprocessing.
        if self.single_file:
            self.df = preprocessing.get_df(self.df_path, columns_name=self.columns_name)
            return preprocessing.data_preprocessing(
                self.PREPROCESSING_PARAMS, self.df,
                INTERVALS_PARAMS=self.INTERVALS_PARAMS,
                scaler=scaler, multi=True, single_file=True,
            )

        if self.df_train_path is None or self.df_test_path is None:
            raise ValueError(
                "multi-file mode needs df_train_path and df_test_path; pass "
                "df_path as a (train_path, test_path) pair or set both attributes"
            )
        self.train_df = preprocessing.get_df(self.df_train_path, columns_name=self.columns_name)
        self.test_df = preprocessing.get_df(self.df_test_path, columns_name=self.columns_name)
        return preprocessing.data_preprocessing(
            self.PREPROCESSING_PARAMS, df=None,
            INTERVALS_PARAMS=self.INTERVALS_PARAMS,
            scaler=scaler, multi=True, single_file=False,
            train_df=self.train_df, test_df=self.test_df,
        )

    def get_db_time(self):
        """Return the db-time series for the test interval.

        Uses the frames loaded by ``get_data``, so call that first.
        """
        if self.single_file:
            # In this mode the helper's result is unpacked as a pair and only
            # the second element is kept.
            _, db_time = preprocessing.get_db_time(
                self.df, self.PREPROCESSING_PARAMS,
                INTERVALS_PARAMS=self.INTERVALS_PARAMS, multi=True,
                single_file=True,
            )
        else:
            # NOTE(review): multi=False here differs from the single-file
            # branch; kept exactly as in the original -- confirm it is intended.
            db_time = preprocessing.get_db_time(
                self.test_df, self.PREPROCESSING_PARAMS,
                INTERVALS_PARAMS=self.INTERVALS_PARAMS, multi=False,
                single_file=False,
            )
        return db_time

    # TODO: add fit and predict
    def fit_predict(self, plot=True):
        """Train USAD on the train windows, score the test windows, label them.

        Side effects: the model checkpoint and the anomaly intervals are saved
        through ``usad_utils.save_model`` and
        ``utils.save_anomalies_intervals``; ``w_size``, ``z_size``,
        ``decision_scores_``, ``labels_`` and ``anomalies_intervals_`` are
        updated on the instance.

        Args:
            plot: when truthy, also produce the history, db-time, labels and
                thresholds plots according to ``PLOT_PARAMS``.
        """
        _, _, windows_train, windows_test, _, test_timestamps = self.get_data()

        # Sizes are derived from the window tensor's second and third
        # dimensions and the configured hidden size.
        self.w_size = windows_train.shape[1] * windows_train.shape[2]
        self.z_size = windows_train.shape[1] * self.hidden_size

        train_loader, val_loader, test_loader = preprocessing.get_dataloaders(
            windows_train, windows_test,
            self.batch_size, self.w_size, self.z_size,
        )
        model = usad.UsadModel(self.w_size, self.z_size)
        model = utils.to_device(model, self.device)
        history = usad_utils.training(self.epochs, model, self.device, train_loader, val_loader)
        usad_utils.save_model(model)
        model = usad_utils.load_checkpoint(model)

        results = usad_utils.test_model(model, self.device, test_loader, alpha=self.alpha, beta=self.beta)
        y_pred = usad_utils.get_prediction_score(results)
        labels = th.get_labels(self.TH_ALGORITHM, y_pred)

        anomalies_intervals_df = utils.generate_anomalies_intervals(labels, test_timestamps)
        utils.save_anomalies_intervals(anomalies_intervals_df)

        # update public variables
        self.decision_scores_ = y_pred
        self.labels_ = labels
        self.anomalies_intervals_ = anomalies_intervals_df

        if plot:
            # The db-time series and the unscaled test data are only needed by
            # the plots, so they are loaded only when plotting is requested.
            db_time = self.get_db_time()
            _, original_test_data, _, _, _, _ = self.get_data(preprocessing=False)
            flags = self.PLOT_PARAMS

            plotter.plot_history(history, save_static=flags['history_static'],
                                 save_html=flags['history_html'])
            plotter.plot_res_db_time(y_pred, db_time, timestamps=test_timestamps,
                                     save_static=flags['db_time_static'],
                                     save_html=flags['db_time_html'])
            plotter.plot_labels(y_pred, labels, timestamps=test_timestamps,
                                save_static=flags['labels_static'],
                                save_html=flags['labels_html'])
            plotter.plot_thresholds(original_test_data, labels, timestamps=test_timestamps,
                                    save_static=flags['thresholds_static'],
                                    save_html=flags['thresholds_html'])