In [1]:
# Switch the kernel's working directory before anything else runs.
# NOTE(review): presumably this is the project root, so that the `src.*` imports
# and the relative `requirements.txt` path used below resolve — confirm.
# NOTE(review): the absolute path is machine-specific.
%cd /notebooks/anomaly-detector

/notebooks/anomaly-detector


In [69]:
import os
import subprocess
import sys

# Import the project helpers and the PyOD detectors used below.
# If a module is missing, install requirements.txt once and retry the SAME
# import list. Previously `plotter` was only imported in the fallback branch,
# so a kernel with all dependencies already installed hit a NameError at the
# first `plotter.*` call.
for _import_attempt in range(2):
    try:
        from src.utils import utils
        from src.utils import params
        from src.utils import thresholds as th
        from src.data import columns
        from src.data import preprocessing
        from src.visualization import plotter

        from pyod.models.lof import LOF
        from pyod.models.iforest import IForest
        from pyod.models.ecod import ECOD
        from pyod.models.knn import KNN
        break
    except ModuleNotFoundError:
        if _import_attempt:
            # Installing the requirements did not help: surface the import error.
            raise
        print("installing requirements..")
        # Run pip through the kernel's own interpreter so the packages land in
        # the environment this notebook imports from. Best-effort, as before:
        # a failed install shows up as the import error on the retry.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
            check=False,
        )

# LUNAR is not used by the cells in this notebook and the original fallback
# path never imported it, so treat it as optional instead of failing the cell.
try:
    from pyod.models.lunar import LUNAR
except ModuleNotFoundError:
    LUNAR = None

In [3]:
# Load the run configuration: three parameter groups, the thresholding
# algorithm setting and the path of the dataset.
(
    PREPROCESSING_PARAMS,
    TRAINING_PARAMS,
    INTERVALS_PARAMS,
    TH_ALGORITHM,
    df_path,
) = params.get_params()

In [4]:
# Pull out the two preprocessing settings that later cells pass around:
# the normalization mode and the name of the metrics column set.
scaler, columns_name = (
    PREPROCESSING_PARAMS[key] for key in ('normalization', 'metrics')
)

In [10]:
# Echo the preprocessing configuration so the settings used for this run are
# visible in the cell output.
PREPROCESSING_PARAMS

{'downsamplig_rate': 5,
 'window_size': 12,
 'normalization': 'all',
 'metrics': 'columns_6'}

In [5]:
# Read the dataset found at `df_path`, passing along the configured metrics name.
df = preprocessing.get_df(
    df_path,
    columns_name=columns_name,
)

dataframe shape: (78503, 19)


In [6]:
# Train/test db-time series; the test part is what the plots below are drawn against.
(db_time_train, db_time_test) = preprocessing.get_db_time(
    df,
    PREPROCESSING_PARAMS,
    INTERVALS_PARAMS=INTERVALS_PARAMS,
)

In [8]:
# Build the train/test frames and their timestamps. The call returns six
# values; the third and fourth are not needed here and are discarded.
(
    df_train,
    df_test,
    _,
    _,
    train_timestamps,
    test_timestamps,
) = preprocessing.data_preprocessing(
    PREPROCESSING_PARAMS,
    df,
    INTERVALS_PARAMS=INTERVALS_PARAMS,
    scaler=scaler,
)

normalizing data using Z-Score
normalizing data using MinMax Scaler
normalizing data using Z-Score
normalizing data using MinMax Scaler


In [16]:
# Key of the series fed to the detectors below.
# NOTE(review): presumably the db-time metric (last of the 19 columns) — confirm.
DB_TIME_KEY = 18
db_time = df_test[DB_TIME_KEY]

In [22]:
# Local Outlier Factor on the single db-time series (normalization NOT needed).
# The detector expects a 2-D array, hence the reshape to one column.
series_2d = db_time.values.reshape(-1, 1)

clf = LOF(contamination=0.001)
clf.fit(series_2d, y=None)

# Keep the raw outlier scores for thresholding/plotting and show the cut-off
# the detector derived from them.
y_pred = clf.decision_scores_
print(clf.threshold_)

1.469465344637773


In [34]:
# Take the binary labels stored on the fitted detector.
# Alternative thresholding: th.get_labels('iqr', y_pred)
#   (best methods: iqr, filter, mtt, clf, clust)
labels = clf.labels_

plot_options = dict(timestamps=test_timestamps, save_static=False)
plotter.plot_res_db_time(y_pred, db_time_test, **plot_options)
plotter.plot_labels(y_pred, labels, **plot_options)

In [44]:
# Isolation Forest on the single db-time series (normalization NOT needed).
# The forest is built from random splits, so pin `random_state`: without it the
# scores and the threshold change on every run and the cell is not reproducible.
# Note: the threshold recorded in the saved output came from an unseeded run,
# so the value printed after this change will differ from it.
clf = IForest(contamination=0.001, random_state=42)

# The detector expects a 2-D array, hence the reshape to one column.
clf.fit(db_time.values.reshape(-1, 1), y=None)

# Raw outlier scores for thresholding/plotting, plus the derived cut-off.
y_pred = clf.decision_scores_
print(clf.threshold_)

1.5034089424770247e-15


In [47]:
# Take the binary labels stored on the fitted detector.
# Alternative thresholding: th.get_labels('clust', y_pred)
#   (best methods: iqr, filter, mtt, clf, clust)
labels = clf.labels_

plot_options = dict(timestamps=test_timestamps, save_static=False)
plotter.plot_res_db_time(y_pred, db_time_test, **plot_options)
plotter.plot_labels(y_pred, labels, **plot_options)

In [48]:
# k-nearest-neighbours detector on the single db-time series.
# The detector expects a 2-D array, hence the reshape to one column.
series_2d = db_time.values.reshape(-1, 1)

clf = KNN(contamination=0.001)
clf.fit(series_2d, y=None)

# Raw outlier scores for thresholding/plotting, plus the derived cut-off.
y_pred = clf.decision_scores_
print(clf.threshold_)

0.05035950175044947


In [52]:
# Threshold the scores with the project's 'clust' method here
# (best methods: iqr, filter, mtt, clf, clust).
# Alternative: the detector's own labels, clf.labels_
labels = th.get_labels('clust', y_pred)

plot_options = dict(timestamps=test_timestamps, save_static=False)
plotter.plot_res_db_time(y_pred, db_time_test, **plot_options)
plotter.plot_labels(y_pred, labels, **plot_options)

In [53]:
# ECOD detector on the single db-time series.
# The detector expects a 2-D array, hence the reshape to one column.
series_2d = db_time.values.reshape(-1, 1)

clf = ECOD(contamination=0.001)
clf.fit(series_2d, y=None)

# Raw outlier scores for thresholding/plotting, plus the derived cut-off.
y_pred = clf.decision_scores_
print(clf.threshold_)

7.326136612356848


In [58]:
# Take the binary labels stored on the fitted detector.
# Alternative thresholding: th.get_labels('filter', y_pred)
#   (best methods: iqr, filter, mtt, clf, clust)
labels = clf.labels_

plot_options = dict(timestamps=test_timestamps, save_static=False)
plotter.plot_res_db_time(y_pred, db_time_test, **plot_options)
plotter.plot_labels(y_pred, labels, **plot_options)