In [1]:
%matplotlib inline
from helper import *
import plotter as pltr

In [2]:
# Hyper-parameters
# `dataset` selects which loader branch runs further down.
dataset = 'SMD'
# Length of a sliding window, and the stride between consecutive windows
# (10% of the window length, truncated to an integer).
window_size = 50
step_size = int(window_size * 0.1)

## Step 1. Load Dataset

In [3]:
# Load the train split, test split and test labels for one SMD machine.
#
# All three files are located relative to SMD_BASE_PATH and share the same
# 'machine-<GROUP>-<ENTITY>.txt' file name. Columns are renamed m0..m{k-1} and
# each split receives a synthetic, evenly spaced timestamp index (the files are
# read without a header, so the index is generated here rather than parsed).
if dataset == 'SMD':
    GROUP = 1
    ENTITY = 2
    SMD_BASE_PATH = 'Dataset/SMD'
    machine_file = 'machine-%d-%d.txt' % (GROUP, ENTITY)

    X_train = load_data('%s/train/%s' % (SMD_BASE_PATH, machine_file), header=False)
    X_train.columns = ['m%d' % i for i in range(X_train.shape[1])]
    X_train.index = pd.date_range('2021/03/02', '2021/03/21', periods=X_train.shape[0])
    X_train.index.name = 'timestamp'

    X_test = load_data('%s/test/%s' % (SMD_BASE_PATH, machine_file), header=False)
    X_test.columns = ['m%d' % i for i in range(X_test.shape[1])]
    X_test.index = pd.date_range('2021/03/21', '2021/4/8', periods=X_test.shape[0])
    X_test.index.name = 'timestamp'

    # Labels live next to the data; reuse SMD_BASE_PATH so that relocating the
    # dataset only requires changing one constant.
    y_true = pd.read_csv('%s/test_label/%s' % (SMD_BASE_PATH, machine_file), header=None)
    y_true.columns = ['label']
    # Align labels with the test rows (pandas raises if the lengths differ).
    y_true.index = X_test.index
else:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise ValueError('Unsupported dataset: %r (only \'SMD\' is handled)' % (dataset,))

In [4]:
# Report the (rows, columns) dimensions of both splits.
for split_label, split_frame in (('Train Shape:', X_train), ('Test Shape:', X_test)):
    print(split_label, split_frame.shape)

In [5]:
# Stack the train and test splits row-wise and plot the first dimension ('m0').
full_series = pd.concat([X_train, X_test], axis=0)
pltr.plot_data(full_series, 'm0')

In [6]:
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)

In [7]:
# Convert the training frame to a plain NumPy array for the model cells below.
# Guarded so that re-running this cell after the conversion is a no-op instead
# of raising AttributeError (an ndarray has no `to_numpy`).
if hasattr(X_train, 'to_numpy'):
    X_train = X_train.to_numpy()

In [8]:
# Display the shape of the training data after the NumPy conversion.
X_train.shape

(23694, 37)

## Step 2. Anomaly Detection

In [9]:
from collections import Counter
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import seaborn as sns
import theano
import theano.tensor as tt
import arviz as az

# Fix the seeds so repeated runs are comparable: NumPy's global RNG first,
# then the random generator PyMC3 uses on the Theano side.
np.random.seed(42)
pm.set_tt_rng(42)

In [10]:
# Kernel density estimate (with a rug of the raw values) for the first column
# of the training array.
first_feature = X_train[:, 0]
az.plot_kde(first_feature, rug=True)
# Keep a single y tick at 0 and pass alpha=0 so it is drawn fully transparent.
plt.yticks([0], alpha=0)
plt.show()

In [11]:
# One latent dimension per column of the training array.
n_features = X_train.shape[1]
# Flow formula: scale -> n_features Householder ("hh") layers -> loc.
flow_formula = "scale-hh*%d-loc" % n_features

with pm.Model() as model:
    # Standard-normal vector variable 'N' with one entry per feature.
    pm.Normal('N', mu=0, sigma=1, shape=(n_features, ))
    # NOTE(review): the model declares no observed variable; the training data
    # is only handed to NFVI via `observed=` -- confirm that this keyword
    # actually conditions the approximation on X_train.
    inference = pm.NFVI(flow_formula, jitter=0.01, observed=X_train)

In [12]:
# Optimise the flow parameters: 10000 Adam steps (learning rate 0.01) with
# 100 Monte Carlo samples per objective evaluation, and a convergence callback
# attached to the fit.
adam_optimizer = pm.adam(learning_rate=0.01)
convergence_check = pm.callbacks.CheckParametersConvergence()
inference.fit(
    n=10000,
    obj_optimizer=adam_optimizer,
    obj_n_mc=100,
    callbacks=[convergence_check],
)

In [13]:
# Draw 1000 samples from the fitted approximation and flatten them into a
# DataFrame for the tabular/plotting cells below.
nf_approx = inference.approx
traceNF = nf_approx.sample(1000)
dftrace = pm.trace_to_dataframe(traceNF)

In [14]:
# Sample the model with pm.sample's default sampler and keep the result as an
# ArviZ InferenceData object.
# NOTE(review): this rebinds `traceNF`, replacing the samples drawn from the
# flow approximation; `dftrace` still holds the earlier samples.
with model:
    traceNF = pm.sample(draws=1000, return_inferencedata=True)

In [None]:
# Plot the samples currently bound to `traceNF` with ArviZ's trace plot.
az.plot_trace(traceNF)

In [17]:
# Joint KDE plot of the first two columns of the sample DataFrame.
# Both columns are selected positionally with `.iloc`; `dftrace[:, 1]` is not
# valid DataFrame indexing and raises. The vectors are passed as x=/y= keywords
# because recent seaborn releases no longer accept them positionally.
sns.jointplot(x=dftrace.iloc[:, 0], y=dftrace.iloc[:, 1], kind="kde")

In [None]:
# Display the DataFrame built by pm.trace_to_dataframe.
dftrace

In [None]:
# Posterior-predictive sampling restricted to the variable 'N', using whatever
# samples `traceNF` currently holds.
ppc_vars = ['N']
with model:
    ppc = pm.sample_posterior_predictive(traceNF, var_names=ppc_vars)

In [None]:
# Inspect the posterior-predictive samples stored under the key 'N'.
ppc['N']