In [4]:
import pdb

import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler

from plotly import graph_objects as go

In [2]:
df = pd.read_csv("./metrics/prometheus-route-aiops-prod-prometheus-predict.cloud.paas.psi.redhat.com/request_duration_seconds_count.csv")
df

Unnamed: 0,ds,y
0,2020-05-01 16:25:54.703000069,31885
1,2020-05-01 16:26:54.703000069,31887
2,2020-05-01 16:27:54.703000069,31889
3,2020-05-01 16:28:54.703000069,31891
4,2020-05-01 16:29:54.703000069,31893
...,...,...
4315,2020-05-04 16:20:54.703000069,40517
4316,2020-05-04 16:21:54.703000069,40519
4317,2020-05-04 16:22:54.703000069,40521
4318,2020-05-04 16:23:54.703000069,40523


## Explore Data Behavior

In [10]:
# visualize data over time
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df['ds'],
        y=df['y'],
        mode='lines+markers',
    )
)
fig.show()

In [11]:
# calculate diffs and visualize (is the rate of change constant)
diffs = df['y'].diff()

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df['ds'],
        y=diffs,
        mode='lines+markers',
    )
)
fig.show()

In [12]:
# does the data really only have one spike?
diffs.value_counts()

2.0    4317
3.0       2
Name: y, dtype: int64

In [37]:
# train a simple heuristic - check if increase in the last 10 min window is greater than 2 std
# note: we'll have to use window size 9 coz for each 10 day window interval, there will only be 9 diffs available
rolling_mean_diffs = df['y'].rolling(9).apply(lambda x: x.diff().mean()).shift(1)
rolling_std_diffs = df['y'].rolling(9).apply(lambda x: x.diff().std()).shift(1)

In [40]:
# diff between current value and previosu 9 day rolling mean
val_prev_rolling_diffs = (diffs - rolling_mean_diffs).abs()

anomaly_score = val_prev_rolling_diffs.copy()

# where the numerator is non zero, divide by std
# this way we'll keep the 0 diffs as 0 sores, instead of 0/0 errors
anomaly_score.mask(
    (val_prev_rolling_diffs != 0) & (rolling_std_diffs != 0),
    anomaly_score / rolling_std_diffs,
    inplace=True,
)
anomaly_score.mask(
    (val_prev_rolling_diffs != 0) & (rolling_std_diffs == 0),
    np.inf,
    inplace=True,
)
anomaly_score

0       NaN
1       NaN
2       NaN
3       NaN
4       NaN
       ... 
4315    0.0
4316    0.0
4317    0.0
4318    0.0
4319    0.0
Name: y, Length: 4320, dtype: float64

In [41]:
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df['ds'],
        y=diffs,
        mode='lines+markers',
    )
)
fig.add_trace(
    go.Scatter(
        x=df['ds'],
        y=anomaly_score,
        mode='lines+markers',
    )
)
fig.show()

In [57]:
df.iloc[2157]

ds    2020-05-03 04:22:54.703000069
y                             36200
Name: 2157, dtype: object

In [58]:
anomaly_score.iloc[2157]

inf

## Existing Methods

In [5]:
# func process data
df = df
number_of_features = 10
train_test_split_ratio = 0.8

np_arr = df.values
scaler = MinMaxScaler(feature_range = (0, 1))
scaled_np_arr = scaler.fit_transform(np_arr[:,1].reshape(-1, 1))
np_arr[:,1] = scaled_np_arr.flatten()

train = np_arr[:int(np_arr.shape[0]*train_test_split_ratio),:]
test = np_arr[int(np_arr.shape[0]*train_test_split_ratio):,:]

In [21]:
# func prepare data
train_x = np.array(train[:, 1])[np.newaxis, :].T
pdb.set_trace()
for i in range(number_of_features):
    train_x = np.concatenate((train_x, np.roll(train[:, 1], -i)[np.newaxis, :].T), axis=1)

train_x = train_x[:train_x.shape[0] - number_of_features, :number_of_features]

train_yt = np.roll(train[:, 1], -number_of_features+1)
train_y = np.roll(train[:, 1], -number_of_features)
train_y = train_y - train_yt
train_y = train_y[:train_y.shape[0] - number_of_features]

train_x = train_x.reshape(train_x.shape[0], 1, train_x.shape[1])

--Return--
None
> [0;32m/tmp/ipykernel_39876/680604988.py[0m(3)[0;36m<module>[0;34m()[0m
[0;32m      1 [0;31m[0;31m# func prepare data[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      2 [0;31m[0mtrain_x[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0marray[0m[0;34m([0m[0mtrain[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m [0;36m1[0m[0;34m][0m[0;34m)[0m[0;34m[[0m[0mnp[0m[0;34m.[0m[0mnewaxis[0m[0;34m,[0m [0;34m:[0m[0;34m][0m[0;34m.[0m[0mT[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 3 [0;31m[0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      4 [0;31m[0;32mfor[0m [0mi[0m [0;32min[0m [0mrange[0m[0;34m([0m[0mnumber_of_features[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      5 [0;31m    [0mtrain_x[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0mconcatenate[0m[0;34m([0m[0;34m([0m[0mtrain_x[0m[0;34m,[0m [0mnp[0m[0;34m.[0m[0mroll[0m[0;34m([0m[0mtrain[0

ipdb>  break 5


Breakpoint 1 at /tmp/ipykernel_39876/680604988.py:5


ipdb>  n


[0;31m    [... skipped 1 hidden frame][0m

[0;31m    [... skipped 1 hidden frame][0m

[0;31m    [... skipped 1 hidden frame][0m

[0;31m    [... skipped 1 hidden frame][0m

> [0;32m/home/kachau/.local/share/virtualenvs/prometheus-anomaly-detector-IrfCS95I/lib/python3.8/site-packages/IPython/core/interactiveshell.py[0m(3353)[0;36mrun_ast_nodes[0;34m()[0m
[0;32m   3351 [0;31m                    [0mto_run[0m[0;34m.[0m[0mappend[0m[0;34m([0m[0;34m([0m[0mnode[0m[0;34m,[0m [0;34m'single'[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3352 [0;31m[0;34m[0m[0m
[0m[0;32m-> 3353 [0;31m                [0;32mfor[0m [0mnode[0m[0;34m,[0m[0mmode[0m [0;32min[0m [0mto_run[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3354 [0;31m                    [0;32mif[0m [0mmode[0m [0;34m==[0m [0;34m'exec'[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3355 [0;31m                        [0mmod[0m [0;34m=[0m [0mModule

ipdb>  c


None
> [0;32m/tmp/ipykernel_39876/680604988.py[0m(5)[0;36m<module>[0;34m()[0m
[0;32m      3 [0;31m[0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      4 [0;31m[0;32mfor[0m [0mi[0m [0;32min[0m [0mrange[0m[0;34m([0m[0mnumber_of_features[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[1;31m1[0;32m---> 5 [0;31m    [0mtrain_x[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0mconcatenate[0m[0;34m([0m[0;34m([0m[0mtrain_x[0m[0;34m,[0m [0mnp[0m[0;34m.[0m[0mroll[0m[0;34m([0m[0mtrain[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m [0;36m1[0m[0;34m][0m[0;34m,[0m [0;34m-[0m[0mi[0m[0;34m)[0m[0;34m[[0m[0mnp[0m[0;34m.[0m[0mnewaxis[0m[0;34m,[0m [0;34m:[0m[0;34m][0m[0;34m.[0m[0mT[0m[0;34m)[0m[0;34m,[0m [0maxis[0m[0;34m=[0m[0;36m1[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      6 [0;31m[0;34m[0m[0m
[0m[0;32m      7 [0;31m[0mtrain_x[0m [0;34m=[0m [0mtrai

ipdb>  train.shape


(3456, 2)


ipdb>  foo =  np.roll(train[:, 1], -i)
ipdb>  foo


array([0.0, 0.0002314814814816657, 0.0004629629629628873, ...,
       0.7995370370370369, 0.7997685185185186, 0.8000000000000003],
      dtype=object)


ipdb>  foo.shape


(3456,)


ipdb>  foo[:5]


array([0.0, 0.0002314814814816657, 0.0004629629629628873,
       0.000694444444444553, 0.0009259259259257746], dtype=object)


ipdb>  train[:5]


array([['2020-05-01 16:25:54.703000069', 0.0],
       ['2020-05-01 16:26:54.703000069', 0.0002314814814816657],
       ['2020-05-01 16:27:54.703000069', 0.0004629629629628873],
       ['2020-05-01 16:28:54.703000069', 0.000694444444444553],
       ['2020-05-01 16:29:54.703000069', 0.0009259259259257746]],
      dtype=object)


ipdb>  train[:, 1][:5]


array([0.0, 0.0002314814814816657, 0.0004629629629628873,
       0.000694444444444553, 0.0009259259259257746], dtype=object)


ipdb>  n


None
> [0;32m/tmp/ipykernel_39876/680604988.py[0m(4)[0;36m<module>[0;34m()[0m
[0;32m      2 [0;31m[0mtrain_x[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0marray[0m[0;34m([0m[0mtrain[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m [0;36m1[0m[0;34m][0m[0;34m)[0m[0;34m[[0m[0mnp[0m[0;34m.[0m[0mnewaxis[0m[0;34m,[0m [0;34m:[0m[0;34m][0m[0;34m.[0m[0mT[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      3 [0;31m[0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 4 [0;31m[0;32mfor[0m [0mi[0m [0;32min[0m [0mrange[0m[0;34m([0m[0mnumber_of_features[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[1;31m1[0;32m     5 [0;31m    [0mtrain_x[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0mconcatenate[0m[0;34m([0m[0;34m([0m[0mtrain_x[0m[0;34m,[0m [0mnp[0m[0;34m.[0m[0mroll[0m[0;34m([0m[0mtrain[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m [0;36m1[0m[0;34m][0m[0;34m,[0m [0;34m-[0m[0mi[0m[

ipdb>  train_x.shape


(3456, 2)


ipdb>  train_x[:2, :]


array([[0.0, 0.0],
       [0.0002314814814816657, 0.0002314814814816657]], dtype=object)


ipdb>  np.allcloseo(train_x[:, 0], train_x[:, 1])


*** AttributeError: module 'numpy' has no attribute 'allcloseo'


ipdb>  np.allclose(train_x[:, 0], train_x[:, 1])


*** TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''


ipdb>  (train_x[:, 0] - train_x[:, 1]).mean()


0.0


ipdb>  n


None
> [0;32m/tmp/ipykernel_39876/680604988.py[0m(5)[0;36m<module>[0;34m()[0m
[0;32m      3 [0;31m[0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      4 [0;31m[0;32mfor[0m [0mi[0m [0;32min[0m [0mrange[0m[0;34m([0m[0mnumber_of_features[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[1;31m1[0;32m---> 5 [0;31m    [0mtrain_x[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0mconcatenate[0m[0;34m([0m[0;34m([0m[0mtrain_x[0m[0;34m,[0m [0mnp[0m[0;34m.[0m[0mroll[0m[0;34m([0m[0mtrain[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m [0;36m1[0m[0;34m][0m[0;34m,[0m [0;34m-[0m[0mi[0m[0;34m)[0m[0;34m[[0m[0mnp[0m[0;34m.[0m[0mnewaxis[0m[0;34m,[0m [0;34m:[0m[0;34m][0m[0;34m.[0m[0mT[0m[0;34m)[0m[0;34m,[0m [0maxis[0m[0;34m=[0m[0;36m1[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      6 [0;31m[0;34m[0m[0m
[0m[0;32m      7 [0;31m[0mtrain_x[0m [0;34m=[0m [0mtrai

ipdb>  foo = np.roll(train[:, 1], -i)[np.newaxis, :].T
ipdb>  foo


array([[0.0002314814814816657],
       [0.0004629629629628873],
       [0.000694444444444553],
       ...,
       [0.7997685185185186],
       [0.8000000000000003],
       [0.0]], dtype=object)


ipdb>  foo = np.roll(train[:, 1], -i)
ipdb>  foo


array([0.0002314814814816657, 0.0004629629629628873, 0.000694444444444553,
       ..., 0.7997685185185186, 0.8000000000000003, 0.0], dtype=object)


ipdb>  foo[:-5]


array([0.0002314814814816657, 0.0004629629629628873, 0.000694444444444553,
       ..., 0.7986111111111112, 0.7988425925925928, 0.7990740740740745],
      dtype=object)


ipdb>  train[-5:, 1]


array([0.7990740740740745, 0.7993055555555553, 0.7995370370370369,
       0.7997685185185186, 0.8000000000000003], dtype=object)


ipdb>  foo[-5:]


array([0.7993055555555553, 0.7995370370370369, 0.7997685185185186,
       0.8000000000000003, 0.0], dtype=object)


ipdb>  break 7


Breakpoint 2 at /tmp/ipykernel_39876/680604988.py:7


ipdb>  clear 1


Deleted breakpoint 1 at /tmp/ipykernel_39876/680604988.py:5


ipdb>  c


None
> [0;32m/tmp/ipykernel_39876/680604988.py[0m(7)[0;36m<module>[0;34m()[0m
[0;32m      5 [0;31m    [0mtrain_x[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0mconcatenate[0m[0;34m([0m[0;34m([0m[0mtrain_x[0m[0;34m,[0m [0mnp[0m[0;34m.[0m[0mroll[0m[0;34m([0m[0mtrain[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m [0;36m1[0m[0;34m][0m[0;34m,[0m [0;34m-[0m[0mi[0m[0;34m)[0m[0;34m[[0m[0mnp[0m[0;34m.[0m[0mnewaxis[0m[0;34m,[0m [0;34m:[0m[0;34m][0m[0;34m.[0m[0mT[0m[0;34m)[0m[0;34m,[0m [0maxis[0m[0;34m=[0m[0;36m1[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      6 [0;31m[0;34m[0m[0m
[0m[1;31m2[0;32m---> 7 [0;31m[0mtrain_x[0m [0;34m=[0m [0mtrain_x[0m[0;34m[[0m[0;34m:[0m[0mtrain_x[0m[0;34m.[0m[0mshape[0m[0;34m[[0m[0;36m0[0m[0;34m][0m [0;34m-[0m [0mnumber_of_features[0m[0;34m,[0m [0;34m:[0m[0mnumber_of_features[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      8 [0;31m[0;34m[0m[0m
[0m

ipdb>  train_x.shape[0] - number_of_features


3446


ipdb>  train_x.shape


(3456, 11)


ipdb>  train_x[:train_x.shape[0] - number_of_features, :number_of_features].shape


(3446, 10)


ipdb>  n


--Return--
None
> [0;32m/tmp/ipykernel_39876/680604988.py[0m(7)[0;36m<module>[0;34m()[0m
[0;32m      5 [0;31m    [0mtrain_x[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0mconcatenate[0m[0;34m([0m[0;34m([0m[0mtrain_x[0m[0;34m,[0m [0mnp[0m[0;34m.[0m[0mroll[0m[0;34m([0m[0mtrain[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m [0;36m1[0m[0;34m][0m[0;34m,[0m [0;34m-[0m[0mi[0m[0;34m)[0m[0;34m[[0m[0mnp[0m[0;34m.[0m[0mnewaxis[0m[0;34m,[0m [0;34m:[0m[0;34m][0m[0;34m.[0m[0mT[0m[0;34m)[0m[0;34m,[0m [0maxis[0m[0;34m=[0m[0;36m1[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      6 [0;31m[0;34m[0m[0m
[0m[1;31m2[0;32m---> 7 [0;31m[0mtrain_x[0m [0;34m=[0m [0mtrain_x[0m[0;34m[[0m[0;34m:[0m[0mtrain_x[0m[0;34m.[0m[0mshape[0m[0;34m[[0m[0;36m0[0m[0;34m][0m [0;34m-[0m [0mnumber_of_features[0m[0;34m,[0m [0;34m:[0m[0mnumber_of_features[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      8 [0;31m[0;34m[0

ipdb>  n


> [0;32m/home/kachau/.local/share/virtualenvs/prometheus-anomaly-detector-IrfCS95I/lib/python3.8/site-packages/IPython/core/interactiveshell.py[0m(3353)[0;36mrun_ast_nodes[0;34m()[0m
[0;32m   3351 [0;31m                    [0mto_run[0m[0;34m.[0m[0mappend[0m[0;34m([0m[0;34m([0m[0mnode[0m[0;34m,[0m [0;34m'single'[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3352 [0;31m[0;34m[0m[0m
[0m[0;32m-> 3353 [0;31m                [0;32mfor[0m [0mnode[0m[0;34m,[0m[0mmode[0m [0;32min[0m [0mto_run[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3354 [0;31m                    [0;32mif[0m [0mmode[0m [0;34m==[0m [0;34m'exec'[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3355 [0;31m                        [0mmod[0m [0;34m=[0m [0mModule[0m[0;34m([0m[0;34m[[0m[0mnode[0m[0;34m][0m[0;34m,[0m [0;34m[[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  c


In [12]:
# func prepare data
test_x = np.array(test[:, 1])[np.newaxis, :].T

for i in range(number_of_features):
    test_x = np.concatenate((test_x, np.roll(test[:, 1], -i)[np.newaxis, :].T), axis=1)

test_x = test_x[:test_x.shape[0] - number_of_features, :number_of_features]

test_yt = np.roll(test[:, 1], -number_of_features+1)
test_y = np.roll(test[:, 1], -number_of_features)
test_y = test_y - test_yt
test_y = test_y[:test_y.shape[0] - number_of_features]

test_x = test_x.reshape(test_x.shape[0], 1, test_x.shape[1])

In [13]:
test_df = df.values[-test_y.shape[0]:, 1]