In [None]:
# Evaluation
# https://keras.io/examples/timeseries/timeseries_anomaly_detection/

# Numerical arrays.
import numpy as np
# Spreadsheet-like Data Frames
import pandas as pd
# Neural networks
import tensorflow.keras as keras
# Plotting tools
import matplotlib.pyplot as plt

In [None]:
# Data sources
# https://www.kaggle.com/datasets/boltzmannbrain/nab
# https://github.com/numenta/NAB

# Common prefix shared by the raw NAB data files.
root_url = 'https://raw.githubusercontent.com/numenta/NAB/master/data/'
# Full URL of the data file without an anomaly.
no_anomaly_url = f'{root_url}artificialNoAnomaly/art_daily_small_noise.csv'
# Full URL of the data file with an anomaly.
with_anomaly_url = f'{root_url}artificialWithAnomaly/art_daily_jumpsup.csv'

In [None]:
# Display the full URL of the anomaly-free data file.
no_anomaly_url

In [None]:
# Display the full URL of the data file that contains an anomaly.
with_anomaly_url

In [None]:
# Download the anomaly-free CSV; the "timestamp" column becomes the index and
# parse_dates=True asks pandas to parse that index as dates.
df_small_noise = pd.read_csv(no_anomaly_url, parse_dates=True, index_col="timestamp")

In [None]:
# First few rows of the frame.
df_small_noise.head()

In [None]:
# Summary statistics of the numeric column(s).
df_small_noise.describe()

In [None]:
# The first row, returned as a Series.
df_small_noise.iloc[0]

In [None]:
# The first row's entry in the 'value' column (selected by label).
df_small_noise.iloc[0]['value']

In [None]:
# The first row's first entry (selected by position in the underlying array).
df_small_noise.iloc[0].values[0]

In [None]:
# The same entry multiplied by 10e12 (note: 10e12 == 1e13).
df_small_noise.iloc[0].values[0] * 10e12

In [None]:
# (number of rows, number of columns)
df_small_noise.shape

In [None]:
# Create a figure and axes to draw on.
fig, ax = plt.subplots(figsize=(10,4))
# Let pandas draw the data frame onto those axes, without a legend.
df_small_noise.plot(legend = False, ax = ax)

In [None]:
# Download the CSV that contains an anomaly; the "timestamp" column becomes the
# index and parse_dates=True asks pandas to parse that index as dates.
df_daily_jumpsup = pd.read_csv(with_anomaly_url, parse_dates=True, index_col="timestamp")

In [None]:
# First few rows of the frame.
df_daily_jumpsup.head()

In [None]:
# Summary statistics of the numeric column(s).
df_daily_jumpsup.describe()

In [None]:
# The first row's entry in the 'value' column.
df_daily_jumpsup.iloc[0]['value']

In [None]:
# (number of rows, number of columns)
df_daily_jumpsup.shape

In [None]:
# Create a figure and axes to draw on.
fig, ax = plt.subplots(figsize=(10,4))
# Let pandas draw the data frame onto those axes, without a legend.
df_daily_jumpsup.plot(legend = False, ax = ax)

In [None]:
# Pre-Processing
# Per-column mean of the anomaly-free data. It is kept in a variable because the
# test data is later scaled with this same value.
train_mean = df_small_noise.mean()
train_mean

In [None]:
# Per-column standard deviation of the anomaly-free data (also reused for the
# test data later on).
train_std = df_small_noise.std()
train_std

In [None]:
# Standardise the training data: subtract the mean, then divide by the
# standard deviation.
df_train_vals = (df_small_noise - train_mean) / train_std

In [None]:
# First few rows of the standardised training data.
df_train_vals.head()

In [None]:
df_small_noise['less_mean'] = df_small_noise['value'] - df_small_noise['value'].mean()
df_small_noise['div_std'] = df_small_noise['less_mean'] / df_small_noise['value'].std()

In [None]:
df_small_noise

In [None]:
df_small_noise['value'].mean(), df_small_noise['less_mean'].mean(), df_small_noise['div_std'].mean()

In [None]:
df_small_noise['value'].std(), df_small_noise['less_mean'].std(), df_small_noise['div_std'].std()

In [None]:
# Create a figure and axes to draw on.
fig, ax = plt.subplots(figsize=(10,4))
# Plot the standardised training data onto those axes, without a legend.
df_train_vals.plot(legend=False, ax=ax)

In [None]:
# (number of rows, number of columns) of the standardised training data.
df_train_vals.shape

In [None]:
# Number of consecutive samples in each window fed to the model.
# NOTE(review): presumably one day of data at the file's sampling interval -
# confirm against the timestamps.
window_size = 288

In [None]:
# Window list example: a small list used to illustrate sliding windows.
L = [1, 1, 2, 4, 5, 6, 7, 1, 2, 3, 4, 5]
# Example window size
win_size = 3
# Length of the list L
len(L)

In [None]:
# Number of windows of length win_size that fit into L: the last
# (win_size - 1) positions cannot start a full window.
len(L) - (win_size - 1)

In [None]:
# The same count with the brackets removed.
len(L) - win_size + 1

In [None]:
# Print the windows: one slice of length win_size per start position i.
for i in range(len(L) - win_size + 1):
    print(L[i:i + win_size])

In [None]:
# Collect every window of L (one per start position) into a list.
wins = [L[start:start + win_size] for start in range(len(L) - win_size + 1)]
wins

In [None]:
# Stack the list of equal-length windows into a 2D numpy array
# (one row per window, one column per position within the window).
np.stack(wins)

In [None]:
def windows(vals, N = None):
  """Return all overlapping windows of length N taken along the first axis.

  Parameters
  ----------
  vals : array-like
      Sequence, numpy array or pandas DataFrame. It is converted with
      np.asarray first, so a DataFrame and its .values give the same result.
  N : int, optional
      Window length. When omitted, the current value of the notebook-level
      window_size is used. It is looked up at call time so that changing
      window_size takes effect without re-running this definition.

  Returns
  -------
  numpy.ndarray
      Array of shape (len(vals) - N + 1, N, ...); window k holds
      vals[k:k + N].

  Raises
  ------
  ValueError
      If N is smaller than 1 or vals holds fewer than N entries.
  """
  if N is None:
    N = window_size
  arr = np.asarray(vals)
  n_windows = len(arr) - N + 1
  if N < 1 or n_windows < 1:
    raise ValueError(
      f"cannot build windows of length {N} from {len(arr)} values")
  return np.stack([arr[i:i + N] for i in range(n_windows)])

In [None]:
# Cut the standardised training data into overlapping windows
# (window length is the default of windows()).
x_train = windows(df_train_vals)

In [None]:
# Sanity check: first entry of the first window and last entry of the last
# window ...
x_train[0][0], x_train[-1][-1]

In [None]:
# ... should equal the first and last rows of the standardised data.
df_train_vals.iloc[0], df_train_vals.iloc[-1]

In [None]:
# Shape of the stacked windows.
x_train.shape

In [None]:
# The standardised 'value' column as a flat numpy array.
df_train_vals['value'].values

In [None]:
# The first window flattened to 1D, for comparison with the start of the
# array shown in the previous cell.
x_train[0].flatten()

In [None]:
# Index of the training window to highlight.
window_no = 200

# Figure on which the full signal and the chosen window are overlaid.
fig, ax = plt.subplots(figsize=(12, 6))

# The whole standardised training signal.
signal = df_train_vals['value'].values
ax.plot(np.arange(signal.shape[0]), signal, label='signal')

# Window number window_no, shifted right by window_no so that it lines up
# with the part of the signal it was cut from.
window = x_train[window_no].flatten()
window_x = np.arange(window.shape[0]) + window_no
ax.plot(window_x, window, label='window')
ax.legend()

In [None]:
# Neural Network
# Shape of the training windows; entries 1 and 2 of this shape are used as the
# model's input shape.
x_train.shape

In [None]:
# Keyword arguments shared by the four stride-2 (transposed) convolution layers.
conv_kwargs = dict(kernel_size=7, padding="same", strides=2, activation="relu")

# Layer stack: two stride-2 convolutions, then two stride-2 transposed
# convolutions, then a final transposed convolution with a single filter.
layers = [
  # One input sample is one window: (window length, number of features).
  keras.layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
  keras.layers.Conv1D(filters=32, **conv_kwargs),
  keras.layers.Dropout(rate=0.2),
  keras.layers.Conv1D(filters=16, **conv_kwargs),
  keras.layers.Conv1DTranspose(filters=16, **conv_kwargs),
  keras.layers.Dropout(rate=0.2),
  keras.layers.Conv1DTranspose(filters=32, **conv_kwargs),
  # Output layer: default stride and no activation argument.
  keras.layers.Conv1DTranspose(filters=1, kernel_size=7, padding="same"),
]

In [None]:
# Chain the layers defined above into a sequential model.
model = keras.Sequential(layers)

In [None]:
# Adam optimizer with an explicit learning rate.
optimizer = keras.optimizers.Adam(learning_rate=0.001)

In [None]:
# Train by minimising the mean squared error between model output and target.
model.compile(optimizer=optimizer, loss="mse")

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Stop training early once the validation loss has gone 5 epochs without improving.
early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min")

# The training windows serve as both input and target, so the model learns to
# reconstruct its input. A tenth of the data is held out for validation.
history = model.fit(
    x_train,
    x_train,
    epochs=50,
    batch_size=128,
    validation_split=0.1,
    callbacks=[early_stopping],
)

In [None]:
# Evaluation
# Training loss recorded for each epoch of the fit.
history.history["loss"]

In [None]:
# Validation loss recorded for each epoch of the fit.
history.history["val_loss"]

In [None]:
# Plot training and validation loss per epoch on the same axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(history.history["loss"], label="Training Loss")
ax.plot(history.history["val_loss"], label="Validation Loss")
ax.legend()

In [None]:
# Get train MAE loss.
# First step: let the trained model reconstruct the training windows.
x_train_pred = model.predict(x_train)

In [None]:
# Mean absolute error between reconstruction and input, averaged over axis 1
# (the positions within a window), giving one loss value per window.
train_mae_loss = np.mean(np.abs(x_train_pred - x_train), axis=1)

In [None]:
# Histogram of the per-window reconstruction losses on the training data.
fig, ax = plt.subplots(figsize=(10, 6))

ax.hist(train_mae_loss, bins=50) 
ax.set_xlabel("Train MAE loss")
ax.set_ylabel("No of samples");

In [None]:
# Get reconstruction loss threshold: the largest loss seen on the (anomaly-free)
# training data. Test windows with a larger loss are treated as anomalous.
threshold = np.max(train_mae_loss)
threshold

In [None]:
# Compare the first training window with the model's reconstruction of it.
fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(x_train[0]) 
ax.plot(x_train_pred[0])

In [None]:
# Standardise the data containing the anomaly, using the mean and standard
# deviation computed from the anomaly-free training data.
df_test_value = (df_daily_jumpsup - train_mean) / train_std

In [None]:
# Plot the standardised test data.
fig, ax = plt.subplots(figsize=(10, 6)) 
df_test_value.plot(legend=False, ax=ax)

In [None]:
# Create sequences from test values: overlapping windows built the same way as
# for the training data (window length is the default of windows()).
x_test = windows(df_test_value.values)
x_test.shape

In [None]:
# Let the trained model reconstruct the test windows.
x_test_pred = model.predict(x_test)

In [None]:
# Mean absolute reconstruction error per test window (averaged over axis 1,
# the positions within a window).
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
test_mae_loss

In [None]:
# Flatten the losses to a 1D array.
test_mae_loss = test_mae_loss.reshape((-1))
test_mae_loss

In [None]:
# Histogram of the per-window reconstruction losses on the test data.
fig, ax = plt.subplots(figsize=(10, 6))

ax.hist(test_mae_loss, bins=50)
ax.set_xlabel("test MAE loss")
ax.set_ylabel("Number of samples")

In [None]:
# Detect all the samples which are anomalies: a boolean mask with one entry per
# test window, True where the loss exceeds the training threshold.
anomalies = test_mae_loss > threshold
# Number of anomalies (count of True entries in the mask).
np.sum(anomalies)

In [None]:
# Data point i is an anomaly if windows [(i - window_size + 1) to (i)] are all
# anomalous. anomalies[k] belongs to the window that starts at sample k, so
# those are exactly the windows that contain sample i. The upper bound is
# inclusive, hence the slice has to end at i + 1.
# Samples too close to either end of the series to be covered by window_size
# windows are outside the loop range and are never flagged.
inds = []

for i in range(window_size - 1, len(df_test_value) - window_size + 1):
    if np.all(anomalies[i - window_size + 1 : i + 1]):
        inds.append(i)

In [None]:
# Rows of the original (unscaled) test data at the positions flagged as anomalous.
df_subset = df_daily_jumpsup.iloc[inds]

In [None]:
# Plot the full test series, then draw the flagged rows on top in red.
fig, ax = plt.subplots(figsize=(10, 6))

df_daily_jumpsup.plot(legend=False, ax=ax) 
df_subset.plot(legend=False, ax=ax, color="r")