In [None]:
import pandas as pd

# Base URL of the raw CSV files in the numenta/NAB GitHub repository;
# every path in `datasets` is appended to it.
MASTER_URL_ROOT = "https://raw.githubusercontent.com/numenta/NAB/master/data/"

# Keys match the DataFrame variable names used in this notebook;
# values are CSV paths relative to MASTER_URL_ROOT.
datasets = dict(
    df_small_noise="artificialNoAnomaly/art_daily_small_noise.csv",
    df_daily_jumpsup="artificialWithAnomaly/art_daily_jumpsup.csv",
)

def load_dataset(url_suffix):
    """Read the CSV at ``MASTER_URL_ROOT + url_suffix`` into a DataFrame.

    The ``timestamp`` column is used as the index, and ``parse_dates=True``
    asks pandas to parse that index as dates.

    Args:
        url_suffix: Path of the CSV relative to ``MASTER_URL_ROOT``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    csv_url = MASTER_URL_ROOT + url_suffix
    frame = pd.read_csv(csv_url, index_col="timestamp", parse_dates=True)
    return frame

# Series without anomalies: used further down to fit the normalization
# statistics and to train the model.
df_small_noise = load_dataset(url_suffix=datasets["df_small_noise"])
# Series with an anomaly: used further down as the test set.
df_daily_jumpsup = load_dataset(url_suffix=datasets["df_daily_jumpsup"])

# Quick sanity check on what was downloaded.
print("Small Noise DataFrame shape: ", df_small_noise.shape)
print("Daily Jumps Up DataFrame shape: ", df_daily_jumpsup.shape)

In [None]:
# Expected row count: one reading every 5 minutes for 14 days,
# i.e. 24 * (60 / 5) * 14 = 4032 rows.
df_small_noise

In [None]:
import matplotlib.pyplot as plt

def plot_dataframe(df, title):
    """Draw ``df`` on a fresh figure with ``title`` and display it.

    Args:
        df: Object exposing a pandas-style ``.plot(legend=..., ax=...)`` method.
        title: Text placed above the axes.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    figure, axes = plt.subplots()
    df.plot(ax=axes, legend=False)
    axes.set_title(title)
    plt.show()

# One figure per loaded series, each on its own axes.
plot_dataframe(df=df_small_noise, title="Small Noise Data")
plot_dataframe(df=df_daily_jumpsup, title="Daily Jumps Up Data")

In [None]:
# Standardize the training series. The mean and std are kept in their own
# variables because the test series is later scaled with the same statistics.
training_mean, training_std = df_small_noise.mean(), df_small_noise.std()
df_training_value = df_small_noise.sub(training_mean).div(training_std)

In [None]:
import numpy as np

# Window length in rows. With the 5-minute sampling noted earlier in this
# notebook, 288 rows correspond to 24 hours.
TIME_STEPS = 288


def create_sequences(data, time_steps=TIME_STEPS):
    """Slice ``data`` into every contiguous window of ``time_steps`` rows.

    Window ``i`` is ``data[i : i + time_steps]``; consecutive windows are
    shifted by one row, so they overlap heavily.

    Args:
        data: Sequence or array that supports ``len()`` and slicing along its
            first axis.
        time_steps: Number of rows per window. Must be at least 1 and no
            larger than ``len(data)``.

    Returns:
        ``np.ndarray`` whose first axis has ``len(data) - time_steps + 1``
        windows and whose second axis has ``time_steps`` rows.

    Raises:
        ValueError: If ``time_steps`` is smaller than 1, or if ``data`` has
            fewer than ``time_steps`` rows. Without this check the second case
            surfaces as NumPy's generic "need at least one array to stack".
    """
    if time_steps < 1:
        raise ValueError(f"time_steps must be >= 1, got {time_steps}")

    n_windows = len(data) - time_steps + 1
    if n_windows < 1:
        raise ValueError(
            f"time_steps ({time_steps}) exceeds the number of rows in data "
            f"({len(data)}); cannot build any sequence"
        )

    return np.stack(
        [data[start : start + time_steps] for start in range(n_windows)]
    )

# Turn the normalized training frame into overlapping windows; the default
# window length of create_sequences is used.
training_data = df_training_value.values
x_train = create_sequences(data=training_data)

print("Training input shape: ", x_train.shape)

In [None]:
import tensorflow as tf

# Arguments shared by the four strided convolution layers below.
strided_conv_kwargs = dict(
    kernel_size=7, padding="same", strides=2, activation="relu"
)

# Convolutional autoencoder: two strided Conv1D layers, mirrored by two
# strided Conv1DTranspose layers, then a single-filter output layer.
model = tf.keras.Sequential(
    [
        # Input shape is taken from the training windows (axes 1 and 2 of x_train).
        tf.keras.layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
        # Encoder.
        tf.keras.layers.Conv1D(filters=32, **strided_conv_kwargs),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Conv1D(filters=16, **strided_conv_kwargs),
        # Decoder.
        tf.keras.layers.Conv1DTranspose(filters=16, **strided_conv_kwargs),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Conv1DTranspose(filters=32, **strided_conv_kwargs),
        # Output layer: no strides and no activation are passed here.
        tf.keras.layers.Conv1DTranspose(filters=1, kernel_size=7, padding="same"),
    ]
)

# Mean squared error between the input window and its reconstruction.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=adam_optimizer, loss="mse")
model.summary()

In [None]:
# Early-stopping callback that monitors validation loss with a patience of 5.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", mode="min", patience=5
)

# x_train is passed as both input and target: the model learns to
# reconstruct its own input. 10% of the windows are held out for validation.
history = model.fit(
    x_train,
    x_train,
    batch_size=128,
    epochs=50,
    validation_split=0.1,
    callbacks=[early_stopping],
)

In [None]:
# Training and validation loss recorded by model.fit, on the same axes.
fig, ax = plt.subplots()
ax.plot(history.history["loss"], label="Training Loss")
ax.plot(history.history["val_loss"], label="Validation Loss")
ax.legend()
plt.show()

In [None]:
# Reconstruct every training window and score it with the mean absolute
# error taken along axis 1 (the rows within each window).
x_train_pred = model.predict(x_train)
train_mae_loss = np.abs(x_train_pred - x_train).mean(axis=1)

fig, ax = plt.subplots()
ax.hist(train_mae_loss, bins=50)
ax.set_xlabel("Train MAE loss")
ax.set_ylabel("No of samples")
plt.show()

# The largest reconstruction error seen on the training windows is the
# cut-off used later to flag test windows.
threshold = train_mae_loss.max()
print("Reconstruction error threshold: ", threshold)

In [None]:
# First training window and the model's reconstruction of it, overlaid.
fig, ax = plt.subplots()
ax.plot(x_train[0])
ax.plot(x_train_pred[0])
plt.show()

In [None]:
# Scale the test series with the statistics computed on the training series,
# then window it exactly like the training data.
df_test_value = df_daily_jumpsup.sub(training_mean).div(training_std)
x_test = create_sequences(data=df_test_value.values)
print("Test input shape: ", x_test.shape)

# Per-window mean absolute reconstruction error, flattened to one dimension.
x_test_pred = model.predict(x_test)
test_mae_loss = np.abs(x_test_pred - x_test).mean(axis=1).reshape(-1)

fig, ax = plt.subplots()
ax.hist(test_mae_loss, bins=50)
ax.set_xlabel("test MAE loss")
ax.set_ylabel("No of samples")
plt.show()

# A window counts as anomalous when its error exceeds the training threshold.
anomalies = test_mae_loss > threshold
print("Number of anomaly samples: ", anomalies.sum())
print("Indices of anomaly samples: ", np.where(anomalies))

In [None]:
def find_anomalous_indices(anomalies, time_steps, data_length):
    """Return the data-point indices whose covering windows are all anomalous.

    Window ``k`` (as built by slicing ``time_steps`` consecutive rows starting
    at row ``k``) contains data point ``idx`` exactly when
    ``idx - time_steps + 1 <= k <= idx``. A data point is reported only if
    every one of those ``time_steps`` windows is flagged.

    Only indices in ``range(time_steps - 1, data_length - time_steps + 1)``
    are considered, i.e. points covered by a full set of ``time_steps``
    windows; points nearer either end of the series are never reported.

    Args:
        anomalies: Boolean array with one flag per window, sliceable along
            its first axis.
        time_steps: Window length used to build the windows.
        data_length: Number of rows in the original (un-windowed) series.

    Returns:
        List of integer data-point indices, in increasing order.
    """
    return [
        idx
        for idx in range(time_steps - 1, data_length - time_steps + 1)
        # The end bound is idx + 1 so that window `idx` itself is checked.
        # Stopping at `idx` would skip it, and with time_steps == 1 the
        # slice would be empty, which np.all treats as True.
        if np.all(anomalies[idx - time_steps + 1 : idx + 1])
    ]

# Map the per-window flags back to positions in the un-windowed test series.
data_length = len(df_test_value)
anomalous_data_indices = find_anomalous_indices(
    anomalies=anomalies,
    time_steps=TIME_STEPS,
    data_length=data_length,
)

print("Anomalous data indices: ", anomalous_data_indices)

In [None]:
# Overlay the rows identified as anomalous (red) on the full test series.
fig, ax = plt.subplots()
df_subset = df_daily_jumpsup.iloc[anomalous_data_indices]
df_daily_jumpsup.plot(ax=ax, legend=False)
df_subset.plot(ax=ax, legend=False, color="r")
plt.show()