In [None]:
import os
os.environ['KERAS_BACKEND'] = 'torch'
import torch
import keras
from keras import layers
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
from math import isnan
from cdasws import CdasWs, timeinterval
import datetime
import spacepy

In [None]:
# Open a CDAS web-service client and request every variable of the chosen
# dataset over the training interval.
cdas = CdasWs()
dataset = 'PSP_FLD_L2_MAG_RTN_1MIN'
var_names = cdas.get_variable_names(dataset)

train_interval = timeinterval.TimeInterval(datetime.datetime(2021,5,15),datetime.datetime(2021,8,1))

status, rawdtraining = cdas.get_data(dataset, var_names, train_interval)
# Fail here, with the service status attached, rather than on an opaque
# "NoneType is not subscriptable" in the next cell when the request failed.
if rawdtraining is None:
    raise RuntimeError(f"CDAS returned no data for {dataset}: {status}")

In [None]:
# Expected spacing between consecutive samples; used to detect gaps when
# windows are built.
TIMEDELTA = datetime.timedelta(minutes=1)

# Keep only the rows whose first field component is not NaN, and apply the
# same row mask to the timestamps so both arrays stay aligned.
train_field = rawdtraining['psp_fld_l2_mag_RTN_1min']
good = [not isnan(first_component) for first_component in train_field[:, 0]]
train_t = np.array(rawdtraining['epoch_mag_RTN_1min'][good])
train_d = np.array(train_field[good, :])


In [None]:
# Number of channels per sample, read from the last axis of the training data.
FEATURES = np.shape(train_d)[-1]
# Number of consecutive samples in each window fed to the model.
TIME_STEPS = 240

# Generated training sequences for use in the model.
def create_sequences(values, times, time_steps, cadence=None):
    """Cut `values` into overlapping, gap-free windows of `time_steps` samples.

    A window starting at index ``i`` is kept only when the time elapsed
    between its first and last sample equals ``(time_steps - 1) * cadence``;
    any other window is skipped.

    Args:
        values: Array indexed along axis 0 by sample.
        times: NumPy array of timestamps aligned with `values`. A 1-D array
            is given a trailing axis of length 1 before slicing.
        time_steps: Number of samples per window.
        cadence: Expected spacing between consecutive samples. Defaults to
            the notebook-level ``TIMEDELTA`` when omitted.

    Returns:
        Tuple ``(x, t)``: the stacked value windows with shape
        ``(n_windows, time_steps, ...)`` and the matching time windows
        (``(n_windows, time_steps, 1)`` for 1-D `times`).

    Raises:
        ValueError: If no window satisfies the contiguity condition, which
            includes the case of fewer than `time_steps` samples.
    """
    if cadence is None:
        cadence = TIMEDELTA
    if len(times.shape) == 1:
        times = np.expand_dims(times,-1)
    # Loop-invariant: the exact duration a gap-free window must span.
    expected_span = (time_steps - 1) * cadence
    xout = []
    tout = []
    for i in tqdm(range(len(values) - time_steps + 1)):
        if times[i+time_steps-1]-times[i] == expected_span:
            xout.append(values[i : (i + time_steps)])
            tout.append(times[i:(i+time_steps)])
    if not xout:
        # np.stack([]) would raise an unhelpful "need at least one array".
        raise ValueError(
            f"no gap-free window of {time_steps} samples found in "
            f"{len(values)} samples"
        )
    return (np.stack(xout),np.stack(tout))


# Build the training windows (values and matching timestamps) and report
# the shape of the value array.
x_train, t_train = create_sequences(train_d, train_t, TIME_STEPS)
print("Training input shape: ", np.shape(x_train))

In [None]:
def _relu_conv(filters):
    """Return a Conv1D layer with kernel size 10, 'same' padding and ReLU."""
    return layers.Conv1D(
        filters=filters,
        kernel_size=10,
        padding="same",
        activation="relu",
    )


# Stack of 1-D convolutions mapping a (TIME_STEPS, FEATURES) window to an
# output of the same shape; dropout follows the first two convolutions.
model = keras.Sequential([
    layers.Input(shape=(TIME_STEPS, FEATURES)),
    _relu_conv(32),
    layers.Dropout(rate=0.2),
    _relu_conv(8),
    layers.Dropout(rate=0.2),
    _relu_conv(4),
    # Final linear convolution restores the original channel count.
    layers.Conv1D(filters=FEATURES, kernel_size=4, padding="same"),
])
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss="mse")
model.summary()

In [None]:
# Stop training once the validation loss has not improved for 5 epochs.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=5, mode="min"
)

# Reconstruction training: the input windows are also the targets, and 10%
# of them are held out for validation.
history = model.fit(
    x=x_train,
    y=x_train,
    epochs=10,
    batch_size=128,
    validation_split=0.1,
    callbacks=[early_stopping],
)

In [None]:
# Training and validation loss per epoch, drawn on one set of axes.
for metric_key, curve_label in (
    ("loss", "Training Loss"),
    ("val_loss", "Validation Loss"),
):
    plt.plot(history.history[metric_key], label=curve_label)
plt.legend()
plt.show()

In [None]:
# Get train MAE loss.
x_train_pred = model.predict(x_train)
# Mean absolute error over the time axis gives one value per (window, feature).
train_mae_loss = np.mean(np.abs(x_train_pred - x_train), axis=1)
# Keep the worst feature so there is exactly one score per window. This is
# the same reduction the test-set scoring applies, and it keeps the
# histogram's "No of samples" axis counting windows rather than
# window/feature pairs. The maximum, and therefore the threshold, is unchanged.
train_mae_loss = np.max(train_mae_loss, axis=-1)

# Get reconstruction loss threshold.
threshold = np.max(train_mae_loss)
print("Reconstruction error threshold: ", threshold)

plt.hist(train_mae_loss, bins=50)
plt.axvline(threshold,ls='--',c='r')
plt.xlabel("Train MAE loss")
plt.ylabel("No of samples")
plt.show()

In [None]:
# Checking how the first sequence is learnt: first feature of the first
# training window, original followed by the model's reconstruction.
first_window_times = t_train[0]
for series in (x_train, x_train_pred):
    plt.plot(first_window_times, series[0, :, 0])
plt.xticks(rotation=45)
plt.show()

In [None]:
# Fetch the test interval from the same dataset and prepare it exactly like
# the training data: drop rows whose first component is NaN, then window.
test_interval = timeinterval.TimeInterval(datetime.datetime(2022,1,1),datetime.datetime(2022,3,31))
status, rawdtesting = cdas.get_data(dataset, var_names, test_interval)
# Fail here, with the service status attached, if the request returned nothing.
if rawdtesting is None:
    raise RuntimeError(f"CDAS returned no data for {dataset}: {status}")
good = [not isnan(val) for val in rawdtesting['psp_fld_l2_mag_RTN_1min'][:,0]]
test_t = np.array(rawdtesting['epoch_mag_RTN_1min'][good])
test_d = np.array(rawdtesting['psp_fld_l2_mag_RTN_1min'][good,:])
(x_test, t_test) = create_sequences(test_d,test_t,TIME_STEPS)
# Report the test windows (the original printed x_train.shape here).
print("Testing input shape: ", x_test.shape)

In [None]:
# Score each test window: mean absolute reconstruction error over the time
# axis, then the largest of the per-feature values.
x_test_pred = model.predict(x_test)
abs_error = np.abs(x_test_pred - x_test)
test_mae_loss = abs_error.mean(axis=1).max(axis=-1)

# Distribution of the test scores with the training threshold marked.
plt.hist(test_mae_loss, bins=50)
plt.axvline(threshold, ls='--', c='r')
plt.xlabel("test MAE loss")
plt.ylabel("No of samples")
plt.show()

# Detect all the samples which are anomalies.
anomalies = test_mae_loss > threshold

# `anomalies` is indexed by window number, but create_sequences skips every
# window that spans a data gap, so window k does not in general start at
# data index k. Recover each kept window's start index in test_t by applying
# the same contiguity condition that create_sequences uses.
n_window_slots = max(len(test_t) - TIME_STEPS + 1, 0)
window_starts = np.array(
    [
        start
        for start in range(n_window_slots)
        if test_t[start + TIME_STEPS - 1] - test_t[start] == (TIME_STEPS - 1) * TIMEDELTA
    ],
    dtype=int,
)
assert len(window_starts) == len(anomalies), (
    "window/start mismatch: x_test was not built from test_t with TIME_STEPS"
)

# window_is_anomalous[i] is True only if a window starting at data index i
# exists and its score exceeds the threshold.
window_is_anomalous = np.zeros(n_window_slots, dtype=bool)
window_is_anomalous[window_starts] = anomalies

# A data index is flagged when every window in the preceding slice is
# anomalous; indices whose windows were dropped because of gaps are never
# flagged. The indices collected here index test_t / test_d directly.
# NOTE(review): the slice end excludes the window starting at data_idx
# itself, as in the original loop; confirm whether that is intended.
anomalous_idx = []
for data_idx in range(TIME_STEPS - 1, len(test_t) - TIME_STEPS + 1):
    if np.all(window_is_anomalous[data_idx - TIME_STEPS + 1 : data_idx]):
        anomalous_idx.append(data_idx)

In [None]:
# Mark every flagged timestamp with a translucent red vertical line across
# the full test time range.
flagged_times = test_t[anomalous_idx]
plt.vlines(flagged_times, ymin=-600, ymax=600, alpha=0.1, color='red')
plt.xlim(test_t[0], test_t[-1])
plt.xticks(rotation=45)
plt.show()

In [None]:
# Same anomaly markers, now with the first component of the test data drawn
# on top for comparison.
flagged_times = test_t[anomalous_idx]
plt.vlines(flagged_times, ymin=-600, ymax=600, alpha=0.1, color='red')
first_component = test_d[:, 0]
plt.plot(test_t, first_component)
plt.xlim(test_t[0], test_t[-1])
plt.xticks(rotation=45)
plt.show()