# USAD

## Environment

In [None]:
%cd /nfs/home/medoro/Unsupervised_Anomaly_Detection_thesis

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn

from USAD.utils import *
from USAD.usad import *
#from linear_ae import *
#from utils_ae import *

In [None]:
import plotly.graph_objects as go

In [None]:
!nvidia-smi -L

device = get_default_device()

## EDA - Data Pre-Processing

### Normal period

In [None]:
#Read data
normal = pd.read_csv("data/SWaT_Dataset_Normal_v1.csv")

In [None]:
normal['Timestamp'] = pd.to_datetime(normal['Timestamp'])

In [None]:
is_sorted = normal['Timestamp'].is_monotonic_increasing
print(f"Dataset sorted by timestamp: {is_sorted}")

In [None]:
normal = normal.drop(["Timestamp" , "Normal/Attack" ] , axis = 1)
normal.shape

In [None]:
normal

In [None]:
# Values may be written with a decimal comma: swap it for a dot, then cast every column to float64.
for column in normal.columns:
    normal[column] = normal[column].map(lambda value: str(value).replace(",", "."))
normal = normal.astype(float)

#### Normalization

In [None]:
from sklearn import preprocessing
# The scaler is fitted here on the normal-period data; the attack data is later
# passed through the same fitted scaler with `transform`.
min_max_scaler = preprocessing.MinMaxScaler()

x = normal.values
x_scaled = min_max_scaler.fit_transform(x)
# Rebuilding the frame from the raw array drops the original column names:
# the columns of `normal` become integer labels from here on.
normal = pd.DataFrame(x_scaled)

In [None]:
normal

### Attack

In [None]:
#Read data
attack = pd.read_csv("data/SWaT_Dataset_Attack_v0.csv",sep=";")
labels = [ float(label!= 'Normal' ) for label  in attack["Normal/Attack"].values]
attack = attack.drop(["Timestamp" , "Normal/Attack" ] , axis = 1)
attack.shape

In [None]:
attack

In [None]:
# Values may be written with a decimal comma: swap it for a dot, then cast every column to float64.
for column in attack.columns:
    attack[column] = attack[column].map(lambda value: str(value).replace(",", "."))
attack = attack.astype(float)

#### Normalization

In [None]:
from sklearn import preprocessing

x = attack.values 
x_scaled = min_max_scaler.transform(x)
attack = pd.DataFrame(x_scaled)

In [None]:
attack

### Windows

In [None]:
window_size=12 #9 ---> for better reconstruction #12

In [None]:
# Stride-1 sliding windows built with a broadcast index matrix: row r holds the
# indices r, r+1, ..., r+window_size-1, so the result has shape
# (n_rows - window_size, window_size, n_columns).
# NOTE(review): the start indices stop at n_rows - window_size - 1, so the last
# full window (starting at n_rows - window_size) is not generated — confirm intended.
windows_normal=normal.values[np.arange(window_size)[None, :] + np.arange(normal.shape[0]-window_size)[:, None]]
windows_normal.shape

In [None]:
# Same stride-1 windowing as for the normal data: result shape is
# (n_rows - window_size, window_size, n_columns).
# The label windows built in the testing section use the same
# range(len - window_size), so windows and labels stay aligned one-to-one.
windows_attack=attack.values[np.arange(window_size)[None, :] + np.arange(attack.shape[0]-window_size)[:, None]]
windows_attack.shape

## Training

In [None]:
import torch.utils.data as data_utils

BATCH_SIZE = 7919
N_EPOCHS = 100
hidden_size = 40

# Every window is flattened into a single vector before it reaches the model.
w_size = windows_normal.shape[1] * windows_normal.shape[2]  # window_size * n_features, e.g. 12*51 = 612
z_size = windows_normal.shape[1] * hidden_size  # window_size * hidden_size, e.g. 12*40 = 480

# Encoder layout noted by the author: 612 --> 306 --> 153 --> 1200.
# NOTE(review): 1200 corresponds to hidden_size = 100; with hidden_size = 40 the
# latent size is 480 — confirm the layer sizes against UsadModel.

# Chronological 80/20 split of the normal windows into train and validation.
split_at = int(np.floor(.8 * windows_normal.shape[0]))
windows_normal_train = windows_normal[:split_at]
windows_normal_val = windows_normal[split_at:]


def _flat_loader(windows):
    """Wrap `windows` in a DataLoader of flattened float32 vectors, keeping the original order."""
    flat = torch.from_numpy(windows).float().view(windows.shape[0], w_size)
    return data_utils.DataLoader(
        data_utils.TensorDataset(flat),
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=0,
    )


train_loader = _flat_loader(windows_normal_train)
val_loader = _flat_loader(windows_normal_val)
test_loader = _flat_loader(windows_attack)

model = to_device(UsadModel(w_size, z_size), device)

In [None]:
history = training(N_EPOCHS,model,train_loader,val_loader)

In [None]:
plot_history(history)

In [None]:
torch.save({
            'encoder': model.encoder.state_dict(),
            'decoder1': model.decoder1.state_dict(),
            'decoder2': model.decoder2.state_dict()
            }, "checkpoints/usad_model2.pth")

## Testing

In [None]:
# Load the three sub-networks' weights onto the CPU.
# NOTE(review): the save cell above writes "checkpoints/usad_model2.pth" while this
# cell loads "checkpoints/usad_model.pth" — confirm the checkpoint matches the
# window_size currently in use.
checkpoint = torch.load("checkpoints/usad_model.pth", map_location = torch.device('cpu')) #usad_model.pth with window = 12; usad_model2.pth with window = 9

model.encoder.load_state_dict(checkpoint['encoder'])
model.decoder1.load_state_dict(checkpoint['decoder1'])
model.decoder2.load_state_dict(checkpoint['decoder2'])

In [None]:
results=testing(model,test_loader)

In [None]:
results

In [None]:
# Build the labels for every window.
# The model input is a list of arrays, each one a stride-1 sliding window over the original data,
# so the same sliding window has to be applied to the labels as well.
windows_labels = [
    list(np.int_(labels[start:start + window_size]))
    for start in range(len(labels) - window_size)
]

In [None]:
windows_labels

In [None]:
y_test = [1.0 if (np.sum(window) > 0) else 0 for window in windows_labels ]
# One label per window: y_test is 0 only if every label inside the window is 0, otherwise it is 1.

In [None]:
print(y_test)

In [None]:
# Flatten every batch of scores and join them into one 1-D array.
# torch.cat accepts batches of different length (the last one may be shorter)
# and, unlike stacking results[:-1], also works when there is only one batch.
y_pred = torch.cat([batch.flatten() for batch in results]).detach().cpu().numpy()

In [None]:
y_pred

In [None]:
threshold=ROC(y_test,y_pred)

In [None]:
threshold

In [None]:
y_pred_ = np.zeros(y_pred.shape[0])

In [None]:
y_pred_[y_pred >= threshold] = 1

In [None]:
y_pred_

In [None]:
np.array(y_test)

In [None]:
# Import the submodule explicitly: a bare `import sklearn` does not guarantee
# that `sklearn.metrics` is loaded, and the cells below call sklearn.metrics.*.
import sklearn.metrics

In [None]:
sklearn.metrics.roc_auc_score(y_test, y_pred_)

In [None]:
confusion_matrix(np.array(y_test), y_pred_)

In [None]:
print(sklearn.metrics.classification_report(y_test, y_pred_))

In [None]:
y_pred.min(), y_pred.max()

In [None]:
# Flag as anomalous (1.0) every window whose score reaches the 93rd percentile of the scores.
threshold = np.percentile(y_pred, 93)
print(threshold)
y_pred_ = (y_pred >= threshold).astype(float)

In [None]:
len(y_pred_)

In [None]:
y_pred_.sum()

In [None]:
y_pred_.sum()/len(y_pred_)

In [None]:
confusion_matrix(np.array(y_test), y_pred_)

In [None]:
print(sklearn.metrics.classification_report(y_test, y_pred_))

In [None]:
sklearn.metrics.roc_auc_score(y_test, y_pred_)

In [None]:
attack.loc[:449906, 0].shape

In [None]:
# Keep one row per sliding window (len(y_test) rows) instead of a hard-coded row
# index, and take a copy so that the label columns added in the following cells
# are not written into a slice of `attack`.
att = attack.iloc[:len(y_test)].copy()

In [None]:
att

In [None]:
att['y_gt'] = y_test

In [None]:
att['y_pred'] = y_pred_

In [None]:
true_an = att[att.y_gt == 1]
pred_an = att[att.y_pred == 1]

In [None]:
true_an.shape, pred_an.shape

In [None]:
plt.figure(figsize=(12,6))
plt.plot(att.index, att.loc[:, 0], label = "first_feature", color = "blue")
plt.scatter(true_an.index, true_an.y_gt, label = "true anomaly", color = "red")
plt.scatter(pred_an.index, pred_an.y_pred, label = "predicted anomaly", color = "green")
plt.legend()
plt.show()

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=attack.index, y=attack.loc[:, 0], name='First feature'))
fig.add_trace(go.Scatter(x=attack.index, y=y_test, mode='markers', marker=dict(color='forestgreen'), name='True_Anomaly'))
fig.add_trace(go.Scatter(x=attack.index, y=y_pred_, mode='markers', marker=dict(color='yellow'), name='W1_anomaly'))

fig.update_layout(showlegend=True)
fig.show()

# Reconstruction

Let's look at what the actual reconstruction looks like (a window size of 9 works better for this).

We first create non-overlapping windows from the attack dataset, then pass them to the model and collect the corresponding reconstructions, which are then used for anomaly detection.

In [None]:
def windows_non_overlapping(dataframe, window_size):
    """Cut a 2-D table into consecutive, non-overlapping windows of rows.

    Args:
        dataframe: 2-D array-like (e.g. a pandas DataFrame) of shape
            (n_rows, n_features).
        window_size: Number of rows per window; must be a positive integer.

    Returns:
        np.ndarray of shape (n_rows // window_size, window_size, n_features).
        Trailing rows that do not fill a whole window are dropped. If the
        input has fewer than ``window_size`` rows the result holds zero
        windows.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")
    data = np.array(dataframe)
    n_windows = data.shape[0] // window_size
    # Dropping the incomplete tail first lets a single reshape perform the split.
    return data[: n_windows * window_size].reshape(n_windows, window_size, data.shape[1])

In [None]:
attack_wndw = windows_non_overlapping(attack, window_size)

In [None]:
attack_wndw.shape

In [None]:
attack_wndw

In [None]:
test_loader = torch.utils.data.DataLoader(data_utils.TensorDataset(
    torch.from_numpy(attack_wndw).float().contiguous().view(([attack_wndw.shape[0],w_size]))
) , batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

In [None]:
results_non_over, w1_non_over = testing_prova(model, test_loader) #, w2_non_over

In [None]:
results_non_over

In [None]:
results_non_over[0].size(), results_non_over[-1].size()

In [None]:
w1_non_over

In [None]:
#w2_non_over

## w1

In [None]:
len(w1_non_over), w1_non_over[0].size(), w1_non_over[-1].size()

In [None]:
# Reshape each batch from [batch, window_size * n_features] back to
# [batch, window_size, n_features]. The sizes are derived from window_size
# instead of being hard-coded, so the cell works for any window length.
reshaped_w1 = [
    w1_el.reshape(w1_el.size(0), window_size, w1_el.size(1) // window_size)
    for w1_el in w1_non_over
]

In [None]:
reshaped_w1[0].size(), reshaped_w1[-1].size()

In [None]:
reshaped_w1_try = [torch.reshape(w1_el, (w1_el.size()[0]*w1_el.size()[1], w1_el.size()[2])) for w1_el in reshaped_w1]

In [None]:
reshaped_w1_try[0].size(), reshaped_w1_try[-1].size()

In [None]:
stacked = torch.stack(reshaped_w1_try[:-1])
stacked.shape

In [None]:
stacked_reshaped = torch.reshape(stacked, (stacked.size()[0] * stacked.size()[1], stacked.size()[2]))

In [None]:
stacked_reshaped.size()

In [None]:
stacked_array = stacked_reshaped.cpu().numpy()
stacked_array

In [None]:
last_array = reshaped_w1_try[-1].cpu().numpy()
last_array

In [None]:
total = np.concatenate([stacked_array, last_array])

In [None]:
total.shape

In [None]:
total

## w2

In [None]:
# Reshape each batch from [batch, window_size * n_features] back to
# [batch, window_size, n_features], deriving the sizes from window_size
# instead of a hard-coded window length.
# NOTE(review): `w2_non_over` is not returned by the `testing_prova` call as
# currently written (its second output is commented out) — re-enable it before
# running this cell.
reshaped_w2 = [
    w2_el.reshape(w2_el.size(0), window_size, w2_el.size(1) // window_size)
    for w2_el in w2_non_over
]

In [None]:
reshaped_w2[0].size(), reshaped_w2[-1].size()

In [None]:
reshaped_w2_try = [torch.reshape(w2_el, (w2_el.size()[0]*w2_el.size()[1], w2_el.size()[2])) for w2_el in reshaped_w2]

In [None]:
reshaped_w2_try[0].size(), reshaped_w2_try[-1].size()

In [None]:
stacked2 = torch.stack(reshaped_w2_try[:-1])
stacked2.shape

In [None]:
stacked_reshaped2 = torch.reshape(stacked2, (stacked2.size()[0] * stacked2.size()[1], stacked2.size()[2]))

In [None]:
stacked_reshaped2.size()

In [None]:
stacked_array = stacked_reshaped2.cpu().numpy()
stacked_array

In [None]:
last_array2 = reshaped_w2_try[-1].cpu().numpy()
last_array2

In [None]:
total2 = np.concatenate([stacked_array, last_array2])

In [None]:
total2.shape

In [None]:
total2

## Reconstruction

Now that we have reconstructed, let's try to see how it maps the different time series.

In [None]:
attack_w1_reco = pd.DataFrame(total)
attack_w1_reco

In [None]:
attack_w2_reco = pd.DataFrame(total2)
attack_w2_reco

In [None]:
attack_w2_reco.loc[:, 0]

In [None]:
plt.plot(np.arange(len(attack.loc[:, 0])), attack.loc[:, 0], label = "First feature") #predicted_df_test.meter_reading[:8784]
plt.plot(np.arange(len(attack_w1_reco.loc[:, 0])), attack_w1_reco.loc[:, 0], label = "w1_reconstruction")
#plt.plot(np.arange(len(attack_w2_reco.loc[:, 0])), attack_w2_reco.loc[:, 0], label = "w2_reconstruction")
plt.legend()
plt.show()

In [None]:
plt.plot(labels, marker = 'o')
plt.show()

In [None]:
plt.plot(attack.loc[:, 0], label = "First feature")
plt.legend()
plt.show()

Let's try my anomaly detection method.

In [None]:
# The reconstruction only covers whole windows, so trim the ground truth to the
# same number of rows instead of relying on a hard-coded row index.
data = {'attack_gt': attack.iloc[:len(attack_w1_reco), 0].values, 'attack_w1_reco': attack_w1_reco.loc[:, 0].values} #, 'attack_w2_reco': attack_w2_reco.loc[:, 0].values

In [None]:
attack.loc[:449915, 0].values.shape

In [None]:
attack_w1_reco.shape

In [None]:
predicted_df_test = pd.DataFrame(data)

In [None]:
predicted_df_test

In [None]:
predicted_df_test['loss'] = np.abs(predicted_df_test['attack_gt'] - predicted_df_test['attack_w1_reco'])

In [None]:
predicted_df_test['loss2'] = np.abs(predicted_df_test['attack_gt'] - predicted_df_test['attack_w2_reco'])

In [None]:
predicted_df_test['relative_loss'] = np.abs((predicted_df_test['attack_w1_reco']-predicted_df_test['attack_gt'])/predicted_df_test['attack_w1_reco'])

In [None]:
predicted_df_test['relative_loss2'] = np.abs((predicted_df_test['attack_w2_reco']-predicted_df_test['attack_gt'])/predicted_df_test['attack_w2_reco'])

In [None]:
predicted_df_test

In [None]:
# Threshold = 67th percentile of the relative loss, computed on the test data itself.
# NOTE(review): the next cell overwrites both `threshold` and the 'threshold' column.
test_loss = predicted_df_test.relative_loss.values
threshold = np.percentile(test_loss, 67)
predicted_df_test['threshold']= threshold

In [None]:
# Threshold = 90th percentile of the absolute loss, computed on the test data itself
# (this replaces any value previously stored in `threshold` and in the 'threshold' column).
test_loss = predicted_df_test.loss.values
threshold = np.percentile(test_loss, 90)
predicted_df_test['threshold']= threshold

In [None]:
val_loss = predicted_df_test.relative_loss2.values
threshold = np.percentile(val_loss, 67)
#(np.percentile(val_loss, 75)) + 1.5 *((np.percentile(val_loss, 75))-(np.percentile(val_loss, 25)))
predicted_df_test['threshold2']= threshold

In [None]:
#calculate threshold on absolute loss quartiles but only on val, and in this case per 
test_loss = predicted_df_test.loss2.values
threshold = np.percentile(test_loss, 67)
predicted_df_test['threshold2']= threshold

In [None]:
# A point is anomalous when its absolute loss exceeds the threshold; the flag is
# stored as 0/1 with a direct cast instead of two chained replace() calls.
predicted_df_test['predicted_anomaly'] = (predicted_df_test['loss'] > predicted_df_test['threshold']).astype(int) #relative_loss

In [None]:
# Same 0/1 flag for the second reconstruction, stored with a direct cast.
# NOTE(review): 'loss2' and 'threshold2' depend on the 'attack_w2_reco' column,
# which is commented out where `data` is built — confirm it is re-enabled first.
predicted_df_test['predicted_anomaly2'] = (predicted_df_test['loss2'] > predicted_df_test['threshold2']).astype(int) #relative_loss2

In [None]:
predicted_df_test.predicted_anomaly.sum() / len(predicted_df_test)

In [None]:
predicted_df_test.predicted_anomaly2.sum() / len(predicted_df_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Align the labels with the reconstructed rows by length rather than a hard-coded [:-3].
print(classification_report(labels[:len(predicted_df_test)], predicted_df_test['predicted_anomaly']))

In [None]:
print(classification_report(labels, predicted_df_test['predicted_anomaly2']))

In [None]:
# Align the labels with the reconstructed rows by length rather than a hard-coded [:-3].
roc_auc_score(labels[:len(predicted_df_test)], predicted_df_test['predicted_anomaly'])

In [None]:
roc_auc_score(labels, predicted_df_test['predicted_anomaly2'])

In [None]:
# Ground-truth labels, trimmed to the number of reconstructed rows (no hard-coded [:-3]).
predicted_df_test['anomaly'] = labels[:len(predicted_df_test)]

In [None]:
predicted_df_test

In [None]:
predicted_anomalies = predicted_df_test.loc[predicted_df_test['predicted_anomaly'] == 1]
#predicted_anomalies2 = predicted_df_test.loc[predicted_df_test['predicted_anomaly2'] == 1]
true_anomalies = predicted_df_test.loc[predicted_df_test['anomaly'] == 1]

In [None]:
true_anomalies

In [None]:
predicted_anomalies

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=predicted_df_test.index, y=predicted_df_test.attack_gt, name='First feature'))
fig.add_trace(go.Scatter(x=predicted_df_test.index, y=predicted_df_test.attack_w1_reco, name='w1 reconstruction'))
#fig.add_trace(go.Scatter(x=predicted_df_test.index, y=predicted_df_test.attack_w2_reco, name='w2 reconstruction'))
fig.add_trace(go.Scatter(x=true_anomalies.index, y=true_anomalies.anomaly, mode='markers', marker=dict(color='forestgreen'), name='True_Anomaly'))
fig.add_trace(go.Scatter(x=predicted_anomalies.index, y=predicted_anomalies.predicted_anomaly, mode='markers', marker=dict(color='yellow'), name='W1_anomaly'))
#fig.add_trace(go.Scatter(x=predicted_anomalies2.index, y=predicted_anomalies2.anomaly, mode='markers', marker=dict(color='orange'), name='W2_Anomaly'))
fig.update_layout(showlegend=True)
fig.show()

In [None]:
plt.figure(figsize = (12, 6))
plt.plot(np.arange(len(predicted_df_test)), predicted_df_test.attack_gt, label = "First feature")
plt.plot(np.arange(len(predicted_df_test)), predicted_df_test.attack_w1_reco, label = "w1_reconstruction")
#plt.plot(np.arange(len(predicted_df_test)), predicted_df_test.attack_w2_reco, label = "w2_reconstruction")
#plt.plot(np.arange(len(true_anomalies)), true_anomalies.anomaly)
plt.scatter(true_anomalies.index,true_anomalies.anomaly, color = "red", label = "anomalies" )
plt.scatter(predicted_anomalies.index,predicted_anomalies.predicted_anomaly, color = "green", label = "Predicted anomalies" )
plt.legend()
plt.show()