In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn

import torch.utils.data as data_utils
from tqdm import tqdm
from utils import *
import pickle

c:\Users\gioel\AppData\Local\Programs\Python\Python310\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\gioel\AppData\Local\Programs\Python\Python310\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from utils_USAD import *
# Device handle passed to to_device() for the model and for every batch below.
# get_default_device comes from the wildcard import of utils_USAD above
# (presumably it picks CUDA when available and CPU otherwise -- TODO confirm).
device = get_default_device()

class Encoder(nn.Module):
  """Fully connected encoder: in_size -> in_size/2 -> in_size/4 -> latent_size.

  Every linear layer, including the last one, is followed by a ReLU, so the
  returned latent code is never negative.
  """

  def __init__(self, in_size, latent_size):
    super().__init__()

    # Hidden widths shrink by halves; int() truncates odd sizes.
    half_width = int(in_size/2)
    quarter_width = int(in_size/4)

    self.linear1 = nn.Linear(in_size, half_width)
    self.linear2 = nn.Linear(half_width, quarter_width)
    self.linear3 = nn.Linear(quarter_width, latent_size)
    # One shared activation module, applied in place on each layer output.
    self.relu = nn.ReLU(inplace=True)

  def forward(self, w):
    """Map a batch ``w`` (last dimension ``in_size``) to its latent code."""
    hidden = self.relu(self.linear1(w))
    hidden = self.relu(self.linear2(hidden))
    return self.relu(self.linear3(hidden))
    
class Decoder(nn.Module):
  """Fully connected decoder: latent_size -> out_size/4 -> out_size/2 -> out_size.

  The two hidden layers use ReLU; the output layer uses a sigmoid, so every
  reconstructed value lies strictly between 0 and 1.
  NOTE(review): inputs outside [0, 1] cannot be reconstructed exactly --
  confirm the data scaling matches this output range.
  """

  def __init__(self, latent_size, out_size):
    super().__init__()

    # Hidden widths grow back by doubling; int() truncates odd sizes.
    quarter_width = int(out_size/4)
    half_width = int(out_size/2)

    self.linear1 = nn.Linear(latent_size, quarter_width)
    self.linear2 = nn.Linear(quarter_width, half_width)
    self.linear3 = nn.Linear(half_width, out_size)
    self.relu = nn.ReLU(inplace=True)
    self.sigmoid = nn.Sigmoid()

  def forward(self, z):
    """Map a batch of latent codes ``z`` to reconstructions of width ``out_size``."""
    hidden = self.relu(self.linear1(z))
    hidden = self.relu(self.linear2(hidden))
    return self.sigmoid(self.linear3(hidden))
    
class UsadModel(nn.Module):
  """One shared encoder feeding two decoders with opposing losses.

  For a batch ``x`` the model computes three reconstructions:
    w1 = decoder1(encoder(x))
    w2 = decoder2(encoder(x))
    w3 = decoder2(encoder(w1))
  and two losses weighted by ``n`` (the training loop passes epoch + 1):
    loss1 = 1/n * mse(x, w1) + (1 - 1/n) * mse(x, w3)
    loss2 = 1/n * mse(x, w2) - (1 - 1/n) * mse(x, w3)
  so the mse(x, w3) term is minimised by loss1 and maximised by loss2.
  """

  def __init__(self, w_size, z_size):
    super().__init__()
    self.encoder = Encoder(w_size, z_size)
    self.decoder1 = Decoder(z_size, w_size)
    self.decoder2 = Decoder(z_size, w_size)

  def _reconstruction_losses(self, batch, n):
    """Run the three reconstructions and return ``(loss1, loss2)`` as tensors.

    Single source of truth for the loss formulas, shared by training_step and
    validation_step so the two cannot drift apart.
    """
    z = self.encoder(batch)
    w1 = self.decoder1(z)
    w2 = self.decoder2(z)
    w3 = self.decoder2(self.encoder(w1))

    err1 = torch.mean((batch-w1)**2)
    err2 = torch.mean((batch-w2)**2)
    err3 = torch.mean((batch-w3)**2)

    loss1 = 1/n*err1+(1-1/n)*err3
    loss2 = 1/n*err2-(1-1/n)*err3
    return loss1, loss2

  def training_step(self, batch, n):
    """Return ``(loss1, loss2)`` for one batch; ``n`` sets the loss weighting."""
    return self._reconstruction_losses(batch, n)

  def validation_step(self, batch, n):
    """Return the two losses for one batch as a dict keyed 'val_loss1'/'val_loss2'."""
    loss1, loss2 = self._reconstruction_losses(batch, n)
    return {'val_loss1': loss1, 'val_loss2': loss2}

  def validation_epoch_end(self, outputs):
    """Average the per-batch validation losses and return them as Python floats.

    ``outputs`` is the list of dicts produced by validation_step. Every batch
    counts equally, regardless of its size.
    """
    batch_losses1 = [x['val_loss1'] for x in outputs]
    epoch_loss1 = torch.stack(batch_losses1).mean()
    batch_losses2 = [x['val_loss2'] for x in outputs]
    epoch_loss2 = torch.stack(batch_losses2).mean()
    return {'val_loss1': epoch_loss1.item(), 'val_loss2': epoch_loss2.item()}

  def epoch_end(self, epoch, result):
    """Print the validation losses in ``result`` for the given epoch index."""
    print("Epoch [{}], val_loss1: {:.4f}, val_loss2: {:.4f}".format(epoch, result['val_loss1'], result['val_loss2']))
    
def evaluate(model, val_loader, n):
    """Run ``model.validation_step`` on every batch of ``val_loader`` and aggregate.

    Each item yielded by ``val_loader`` must be a one-element sequence holding
    the batch tensor. ``n`` is forwarded to ``validation_step``. Returns whatever
    ``model.validation_epoch_end`` returns for the list of per-batch results.
    """
    # Nothing here calls backward(); disabling autograd keeps each stored loss
    # from holding its whole computation graph until the epoch is aggregated.
    with torch.no_grad():
        outputs = [model.validation_step(to_device(batch,device), n) for [batch] in val_loader]
    return model.validation_epoch_end(outputs)

def training(epochs, model, train_loader, val_loader, opt_func=torch.optim.Adam):
    """Alternately train the two autoencoders of ``model`` and validate each epoch.

    Args:
        epochs: number of passes over ``train_loader``.
        model: object exposing ``encoder``, ``decoder1``, ``decoder2``,
            ``training_step(batch, n)`` and ``epoch_end(epoch, result)``.
        train_loader / val_loader: iterables yielding one-element sequences
            that hold the batch tensor.
        opt_func: callable building an optimizer from a list of parameters.

    Returns:
        A list with one ``evaluate`` result per epoch.
    """
    history = []

    # optimizer1 updates encoder + decoder1, optimizer2 updates encoder + decoder2.
    optimizer1 = opt_func(list(model.encoder.parameters())+list(model.decoder1.parameters()))
    optimizer2 = opt_func(list(model.encoder.parameters())+list(model.decoder2.parameters()))
    for epoch in range(epochs):
        for [batch] in train_loader:
            batch=to_device(batch,device)

            #Train AE1
            loss1,_ = model.training_step(batch,epoch+1)
            loss1.backward()
            optimizer1.step()
            # loss1 also back-propagates into decoder2 (through w3), which
            # optimizer1 does not own. Clearing both optimizers keeps those
            # gradients from being added to loss2's gradients below.
            optimizer1.zero_grad()
            optimizer2.zero_grad()

            #Train AE2
            _,loss2 = model.training_step(batch,epoch+1)
            loss2.backward()
            optimizer2.step()
            # Symmetric case: loss2 back-propagates into decoder1, so clear both
            # again before the next batch's loss1 step.
            optimizer2.zero_grad()
            optimizer1.zero_grad()

        result = evaluate(model, val_loader, epoch+1)
        model.epoch_end(epoch, result)
        history.append(result)
    return history
    
def testing(model, test_loader, alpha=.5, beta=.5):
    results=[]
    for [batch] in test_loader:
        batch=to_device(batch,device)
        w1=model.decoder1(model.encoder(batch))
        w2=model.decoder2(model.encoder(w1))
        results.append(alpha*torch.mean((batch-w1)**2,axis=1)+beta*torch.mean((batch-w2)**2,axis=1))
        #results.append(alpha*(batch-w1)**2+beta*(batch-w2)**2)
    return results

ModuleNotFoundError: No module named 'utils_USAD'

In [None]:
# Default number of consecutive time steps in one window.
WINDOW_SIZE=40
def create_sequences(values, time_steps=WINDOW_SIZE):
    """Return every contiguous window of ``time_steps`` items of ``values``.

    Windows advance one step at a time and are stacked along a new leading
    axis, so the result holds ``len(values) - time_steps + 1`` windows.
    """
    n_windows = len(values) - time_steps + 1
    windows = [values[start : start + time_steps] for start in range(n_windows)]
    return np.stack(windows)

In [84]:



# Training configuration.
BATCH_SIZE = 64
N_EPOCHS = 50
hidden_size = 100

# NOTE(review): pickle.load can execute code stored in the file -- only open
# files from a trusted source.
with open('.././DATA_SPLITTED/app_tot.pkl', 'rb') as f:
    DATA = pickle.load(f)

X_train, X_val, X_test = DATA['X_train'], DATA['X_val'], DATA['X_test']

# Each window (axes 1 and 2) is flattened into one input vector of w_size values.
w_size = X_train.shape[1] * X_train.shape[2]
z_size = hidden_size


def _flat_loader(windows):
    """Wrap ``windows`` (first axis = samples) in an unshuffled DataLoader of flattened float rows."""
    flat = torch.from_numpy(windows).float().reshape([windows.shape[0], w_size])
    return torch.utils.data.DataLoader(
        data_utils.TensorDataset(flat),
        batch_size=BATCH_SIZE, shuffle=False, num_workers=0)


train_loader = _flat_loader(X_train)
val_loader = _flat_loader(X_val)
test_loader = _flat_loader(X_test)

model = to_device(UsadModel(w_size, z_size), device)
print(model)
history = training(N_EPOCHS, model, train_loader, val_loader)
# Alternative kept from the original: validate on the training data instead.
#history = training(N_EPOCHS,model,train_loader,train_loader)



UsadModel(
  (encoder): Encoder(
    (linear1): Linear(in_features=760, out_features=380, bias=True)
    (linear2): Linear(in_features=380, out_features=190, bias=True)
    (linear3): Linear(in_features=190, out_features=100, bias=True)
    (relu): ReLU(inplace=True)
  )
  (decoder1): Decoder(
    (linear1): Linear(in_features=100, out_features=190, bias=True)
    (linear2): Linear(in_features=190, out_features=380, bias=True)
    (linear3): Linear(in_features=380, out_features=760, bias=True)
    (relu): ReLU(inplace=True)
    (sigmoid): Sigmoid()
  )
  (decoder2): Decoder(
    (linear1): Linear(in_features=100, out_features=190, bias=True)
    (linear2): Linear(in_features=190, out_features=380, bias=True)
    (linear3): Linear(in_features=380, out_features=760, bias=True)
    (relu): ReLU(inplace=True)
    (sigmoid): Sigmoid()
  )
)
Epoch [0], val_loss1: 0.8662, val_loss2: 0.8662
Epoch [1], val_loss1: 0.8662, val_loss2: 0.0000
Epoch [2], val_loss1: 0.8662, val_loss2: -0.2887
Epoch [

In [89]:
# Rebuild the (unshuffled) loader over the flattened test windows and score them.
test_loader = torch.utils.data.DataLoader(data_utils.TensorDataset(
        torch.from_numpy(X_test).float().reshape(([X_test.shape[0],w_size]))
    ) , batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
results=testing(model,test_loader)

# testing() returns one 1-D score tensor per batch. torch.cat joins them in
# order whatever their lengths, and also works when the loader yields a single
# batch (stacking results[:-1] raised on the resulting empty list).
y_pred=torch.cat(results).detach().cpu().numpy()



CSV

In [90]:
# Mean of each test window over its last two axes (one value per window).
data=DATA['X_test'].mean(axis=(1,2))
# Squared gap between the model score and that window mean; the thresholds in
# the CSV cell are derived from this vector.
# NOTE(review): y_pred is already a reconstruction error, so subtracting a
# window mean from it looks unusual -- confirm this is the intended score.
score=np.power(np.subtract(y_pred,data),2)

In [96]:
import csv


# Column layout of USAD.csv.
header=['th_factor','method','value','F1','precision','recall','TP','TN','FP','FN']
thresholding_factor=[0.5,1,1.5,2]
m=['IQR','MAD','STD']


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    if denominator == 0:
        return float('nan')
    return numerator / denominator


# --- 1) Statistics of `score` used by the three threshold rules. They do not
# depend on the thresholding factor, so they are computed once.
q1, q3 = np.percentile(score, 25), np.percentile(score, 75)
iqr = q3 - q1
median = np.median(score)
mad = 1.4826 * np.median(np.abs(score - median))
mean, std = np.mean(score), np.std(score)

# --- 2) Score every test trace exactly once. The errors depend only on the
# model and the data, not on the threshold, so running inference again for
# each (factor, method) pair would repeat identical work.
trace_errors=[]
trace_truth=[]
for i in tqdm(range(1,11)):
    if i==7:
        # Split 7 is skipped, as in the original loop (reason not visible here).
        continue

    # NOTE(review): allow_pickle=True / pickle.load can execute code stored in
    # the files -- only read trusted data.
    TEST=np.load(f'.././OUTPUTS_ROOT/data/processed/spark_0_15s/spark_0_trace-scl_std/test{i}.npy',allow_pickle=True)
    ANOMALY=np.load(f'.././OUTPUTS_ROOT/data/processed/spark_0_15s/spark_0_trace-scl_std/y_test{i}.npy',allow_pickle=True)
    # Separate handle name so the CSV file object below is never shadowed.
    with open(f'.././OUTPUTS_ROOT/data/interim/spark_0_15s/test_info{i}.pkl', 'rb') as info_file:
        TEST_info= pickle.load(info_file)

    # TEST_info is only used for the number of traces in this split.
    for x in range(len(TEST_info)):
        X=create_sequences(TEST[x])

        test_loader = torch.utils.data.DataLoader(data_utils.TensorDataset(
                torch.from_numpy(X).float().reshape(([X.shape[0],w_size]))
            ) , batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

        results=testing(model,test_loader)
        # torch.cat also handles traces that fit in a single batch.
        Y=torch.cat(results).detach().cpu().numpy()

        error=np.power(X.mean(axis=(1,2))-Y,2)
        # Errors that are not below 3 are replaced by 2 (kept from the original).
        error=np.where(error<3,error,2)

        le=len(error)
        trace_errors.append(error)
        # Labels start at offset 20 -- presumably WINDOW_SIZE // 2, i.e. the
        # centre of each window; TODO confirm.
        trace_truth.append(ANOMALY[x][20:20+le]>=1)

# --- 3) Apply every threshold to the cached errors and write one row each.
# newline='' is required by the csv module; without it Windows output gets a
# blank line after every row.
with open('USAD.csv', 'w', encoding='UTF8', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(header)

    for t in thresholding_factor:
        IQR = q3 + t* iqr
        MAD = median + t * mad
        STD = mean + t * std
        method=[IQR,MAD,STD]

        for g in range(len(method)):
            TP=0
            TN=0
            FN=0
            FP=0

            for error, true_ in zip(trace_errors, trace_truth):
                prediction_ = error>method[g]
                TP = TP+(true_ & prediction_).sum()
                TN = TN+(~true_ & ~prediction_).sum()
                FP = FP+(~true_ & prediction_).sum()
                FN = FN+(true_ & ~prediction_).sum()

            # Undefined ratios (nothing predicted / no positives) become NaN
            # instead of raising or emitting a divide warning.
            PREC=_safe_ratio(TP, TP + FP)
            REC=_safe_ratio(TP, TP + FN)
            f1=_safe_ratio(2 * PREC * REC, PREC + REC)
            row=[t,m[g],method[g],f1,PREC,REC,TP,TN,FP,FN]
            writer.writerow(row)
            

PermissionError: [Errno 13] Permission denied: 'USAD.csv'