In [1]:
import pickle

# libraries importing
import numpy as np
import pandas as pd
from tqdm import tqdm
import csv
import matplotlib.pyplot as plt
%matplotlib inline

#from sklearn.preprocessing import MinMaxScaler

from tensorflow import keras
from keras.models import Model, load_model
from keras.layers import Input, Dense, LSTM, RepeatVector, Lambda
from keras.callbacks import EarlyStopping
from keras import backend as K
from tensorflow.keras import losses
import tensorflow as tf
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

import warnings
warnings.filterwarnings("ignore")

c:\Users\gioel\AppData\Local\Programs\Python\Python310\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\gioel\AppData\Local\Programs\Python\Python310\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll


In [2]:
WINDOW_SIZE=40
def create_sequences(values, time_steps=WINDOW_SIZE):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)

In [3]:

latent_dim=64

def create_lstm_vae():
    
    x = Input(shape=(WINDOW_SIZE, 19))

    # encoding
    h = LSTM(128,return_sequences=True)(x)
    h = LSTM(64)(h)

    # VAE Z layer
    z_mean = Dense(latent_dim, name='mean')(h)
    z_log_sigma = Dense(latent_dim, name='var')(h)
    
    def sampling(args):
        z_mean, z_log_sigma = args
        epsilon = K.random_normal(shape=(1, latent_dim),
                                  mean=0., stddev=1.)
        return z_mean + z_log_sigma * epsilon

    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_sigma])

    #decoding
    decoder= RepeatVector(WINDOW_SIZE)(z)
    decoder= LSTM(64, return_sequences=True)(decoder)
    decoder= LSTM(128, return_sequences=True)(decoder)
    
    x_decoded_mean= Dense(19)(decoder)
    vae = Model(x, x_decoded_mean)

    # encoder, from inputs to latent space
    encoder = Model(x, [z_mean, z_log_sigma, z], name='encoder')
    #encoder.summary()

    #decoder, from latent space to output
    decoder_input = Input(shape=(latent_dim,))
    _h_decoded = RepeatVector(WINDOW_SIZE)(decoder_input)
    _h_decoded = LSTM(64, return_sequences=True)(_h_decoded)
    _h_decoded= LSTM(128, return_sequences=True)(_h_decoded)
    x_decoded= Dense(19)(_h_decoded)

    dencoder = Model(decoder_input, x_decoded, name='dencoder')
    #dencoder.summary()

    
    def vae_loss(x, x_decoded_mean):
        mse = losses.MeanSquaredError()
        xent_loss = mse(x, x_decoded_mean)
        kl_loss = - 0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma))
        loss = xent_loss + kl_loss

        return loss

    vae.compile(optimizer='rmsprop', loss=vae_loss)

    return vae,encoder

In [8]:
import pickle
with open(f'./DATA_SPLITTED/app_tot.pkl', 'rb') as f:
            DATA = pickle.load(f)
    
model,encoder=create_lstm_vae()

model.load_weights(filepath=f'MODEL_VAE/model_app_Win_{WINDOW_SIZE}_.h5')
encoder.load_weights(filepath=f'MODEL_VAE/encoder_app_Win_{WINDOW_SIZE}_.h5')

e=model.predict(DATA['X_test'])
score=np.mean(np.power(e-DATA['X_test'],2),axis=(1,2))

'''_,_,e1=encoder.predict(DATA['X_test'])

e2=model.predict(DATA['X_test'])
_,_,e2=encoder.predict(e2)

score=np.mean(np.power(e1-e2,2),axis=(1))'''

np.save('./score_analisi/VAE.npy',score,allow_pickle=True)


save results

In [11]:
import csv

with open(f'./DATA_SPLITTED/app_tot.pkl', 'rb') as f:
            DATA = pickle.load(f)
    
model,encoder=create_lstm_vae()

model.load_weights(filepath=f'MODEL_VAE/model_app_Win_{WINDOW_SIZE}_.h5')
encoder.load_weights(filepath=f'MODEL_VAE/encoder_app_Win_{WINDOW_SIZE}_.h5')

header=['th_factor','method','value','F1','precision','recall','TP','TN','FP','FN']

with open('VAE_reenc.csv', 'w', encoding='UTF8') as f:
    writer = csv.writer(f)

    writer.writerow(header)
    thresholding_factor=[0.5,1,1.5,2]
    for t in thresholding_factor:
        
        _,_,e1=encoder.predict(DATA['X_test'])

        e2=model.predict(DATA['X_test'])
        _,_,e2=encoder.predict(e2)

        score=np.mean(np.power(e1-e2,2),axis=(1))
        

        #IQR
        q1, q3 = np.percentile(score, 25), np.percentile(score, 75)
        iqr = q3 - q1
        IQR = q3 + t* iqr

        #MAD
        median = np.median(score)
        mad = 1.4826 * np.median(np.abs(score - median))
        MAD = median + t * mad

        #STD
        mean, std = np.mean(score), np.std(score)
        STD = mean + t * std

        method=[IQR,MAD,STD]
        
        for g in range(len(method)):
            TP=0
            TN=0
            FN=0
            FP=0

            for i in tqdm(range(1,11)):
                if(i!=7):
                
                            TEST=np.load(f'./OUTPUTS_ROOT/data/processed/spark_0_15s/spark_0_trace-scl_std/test{i}.npy',allow_pickle=True)

                            ANOMALY=np.load(f'./OUTPUTS_ROOT/data/processed/spark_0_15s/spark_0_trace-scl_std/y_test{i}.npy',allow_pickle=True)

                            with open(f'./OUTPUTS_ROOT/data/interim/spark_0_15s/test_info{i}.pkl', 'rb') as f:
                                TEST_info= pickle.load(f)

                            
                            for x in range(len(TEST_info)):
                                
                                X=create_sequences(TEST[x])

                                Y=model.predict(X)

                                _,_,X=encoder.predict(X)
                                _,_,Y=encoder.predict(Y)

                                error=np.mean(np.power(X-Y,2),axis=(1))
                                error=[error[l] if error[l]<2 else 2 for l in range(len(error))]

                                err=[]
                                tr=[]
                                for v in range(len(error)-39):
                                    if(ANOMALY[x][WINDOW_SIZE-1+v]>=1):
                                        err.append(error[v])
                                    else:
                                        tr.append(error[v])

                                outlier=error>method[g]

                                
                                le=len(outlier)

                                true_= ANOMALY[x][20:le+20]>=1
                                prediction_ = outlier[:].astype(int)==1

                                TP = TP+(true_ & prediction_).sum()   
                                TN = TN+(~true_ & ~prediction_).sum()  
                                FP = FP+(~true_ & prediction_).sum()    
                                FN = FN+(true_ & ~prediction_).sum()    

            PREC=TP / (TP + FP)
            REC = TP/ (TP+FN)
            f1=2 * PREC * REC/(PREC + REC)
            m=['IQR','MAD','STD']
            row=[t,m[g],method[g],f1,PREC,REC,TP,TN,FP,FN]
            writer.writerow(row)

100%|██████████| 10/10 [03:16<00:00, 19.70s/it]
100%|██████████| 10/10 [02:52<00:00, 17.21s/it]
100%|██████████| 10/10 [02:44<00:00, 16.45s/it]
100%|██████████| 10/10 [02:42<00:00, 16.29s/it]
100%|██████████| 10/10 [01:58<00:00, 11.83s/it]
100%|██████████| 10/10 [02:03<00:00, 12.36s/it]
100%|██████████| 10/10 [01:53<00:00, 11.35s/it]
100%|██████████| 10/10 [02:22<00:00, 14.27s/it]
100%|██████████| 10/10 [02:28<00:00, 14.90s/it]
100%|██████████| 10/10 [01:56<00:00, 11.62s/it]
100%|██████████| 10/10 [02:08<00:00, 12.84s/it]
100%|██████████| 10/10 [02:05<00:00, 12.52s/it]
