In [9]:
# Import the libraries
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt  
import pandas as pd
from numpy import loadtxt
from tensorflow.keras.models import load_model,save_model
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation,Flatten,Input
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam,RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import tensorflow.keras.backend as K
import math,json,os,random,itertools
import time,traceback
from scipy import stats
random.seed(2)
#my scaler
import predict_utils
#read pre-trained scaler
from pickle import load
import tcn
from tcn import TCN 
from bokeh.plotting import figure, show
import json 

class Sampling(keras.layers.Layer):
    """Keras layer that draws a random sample from a (mean, log-variance) pair.

    Registered as a custom object when the saved model architecture is
    deserialised from JSON.
    """

    def call(self, inputs):
        """Return ``noise * exp(log_var / 2) + mean``.

        ``inputs`` is unpacked into ``(mean, log_var)``; ``noise`` is drawn
        with ``K.random_normal`` using the dynamic shape of ``log_var``.
        """
        mean, log_var = inputs
        noise = K.random_normal(tf.shape(log_var))
        scale = K.exp(log_var / 2)
        return noise * scale + mean

#pd.set_option('mode.chained_assignment', None)

# --- Detection settings ------------------------------------------------------
# NOTE(review): anomaly_threshold and detection_columns are not referenced by
# the code visible in this notebook export; kept because other cells may use them.
anomaly_threshold = 5.
# A batch is flagged as anomalous when at least this many of its windows are.
tolerance = 20
detection_columns = ["Memory used  (%)",
        "CPU utilization (%) "]

# --- Load the pre-fitted scaler ----------------------------------------------
# SECURITY NOTE: unpickling executes arbitrary code embedded in the file, so
# only load scaler files that come from a trusted source.
try:
    # The context manager guarantees the file handle is closed.
    with open('6v_minmax_scaler.pkl', 'rb') as scaler_file:
        scaler = load(scaler_file)
    print("load scaler")
except Exception:
    # The scaler is required by the detection loop further down; re-raise so
    # the failure surfaces here instead of as a NameError on `scaler` later.
    print("loading scaler error")
    raise

# --- Load the trained model (architecture JSON + weights) --------------------
json_string="./Model/best_model/20220101-174647_bvae/model.json"
model_weight="./Model/best_model/20220101-174647_bvae/weights.h5"

with open(json_string, 'r') as json_file:
    json_savedModel= json_file.read()
# Rebuild the architecture; TCN and Sampling are custom layers that Keras
# cannot resolve by name unless they are passed in explicitly.
predict_model = tf.keras.models.model_from_json(
    json_savedModel,
    custom_objects={"TCN":TCN,"Sampling": Sampling},
)
predict_model.load_weights(model_weight)

#Read CSV
def read_data(path,o_dtname="Datetime",c_dtname = 'datetime'):
    """Read a CSV file into a DataFrame indexed by a parsed datetime column.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pandas.read_csv``.
    o_dtname : str
        Name of the datetime column as it appears in the CSV header.
    c_dtname : str
        Name given to the resulting datetime index.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``o_dtname`` parsed as datetimes and used as
        the index (renamed to ``c_dtname``). The strings ``'nan'`` and ``'?'``
        are read as missing values.

    Side effects
    ------------
    Prints the per-column count of missing values.
    """
    # The dict form of `parse_dates` and `infer_datetime_format` are deprecated
    # in pandas 2.x; parsing the column by name and renaming the index
    # afterwards yields the same frame without relying on them.
    df = pd.read_csv(path, sep=',',
                     parse_dates=[o_dtname],
                     low_memory=False,
                     na_values=['nan','?'],
                     index_col=o_dtname)
    df.index.name = c_dtname

    print(df.isnull().sum())
    return df


#calculate MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Computes ``mean(|(y_true - y_pred) / y_true|) * 100``.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted values. Plain lists/tuples are converted
        to float arrays first (they do not support element-wise arithmetic);
        any other input type is used as given.

    Notes
    -----
    Entries where ``y_true`` is zero make the ratio undefined, so the result
    is ``inf`` or ``nan`` in that case.
    """
    if isinstance(y_true, (list, tuple)):
        y_true = np.asarray(y_true, dtype=float)
    if isinstance(y_pred, (list, tuple)):
        y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

#calculate and return mae, mse, rmse
def cal_score(y_real,y_hat):
    """Compute MAE, MSE and RMSE between two arrays.

    Parameters
    ----------
    y_real, y_hat : array-like
        Reference and predicted values, passed unchanged to the scikit-learn
        metric functions.

    Returns
    -------
    tuple
        ``(MAE, MSE, RMSE)`` in that order.
    """
    # mean_absolute_error is not part of the file-level imports, so both
    # metrics are imported here in a single statement.
    from sklearn.metrics import mean_absolute_error, mean_squared_error

    MAEScore = mean_absolute_error(y_real,y_hat)
    MSEScore = mean_squared_error(y_real,y_hat)
    # RMSE is the square root of the MSE computed above; no second pass needed.
    RMSEScore = math.sqrt(MSEScore)

    return MAEScore,MSEScore,RMSEScore


# Load the test set; the "dt" column becomes the "datetime" index.
df = read_data("test_df.csv",o_dtname="dt",c_dtname = 'datetime')
print(df)

# Cut the frame into consecutive batches of `detect_window_size` rows.
# df_list holds the batches; gt_list holds one 0/1 label per batch, which is 1
# when the last column of the batch sums to at least 1.
detect_window_size = 50
df_list = []
gt_list = []
total_rows = df.shape[0]
for start in range(0, total_rows, detect_window_size):
    stop = start + detect_window_size
    if stop <= total_rows:
        chunk = df.iloc[start:stop, :]
    else:
        # Incomplete final chunk: use the last `detect_window_size` rows
        # instead, which overlaps the previous batch.
        print("tail")
        chunk = df.iloc[-detect_window_size:, :]

    df_list.append(chunk)
    label_total = np.sum(chunk.iloc[:, -1].values)
    gt_list.append(1 if label_total >= 1 else 0)

print(*gt_list)


# Score every batch with the model and export the results.
#
# predict_count: number of batches processed so far
# window_size: sliding-window length, read from the model's input shape
# error_sum_list: count of anomalous windows per batch
# anomaly_batch: 0/1 verdict per batch (1 when the count reaches `tolerance`)
# loss_list: one [mae, mse, rmse] row per evaluated window
predict_count = 0
window_size = predict_model.layers[0].input_shape[0][1]
if window_size is None:
    # Without a fixed length the sliding window below cannot be built.
    raise ValueError(
        "The model's input shape does not fix the sequence length "
        "(input_shape[0][1] is None); cannot derive window_size."
    )
error_sum_list=[]
failure_bitmap = []
rmse_list = []
mae_list = []
anomaly_batch = []
loss_list = []


#for each batch
for i,df_batch in enumerate(df_list):
    print("{}th batch".format(i))
    anomaly_list = []
    # NOTE(review): the range stops at rows - window_size, so the window that
    # starts at that last offset is never evaluated - confirm this is intended.
    for j in range(0,df_batch.shape[0]-window_size):
        # All columns except the last one (the label) are model inputs.
        pre_batch = np.expand_dims(df_batch.iloc[j:j+window_size,:-1].values,axis=0)

        pre = predict_model.predict(pre_batch)
        # Write the unscaled values into new arrays. `pre_batch` may be a view
        # of the DataFrame's data, and assigning into it would alter rows that
        # the next (overlapping) window reads again.
        reconstructed = scaler.inverse_transform(pre[0])
        observed = scaler.inverse_transform(pre_batch[0])

        mae,mse,rmse= cal_score(observed,reconstructed)
        loss_list.append([mae,mse,rmse])
        print(mae)
        # NOTE(review): the cut-off 10 is hard-coded while `anomaly_threshold`
        # (5.) is defined near the top of the cell and unused - confirm which
        # value is meant.
        if mae >=10:
            anomaly_list.append(1)
        else:
            anomaly_list.append(0)

    print(*anomaly_list)
    anomalous_windows = sum(anomaly_list)
    print(anomalous_windows)
    error_sum_list.append(anomalous_windows)
    predict_count+=1

    if anomalous_windows>=tolerance:
        anomaly_batch.append(1)
    else:
        anomaly_batch.append(0)


print(*anomaly_batch) 
print(*error_sum_list)  

result_dict = {
    "predict":anomaly_batch,
    "ground true":gt_list,
    "windows error":error_sum_list

}
# reshape(-1, 3) keeps the column slicing valid even when no window was scored.
loss_array = np.array(loss_list, dtype=float).reshape(-1, 3)
loss_dict = {
    "MAE":loss_array[:,0],
    "MSE":loss_array[:,1],
    "RMSE":loss_array[:,2]

}
res_df = pd.DataFrame(result_dict)
res_df.to_csv("vae_test_res.csv")

loss_df = pd.DataFrame(loss_dict)                 
loss_df.to_csv("vae_test_loss.csv")

load scaler
CPU utilization (%)    0
Memory used  (%)       0
C_anomaly_label        0
dtype: int64
                     CPU utilization (%)  Memory used  (%)  C_anomaly_label
datetime                                                                   
2021-10-25 12:00:00             0.619872          0.751620                0
2021-10-25 12:03:00             0.362755          0.350977                0
2021-10-25 12:06:00             0.409731          0.355691                0
2021-10-25 12:09:00             0.478531          0.399189                0
2021-10-25 12:12:00             0.375117          0.466420                0
...                                  ...               ...              ...
2021-10-27 00:45:00             0.360103          0.200533                0
2021-10-27 00:48:00             0.369827          0.234972                0
2021-10-27 00:51:00             0.402045          0.235262                0
2021-10-27 00:54:00             0.397626          0.268535      

KeyboardInterrupt: 

In [8]:
# Inspect the value the detection cell uses as `window_size`: dimension 1 of
# the first layer's first input shape.
print(predict_model.layers[0].input_shape[0][1])

None
