# Model 4 (Quelle aus Github)

In [32]:
from math import ceil

import numpy as np
import tensorflow as tf
from scipy.stats import multivariate_normal

In [2]:
def tf_namespace(namespace):
    def wrapper(f):
        def wrapped_f(*args, **kwargs):
            with tf.name_scope(namespace):
                return f(*args, **kwargs)

        return wrapped_f

    return wrapper

In [30]:
class VAE:

    def __init__(self, input_shape, encode_sizes, latent_size, decode_sizes=None, mu_prior=None, sigma_prior=None,
                 lr=10e-4,  momentum=0.9, save_model=True):
        self.encode_sizes = encode_sizes
        self.latent_size = latent_size
        self.decode_sizes = decode_sizes or encode_sizes[::-1]
        self.mu_prior = mu_prior or np.zeros([latent_size], dtype='float32')
        self.sigma_prior = sigma_prior or np.ones([latent_size], 'float32')
        self.lr = lr
        self.momentum = momentum
        self.input_shape = input_shape
        self.save_model = save_model
        self._build_graph(input_shape, latent_size)

    def _build_graph(self, input_shape, latent_size):
        self.graph = tf.Graph()
        with self.graph.as_default():
            self._create_placeholders(input_shape)
            self._create_encoder(self.X)
            self._create_latent_distribution(self.encoder, latent_size)
            self._create_decoder(self.z)
            self.loss = - self.elbo(self.X, self.decoder, self.mu, self.log_sigma_square, self.sigma_square,
                                    tf.constant(self.mu_prior), tf.constant(self.sigma_prior))
            self.opt = tf.train.AdamOptimizer(self.lr, self.momentum)
            self.opt_op = self.opt.minimize(self.loss)
            self.session = tf.InteractiveSession(graph=self.graph)
        writer = tf.summary.FileWriter(logdir='logdir', graph=self.graph)
        writer.flush()

    @property
    def k_init(self):
        return {'kernel_initializer': tf.glorot_uniform_initializer()}

    def elbo(self, X_true, X_pred, mu, log_sigma, sigma, mu_prior, sigma_prior):
        epsilon = tf.constant(0.000001)
        self.mae = tf.losses.absolute_difference(X_true, X_pred, reduction=tf.losses.Reduction.NONE)
        self.mae_sum = tf.reduce_sum(self.mae, axis=1)
        log_sigma_prior = tf.log(sigma_prior + epsilon)
        mu_diff = mu - mu_prior
        self.kl = log_sigma_prior - log_sigma - 1 + (sigma + tf.multiply(mu_diff, mu_diff)) / sigma_prior
        self.kl_sum = tf.reduce_sum(self.kl, axis=1)
        return tf.reduce_mean(- self.mae_sum - self.kl_sum)

    @tf_namespace('placeholders')
    def _create_placeholders(self, input_shape):
        self.X = tf.placeholder(tf.float32, shape=[None, *input_shape], name='X')

    @tf_namespace('encoder')
    def _create_encoder(self, X):
        self.encode_layers = []
        self.encoder = X
        for i, conv_size in enumerate(self.encode_sizes):
            self.encoder = tf.layers.dense(self.encoder, conv_size, **self.k_init,
                                           activation=tf.nn.relu, name=f'encoder_{i + 1}')
            self.encode_layers.append(self.encoder)
            setattr(self, f'encoder_{i + 1}', self.encoder)

    @tf_namespace('latent')
    def _create_latent_distribution(self, encoder, latent_dim):
        self.mu = tf.layers.dense(encoder, latent_dim, **self.k_init, name='mu')
        self.log_sigma_square = tf.layers.dense(encoder, latent_dim,
                                                **self.k_init, name='log_sigma_square')
        self.sigma_square = tf.exp(self.log_sigma_square, 'sigma_square')
        self.z = tf.add(self.mu, self.sigma_square * tf.random.normal(tf.shape(self.sigma_square)), 'z')

    @tf_namespace('decoder')
    def _create_decoder(self, z):
        self.decoder = z
        self.decode_layers = []
        for i, lsize in enumerate(self.decode_sizes):
            self.decoder = tf.layers.dense(self.decoder, lsize, **self.k_init,
                                           activation=tf.nn.relu, name=f'decoder_{i + 1}')
            setattr(self, f'decoder_{i + 1}', self.decoder)
            self.decode_layers.append(self.decoder)
            if i == len(self.decode_sizes) - 1:
                self.mu_post = tf.layers.dense(self.decoder, self.input_shape[0], name='mu_posterior')
                self.log_sigma_post = tf.layers.dense(self.decoder, self.input_shape[0])
                self.sigma_post = tf.exp(self.log_sigma_post, 'sigma_square_posterior')
                self.decoder = tf.add(self.mu_post,
                                      self.sigma_post * tf.random.normal((self.input_shape[0],), name='eps_post'),
                                      name='decoder_output')
                setattr(self, f'decoder_{i + 2}', self.decoder)
                self.decode_layers.append(self.decoder)
        return self.decoder

    @property
    def layers(self):
        return [(f'encoder_{i}', getattr(self, f'encoder_{i}')) for i in range(1, len(self.encode_layers) + 1)] + \
               [('mu', self.mu), ('sigma', self.log_sigma_square), ('z', self.z)] + \
               [(f'decoder_{i}', getattr(self, f'decoder_{i}')) for i in range(1, len(self.decode_layers) + 1)]

    def fit(self, X, epochs, batch_size, print_every=50, save_every_epochs=5, verbose=True):
        n_batch = ceil(X.shape[0] / batch_size)
        if self.save_model:
            saver = tf.train.Saver()
        self.session.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            np.random.shuffle(X)
            acc_loss = 0
            counter = 0
            for i in range(n_batch):
                slice_batch = slice(i * batch_size, (i + 1) * batch_size) if i != n_batch - 1 else slice(
                    i * batch_size,
                    None)
                X_batch = X[slice_batch, :]
                batch_loss, _ = self.session.run([self.loss, self.opt_op], {self.X: X_batch})
                acc_loss += batch_loss
                if verbose and counter % print_every == 0:
                    print(f" Epoch {epoch} - batch {i} - neg_ELBO = {batch_loss}")
                counter += 1
            if verbose:
                print(f'\nEpoch {epoch} - Avg loss = {acc_loss / n_batch}')
                print('\n' + ('-' * 70))
            if self.save_model and (epoch+1) % save_every_epochs == 0:
                saver.save(self.session, "ckpts/ad_vae.ckpt")

    def generate(self, n=1, mu_prior=None, sigma_prior=None):
        """
        Generate new examples sampling from the latent distribution
        :param n: number of examples to generate
        :param mu_prior:
        :param sigma_prior:
        :return: a matrix of size [n, p] where p is the number of variables of X_train
        """
        if mu_prior is None:
            mu_prior = self.mu_prior
        if sigma_prior is None:
            sigma_prior = self.sigma_prior
        z = np.random.multivariate_normal(mu_prior, np.diag(sigma_prior), [n])
        return self.session.run(self.decoder, feed_dict={self.z: z})

    def reconstruct(self, X):
        return self.session.run(self.decoder, feed_dict={self.X: X})

    def reconstructed_probability(self, X, L=100):
        reconstructed_prob = np.zeros((X.shape[0],), dtype='float32')
        mu_hat, sigma_hat = self.session.run([self.mu_post, self.sigma_post], {self.X: X})
        for l in range(L):
            mu_hat = mu_hat.reshape(X.shape)
            sigma_hat = sigma_hat.reshape(X.shape) + 0.00001
            for i in range(X.shape[0]):
                p_l = multivariate_normal.pdf(X[i, :], mu_hat[i, :], np.diag(sigma_hat[i, :]))
                reconstructed_prob[i] += p_l
        reconstructed_prob /= L
        return reconstructed_prob

    def is_outlier(self, X, L=100, alpha=0.05):
        p_hat = self.reconstructed_probability(X, L)
        return p_hat < alpha

    def open(self):
        if not hasattr(self, 'session') or self.session is None:
            if self.graph is None:
                self._build_graph(self.input_shape, self.latent_size)
            else:
                self.session = tf.InteractiveSession(graph=self.graph)

    def close(self):
        if hasattr(VAE, 'session') and VAE.session is not None:
            VAE.session.close()
            VAE.session = None

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __delete__(self, instance):
        self.close()

    def __setattr__(self, key, value):
        if key == 'session':
            if hasattr(self, 'session') and self.session is not None:
                self.close()
            VAE.session = value
        else:
            self.__dict__[key] = value

    def __delattr__(self, item):
        if item == 'session':
            self.close()
            del VAE.__dict__['session']
        else:
            del self.__dict__[item]

    def __enter__(self):
        self.open()

# Data Input

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import clear_output

import sys
import h5py
from pathlib import Path

In [5]:
PATH_LIB = r"E:\ningze\process_analysis"
sys.path.append(PATH_LIB)
import process_analysis as pa

In [6]:
%matplotlib widget

In [7]:
from process_analysis.io.live_hdf import File as HdfFile
from process_analysis.io.live_hdf import Group as HdfGroup
from process_analysis.io.live_hdf import Dataset as HdfDataset

In [8]:
PATH_DATA = "E:/ningze/data.h5"
KEY_CONST = '3_time_series/0/constants'
KEY_TS = '3_time_series/0/time_series'

In [9]:
f = HdfFile(PATH_DATA)

In [10]:
df_const = f[KEY_CONST].value.set_index('index')
df_const.index.name = 'segment_id'
df_const.head()

Unnamed: 0_level_0,experiment,segment,TSp_ActSpeed,notch,io_label
segment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0,0,2386.593105,0,True
1,0,1,2386.593497,0,True
2,0,2,2386.587616,0,True
3,0,3,2386.595233,0,True
4,0,4,2386.601852,0,True


In [11]:
df_const[df_const['io_label']==False]

Unnamed: 0_level_0,experiment,segment,TSp_ActSpeed,notch,io_label
segment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
126,1,36,2983.5183,6,False
127,1,37,2983.505328,6,False
128,1,38,2983.506711,6,False
129,1,39,2983.51088,6,False
130,1,40,2983.508447,6,False
131,1,41,2983.512393,6,False
132,1,42,2983.506904,7,False
133,1,43,2983.518411,7,False
134,1,44,2983.518,7,False
135,1,45,2983.5172,7,False


In [12]:
dfs = dict()
for e in f[KEY_TS]:
    if isinstance(e,HdfDataset):
        dfs[int(e.name)]=e.value.set_index('time')

In [13]:
plt.close('all')
fig, ax = plt.subplots()

def mk_plot(val):  
    seg_id = df_const[(df_const['experiment']==sli_exp.value) & (df_const['segment']==sli_seg.value)].iloc[0].name
    
    lbl.value=str(df_const.loc[seg_id]['io_label'])
    
    
    ax.clear()
    ax.plot(dfs[seg_id]['curr_norm'])
    
#Controls:
lbl = widgets.Label('Empty')
sli_exp = widgets.IntSlider(min=min(df_const['experiment']),max=max(df_const['experiment']))
sli_exp.observe(mk_plot,'value')

sli_seg = widgets.IntSlider(min=min(df_const['segment']),max=max(df_const['segment']))
sli_seg.observe(mk_plot,'value')
io = widgets.HBox([widgets.VBox([widgets.Label('Experiment'),sli_exp]),widgets.VBox([widgets.Label('Segment'),sli_seg]),widgets.VBox([widgets.Label('io_label'),lbl])])

mk_plot(None)
io

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

HBox(children=(VBox(children=(Label(value='Experiment'), IntSlider(value=0, max=2))), VBox(children=(Label(val…

In [14]:
data_x_good = []
data_y_good = []
data_x_bad = []
data_y_bad = []

for i,k in enumerate(dfs.keys()):
    data = pa.resample(dfs[k]['curr_norm'],0.1) #Jonas: Neues Notebook: curr_norm
    
    if df_const['io_label'].loc[k] == True:
        data_x_good.append(data)
        data_y_good.append(df_const['io_label'].loc[k])
    else:
        data_x_bad.append(data)
        data_y_bad.append(df_const['io_label'].loc[k])

In [15]:
TRAIN_X = data_x_good[:200]
TRAIN_Y = data_y_good[:200]

VALID_X = data_x_good[180:200]
VALID_Y = data_y_good[180:200]

TEST_X = data_x_good[200:]+data_x_bad
TEST_Y = data_y_good[200:]+data_y_bad

In [16]:
plt.close('all')

fig, ax = plt.subplots()

def on_change(value):
    ax.clear()
    ax.plot(TRAIN_X[value.new])
    with out:
        clear_output()
        print(TRAIN_Y[value.new])
    

slider = widgets.IntSlider(min=0,max=len(TRAIN_X))
slider.observe(on_change,'value')
out = widgets.Output()


widgets.HBox([slider,out])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

HBox(children=(IntSlider(value=0, max=200), Output()))

In [17]:
import keras

def Resampling_length(x:list):
    
    neu_list=[]
    
    maxlen=len(max(x,key=len))
    
    maxlen=166
    
    for i in range (len(x)) :
        je_len=len(x[i])
        factor=round(je_len/maxlen/10,3)
        
        if je_len<maxlen:
            re_array=pa.resample(x[i],factor)
            neu_list.append(re_array)
       
        else:
            neu_list.append(x[i])
    
    neu_list=keras.preprocessing.sequence.pad_sequences(neu_list, maxlen=maxlen, dtype='float32', padding='pre', truncating='pre', value=0.0)
    #print(type(neu_list))
    return neu_list              

In [18]:
TRAIN_X_neu=Resampling_length(TRAIN_X)


VALID_X_neu=Resampling_length(VALID_X)


TEST_X_neu=Resampling_length(TEST_X)

In [19]:
plt.close('all')

fig, ax = plt.subplots()

def on_change(value):
    ax.clear()
    ax.plot(VALID_X_neu[value.new])
    with out:
        clear_output()
        print(VALID_Y[value.new])
    

slider = widgets.IntSlider(min=0,max=len(VALID_X_neu))
slider.observe(on_change,'value')
out = widgets.Output()


widgets.HBox([slider,out])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

HBox(children=(IntSlider(value=0, max=20), Output()))

In [20]:
TRAIN_X=pd.DataFrame(TRAIN_X_neu)  #Dataframe entfalten
TEST_X=pd.DataFrame(TEST_X_neu)
VALID_X=pd.DataFrame(VALID_X_neu)
# TRAIN_X.fillna(0,inplace = True) #NaN Fill
VALID_X.fillna(0,inplace=True)
TEST_X.fillna(0,inplace=True)
TRAIN_X=np.array(TRAIN_X,dtype="object")
TEST_X=np.array(TEST_X,dtype="object")
VALID_X=np.array(VALID_X,dtype="object")

In [21]:
data_valid=pd.DataFrame(VALID_X)
data_valid.shape

(20, 166)

In [22]:
from sklearn.preprocessing import Normalizer
normalizer=Normalizer(norm='l2')

data_train=normalizer.transform(TRAIN_X)
data_valid=normalizer.transform(data_valid)
data_test=normalizer.transform(TEST_X)

In [23]:
plt.close('all')

fig, ax = plt.subplots()

def on_change(value):
    ax.clear()
    ax.plot(data_train[value.new])
    with out:
        clear_output()
        print(TRAIN_Y[value.new])
    

slider = widgets.IntSlider(min=0,max=len(data_train))
slider.observe(on_change,'value')
out = widgets.Output()


widgets.HBox([slider,out])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

HBox(children=(IntSlider(value=0, max=200), Output()))

In [24]:
data_train.shape[1]

166

In [45]:
data_test.shape

(70, 166)

# Parameter erstellen 

In [37]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior() 

In [53]:
latent_size = 16
encoders_sizes = np.linspace(latent_size, data_train.shape[1], 64).astype('int')
vae = VAE((data_train.shape[1],), encode_sizes=encoders_sizes, latent_size=latent_size, lr=0.0005)



In [36]:
history=vae.fit(data_train, epochs=100, batch_size=1)
p_x = vae.reconstructed_probability(data_train)
np.save('P_x', p_x)



 Epoch 1 - batch 0 - neg_ELBO = 136.18426513671875
 Epoch 1 - batch 50 - neg_ELBO = 107.23762512207031
 Epoch 1 - batch 100 - neg_ELBO = 74.89546203613281
 Epoch 1 - batch 150 - neg_ELBO = 14.104019165039062

Epoch 1 - Avg loss = 69.95962010622024

----------------------------------------------------------------------
 Epoch 2 - batch 0 - neg_ELBO = 4.3364715576171875
 Epoch 2 - batch 50 - neg_ELBO = 4.104082107543945
 Epoch 2 - batch 100 - neg_ELBO = 3.3016879558563232
 Epoch 2 - batch 150 - neg_ELBO = 2.953464984893799

Epoch 2 - Avg loss = 3.266316763162613

----------------------------------------------------------------------
 Epoch 3 - batch 0 - neg_ELBO = 2.9889235496520996
 Epoch 3 - batch 50 - neg_ELBO = 2.8736207485198975
 Epoch 3 - batch 100 - neg_ELBO = 2.3951451778411865
 Epoch 3 - batch 150 - neg_ELBO = 2.3085007667541504

Epoch 3 - Avg loss = 2.420292662382126

----------------------------------------------------------------------
 Epoch 4 - batch 0 - neg_ELBO = 2.177148

 Epoch 26 - batch 100 - neg_ELBO = 1.5238009691238403
 Epoch 26 - batch 150 - neg_ELBO = 1.1002066135406494

Epoch 26 - Avg loss = 1.0322312223911285

----------------------------------------------------------------------
 Epoch 27 - batch 0 - neg_ELBO = 1.2031705379486084
 Epoch 27 - batch 50 - neg_ELBO = 1.494158387184143
 Epoch 27 - batch 100 - neg_ELBO = 1.1084623336791992
 Epoch 27 - batch 150 - neg_ELBO = 0.9042102694511414

Epoch 27 - Avg loss = 1.016342033445835

----------------------------------------------------------------------
 Epoch 28 - batch 0 - neg_ELBO = 0.8246552348136902
 Epoch 28 - batch 50 - neg_ELBO = 0.8920706510543823
 Epoch 28 - batch 100 - neg_ELBO = 1.5128121376037598
 Epoch 28 - batch 150 - neg_ELBO = 0.8926951289176941

Epoch 28 - Avg loss = 1.008898805975914

----------------------------------------------------------------------
 Epoch 29 - batch 0 - neg_ELBO = 0.9851495027542114
 Epoch 29 - batch 50 - neg_ELBO = 0.9261157512664795
 Epoch 29 - batch 100 

 Epoch 51 - batch 150 - neg_ELBO = 0.8635830879211426

Epoch 51 - Avg loss = 0.7750945723056794

----------------------------------------------------------------------
 Epoch 52 - batch 0 - neg_ELBO = 0.7884067296981812
 Epoch 52 - batch 50 - neg_ELBO = 0.7020179033279419
 Epoch 52 - batch 100 - neg_ELBO = 0.8674929141998291
 Epoch 52 - batch 150 - neg_ELBO = 0.7125254273414612

Epoch 52 - Avg loss = 0.7790319967269898

----------------------------------------------------------------------
 Epoch 53 - batch 0 - neg_ELBO = 0.7073894143104553
 Epoch 53 - batch 50 - neg_ELBO = 0.718885600566864
 Epoch 53 - batch 100 - neg_ELBO = 0.832739531993866
 Epoch 53 - batch 150 - neg_ELBO = 0.5579002499580383

Epoch 53 - Avg loss = 0.767430075109005

----------------------------------------------------------------------
 Epoch 54 - batch 0 - neg_ELBO = 0.5914533138275146
 Epoch 54 - batch 50 - neg_ELBO = 0.5586820840835571
 Epoch 54 - batch 100 - neg_ELBO = 0.7456687688827515
 Epoch 54 - batch 150 


Epoch 76 - Avg loss = 0.6805380128324032

----------------------------------------------------------------------
 Epoch 77 - batch 0 - neg_ELBO = 1.0740811824798584
 Epoch 77 - batch 50 - neg_ELBO = 0.5371919274330139
 Epoch 77 - batch 100 - neg_ELBO = 0.6009485125541687
 Epoch 77 - batch 150 - neg_ELBO = 0.5125572681427002

Epoch 77 - Avg loss = 0.6686445070803165

----------------------------------------------------------------------
 Epoch 78 - batch 0 - neg_ELBO = 0.7537063360214233
 Epoch 78 - batch 50 - neg_ELBO = 1.0419402122497559
 Epoch 78 - batch 100 - neg_ELBO = 0.6730476021766663
 Epoch 78 - batch 150 - neg_ELBO = 0.7009143233299255

Epoch 78 - Avg loss = 0.6829746857285499

----------------------------------------------------------------------
 Epoch 79 - batch 0 - neg_ELBO = 0.5128962993621826
 Epoch 79 - batch 50 - neg_ELBO = 0.909134030342102
 Epoch 79 - batch 100 - neg_ELBO = 0.5785324573516846
 Epoch 79 - batch 150 - neg_ELBO = 0.6191568970680237

Epoch 79 - Avg loss

In [29]:
# latent_size = 8
# encoders_sizes = np.linspace(data_train.shape[1], latent_size, 7).astype('int')[1:-1]
# with VAE((data_train.shape[1],), encode_sizes=encoders_sizes, latent_size=latent_size, lr=0.00001) as vae:
#         vae.fit(data_train, epochs=200, batch_size=256)
#         p_x = vae.reconstructed_probability(data_train)
# np.save('P_x', p_x)

In [125]:
# VAE.fit(acc_loss)

In [124]:
# plt.figure()
# plt.plot(history.acc_loss['Avg loss'],label="Training  Loss")
# #plt.plot(history.history["val_loss"],label="Validation Loss")
# plt.legend()

In [38]:
encoders_sizes.shape


(64,)

In [39]:
type(p_x)


numpy.ndarray

In [68]:
px=p_x.reshape(200,1)

In [69]:
px.shape

(200, 1)

In [41]:
p_x

array([inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, in

In [42]:
out=vae.is_outlier

In [44]:
type(out)

method

In [56]:
test_pred=history.is_outlier(data_test)

AttributeError: 'NoneType' object has no attribute 'is_outlier'