> Copyright 2022 University of Luxembourg
> 
> Licensed under the Apache License, Version 2.0 (the "License");  
> you may not use this file except in compliance with the License.  
> You may obtain a copy of the License at  
>
>    https://www.apache.org/licenses/LICENSE-2.0
>
> Unless required by applicable law or agreed to in writing, software  
> distributed under the License is distributed on an "AS IS" BASIS,  
> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
> See the License for the specific language governing permissions and  
> limitations under the License.  
>
***

Author: Andrzej Mizera (andrzej.mizera@uni.lu)

***

# Auxiliary functions used by the AtMonSat anomaly detection algorithm

In [None]:
from scipy import interpolate

---
### For the computation of the Interpolated Iterations Since Last Change (IISLC) features.

In [None]:
def getInterpolationValues(start_value,num_iterations):
    """Sample a straight line that falls from ``start_value`` to 0.

    The line passes through (0, start_value) and (num_iterations, 0) and is
    evaluated at the interior integer positions 1, 2, ..., num_iterations - 1,
    so neither end point is part of the result.

    When the externally defined setting ``ISLC_implementation`` equals
    'uint32', the samples are cast to uint32 first (discarding the fractional
    part of non-negative values) and only then to float32.

    Returns:
        A float32 array holding ``num_iterations - 1`` interpolated values.
    """
    # Read the setting up front, before any interpolation work is done.
    truncate_to_uint = (ISLC_implementation == 'uint32')

    ramp = interpolate.interp1d([0, num_iterations], [start_value, 0])
    samples = ramp(np.arange(1, num_iterations, 1))

    if truncate_to_uint:
        samples = samples.astype('uint32')

    return samples.astype('float32')

In [None]:
def getIterationsSinceLastChangeMicro(df,features,updated=False):
    """Build the interpolated "iterations since last change" table for ``df``.

    The rows of ``df`` are scanned in order. For every column named in
    ``features`` a raw counter is kept: it grows by one whenever the value in
    the current row equals the value in the next row, and restarts at 0 when
    the two differ.

    Each time a column's value changes, part of that column's already recorded
    history is overwritten with a linear ramp produced by
    ``getInterpolationValues``:

    * ``span`` is the smallest raw counter over *all* features at the
      previous row, plus one;
    * the ramp starts from the value recorded ``span`` rows before the newly
      appended row and runs down towards 0;
    * its ``span - 1`` samples replace the rows directly preceding the newly
      appended row.

    Args:
        df: object indexed as ``df[name].iat[k]`` and exposing ``df.index``
            (presumably a pandas DataFrame -- confirm with the caller).
        features: iterable of column names to process.
        updated: when true, a ramp start value of 0 is replaced by the
            column's raw counter at the previous row plus one.

    Returns:
        A float32 array with one column per feature and one row per row of
        ``df`` (a single all-zero row is returned even for an empty ``df``).
    """
    cols = list(features)
    n_cols = len(cols)

    # Raw (non-interpolated) counters belonging to the most recent row.
    prev_counts = np.zeros(n_cols, np.float32)

    # One list per processed row; earlier rows get patched with ramp values.
    history = [prev_counts.tolist()]

    for row in range(len(df.index) - 1):

        curr_counts = np.zeros(n_cols, np.float32)
        changed = []

        for col_pos, col in enumerate(cols):
            series = df[col]
            if series.iat[row] == series.iat[row + 1]:
                curr_counts[col_pos] = prev_counts[col_pos] + 1
            else:
                # Counter stays at 0; remember the column for interpolation.
                changed.append(col_pos)

        history.append(curr_counts.tolist())

        if changed:
            # Number of rows to look back, shared by every changed column.
            span = int(min(prev_counts)) + 1

            for col_pos in changed:
                start = history[-1 - span][col_pos]
                if updated and start == 0:
                    start = prev_counts[col_pos] + 1

                # Overwrite rows -span .. -2; the freshly appended row is kept.
                for offset, value in enumerate(getInterpolationValues(start, span)):
                    history[offset - span][col_pos] = value

        prev_counts = curr_counts

    return np.asarray(history, np.float32)

---
### For splitting time-series data into input-output pairs for model training, validation, and testing.

In [None]:
# Cut sliding windows of length "look_back" out of "ts_in" and pair each of them
# with the "pred_length" rows of "ts_out" that come right after the window.
def create_subseq(ts_in, ts_out, look_back, pred_length, features=None):
    """Split time series into (input window, following values) pairs.

    Window ``k`` covers ``ts_in[k:k+look_back]``; its label is
    ``ts_out[k+look_back:k+look_back+pred_length]``, restricted to the columns
    ``features`` when that argument is given.

    Returns:
        A tuple ``(ts_in.shape[1], ts_out.shape[1], windows, labels)`` with
        ``windows`` and ``labels`` as NumPy arrays. The second element is the
        full column count of ``ts_out`` even when ``features`` selects a
        subset.
    """
    starts = range(len(ts_in) - look_back - pred_length + 1)

    windows = [ts_in[s:s + look_back] for s in starts]

    if features is None:
        labels = [ts_out[s + look_back:s + look_back + pred_length] for s in starts]
    else:
        labels = [ts_out[s + look_back:s + look_back + pred_length, features] for s in starts]

    return (ts_in.shape[1], ts_out.shape[1], np.array(windows), np.array(labels))

In [None]:
# Window extraction for the AutoEncoder model
def create_subseq_AE(ts_in, look_back, features=None):
    """Cut ``ts_in`` into sliding windows that serve as both input and target.

    An autoencoder reconstructs its own input, so no "next values" are
    needed: the target of window ``k`` is the same slice
    ``ts_in[k:k+look_back]``, restricted to the columns ``features`` when that
    argument is given.

    Window start positions run from 0 to ``len(ts_in) - look_back - 1``, i.e.
    the last possible full window (the one ending at the final row) is not
    included.

    Returns:
        A tuple ``(ts_in.shape[1], ts_in.shape[1], inputs, targets)`` with
        ``inputs`` and ``targets`` as NumPy arrays. Both leading entries are
        the full column count of ``ts_in`` even when ``features`` selects a
        subset.
    """
    starts = range(0, len(ts_in) - look_back)

    inputs = [ts_in[s:s + look_back] for s in starts]

    if features is None:
        targets = [ts_in[s:s + look_back] for s in starts]
    else:
        targets = [ts_in[s:s + look_back, features] for s in starts]

    n_columns = ts_in.shape[1]
    return (n_columns, n_columns, np.array(inputs), np.array(targets))

---
### Loss functions

In [None]:
# Select the training loss ``loss_fun`` according to the analysis mode.
# The names PCA_higher_order_analysis, kPCA, pca, first_higher_order_PCA and
# tf are expected to be defined before this cell is executed.
if PCA_higher_order_analysis:
    # Per-component variances used to normalise the squared errors.
    if not kPCA:
        pca_variances = pca.explained_variance_
    elif hasattr(pca, "eigenvalues_"):
        # NOTE(review): `pca` looks like a scikit-learn KernelPCA here, where
        # `lambdas_` was renamed to `eigenvalues_` (deprecated in 1.0, removed
        # in 1.2) -- prefer the new attribute when it exists.
        pca_variances = pca.eigenvalues_
    else:
        # Fallback for objects that only expose the old attribute name.
        pca_variances = pca.lambdas_

    # Keep only the components from index `first_higher_order_PCA` onwards.
    pca_variances = tf.constant(pca_variances[first_higher_order_PCA:],dtype=tf.float32)

    def pca_score_error(y_true, y_pred):
        """Sum of squared errors, each divided by the matching PCA variance.

        ``y_true`` is cast to the dtype of ``y_pred``. The division by
        ``pca_variances`` relies on broadcasting, and the sum runs over every
        element of the tensor, so a single scalar is returned.
        """
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)

        return tf.math.reduce_sum(tf.math.divide(tf.math.square(y_true - y_pred),pca_variances))

    loss_fun = pca_score_error

else:

    def vec_length_error(y_true, y_pred):
        """Euclidean length of the error ``y_pred - y_true``.

        ``y_true`` is cast to the dtype of ``y_pred``. The squares are summed
        over every element of the tensor before the square root is taken, so
        a single scalar is returned.
        """
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)

        return tf.math.sqrt(tf.math.reduce_sum(tf.math.square(y_pred - y_true)))

    loss_fun = vec_length_error