In [1]:
if 'np' not in globals():
    import numpy as np

In [5]:
def calculate_absolute_prediction_errors(y_true, y_pred):
    '''
        Element-wise absolute error between predictions and targets.

        Both inputs are flattened with their own ``.flatten()`` method before
        the difference is taken, so they must expose that method (e.g. NumPy
        arrays). Returns ``|y_pred - y_true|`` over the flattened values.
    '''
    predicted_flat = y_pred.flatten()
    actual_flat = y_true.flatten()
    return abs(predicted_flat - actual_flat)

In [3]:
def calculate_squared_prediction_errors(y_true, y_pred):
    '''
        Element-wise squared error between predictions and targets.

        Both inputs are flattened with their own ``.flatten()`` method before
        the difference is taken, so they must expose that method (e.g. NumPy
        arrays). Returns ``(y_pred - y_true) ** 2`` over the flattened values.
    '''
    residuals = y_pred.flatten() - y_true.flatten()
    return np.square(residuals)

In [1]:
def calculate_3sigma_threshold(data, n_sigma=3):
    '''
        Lower and upper thresholds at ``mean(data) -/+ n_sigma * std(data)``.

        data    : array-like of numeric values (anything np.mean / np.std accept).
        n_sigma : width of the band in standard deviations; defaults to 3, which
                  reproduces the classic three-sigma rule.

        Returns a tuple ``(lower_err_threshold, upper_err_threshold)``.
        np.std is used with its default settings (population standard deviation).
    '''
    # Compute each statistic once instead of once per bound.
    center = np.mean(data)
    half_width = n_sigma * np.std(data)
    return (center - half_width, center + half_width)

In [None]:
def calculate_3sigma_anomalies(data, lo_3sigma, up_3sigma):
    '''
        Flag every value lying strictly outside ``[lo_3sigma, up_3sigma]``.

        data      : array-like of numeric values, any shape.
        lo_3sigma : lower threshold; values strictly below it are anomalies.
        up_3sigma : upper threshold; values strictly above it are anomalies.

        Returns a boolean NumPy array with the same shape as ``data``.
        Values equal to a threshold are not flagged, and NaN compares False
        on both sides so it is never flagged.
    '''
    values = np.asarray(data)
    # Vectorised mask: works for n-dimensional input and always yields dtype bool,
    # including for empty input.
    return (values > up_3sigma) | (values < lo_3sigma)

In [3]:
def calculate_zscore_anomalies(data):
    '''
        Z-scores measure how far, in standard deviations, each data point is from
        the mean of the dataset: ``z = (x - mean) / std``.
        Any point with ``z >= 3`` or ``z <= -3`` is flagged as an anomaly.

        data : array-like of numeric values, any shape.

        Returns ``(anomalies, z_scores)``:
          anomalies : boolean array, same shape as ``data``.
          z_scores  : float array, same shape as ``data``.

        Best-effort behaviour: if ``data`` cannot be converted to floats, or is
        empty, all-False anomalies and all-zero z-scores are returned instead of
        raising.
    '''
    shape = np.shape(data)
    try:
        values = np.asarray(data, dtype=float)
    except (TypeError, ValueError):
        # Non-numeric input: keep the original "return zeros" contract, but only
        # for conversion failures rather than for every possible exception.
        return np.zeros(shape, dtype=bool), np.zeros(shape)

    if values.size == 0:
        # Avoid NaN statistics (and RuntimeWarnings) on empty input.
        return np.zeros(shape, dtype=bool), np.zeros(shape)

    std_dev = np.std(values)
    if std_dev == 0 or not np.isfinite(std_dev):
        std_dev = 1.0  # doing what scikit-learn does when std==0

    # Divide by the standard deviation exactly once.
    z_scores = (values - np.mean(values)) / std_dev
    anomalies = (z_scores >= 3) | (z_scores <= -3)
    return anomalies, z_scores

In [5]:
def calculate_modified_zscore_anomalies(data):
    '''
        Modified z-scores measure how far each data point is from the median of
        the dataset, scaled by a robust spread estimate:

          MAD != 0 : (x - median) / (1.4826   * MAD)
          MAD == 0 : (x - median) / (1.253314 * MeanAD)

        where MAD is the median absolute deviation from the median and MeanAD is
        the mean absolute deviation from the median.
        Any point with a score ``>= 3`` or ``<= -3`` is flagged as an anomaly.

        data : array-like of numeric values, any shape.

        Returns ``(anomalies, mod_z_scores)``:
          anomalies    : boolean array, same shape as ``data``.
          mod_z_scores : float array, same shape as ``data``.

        Best-effort behaviour: if ``data`` cannot be converted to floats, or is
        empty, all-False anomalies and all-zero scores are returned instead of
        raising.
    '''
    k_mad = 1.4826
    k_meanAD = 1.253314 #https://www.ibm.com/docs/en/cognos-analytics/11.1.0?topic=terms-modified-z-score

    shape = np.shape(data)
    try:
        values = np.asarray(data, dtype=float)
    except (TypeError, ValueError):
        # Non-numeric input: keep the original "return zeros" contract, but only
        # for conversion failures rather than for every possible exception.
        return np.zeros(shape, dtype=bool), np.zeros(shape)

    if values.size == 0:
        # Avoid NaN statistics (and RuntimeWarnings) on empty input.
        return np.zeros(shape, dtype=bool), np.zeros(shape)

    deviations = values - np.median(values)
    abs_deviations = np.abs(deviations)
    mad = np.median(abs_deviations)

    if mad == 0:
        # More than half of the points equal the median, so fall back to the
        # mean *absolute* deviation (a signed mean could be zero or negative).
        mean_ad = np.mean(abs_deviations)
        if mean_ad == 0 or not np.isfinite(mean_ad):
            mean_ad = 1.0  # constant data: every score becomes 0
        scale = k_meanAD * mean_ad
    else:
        scale = k_mad * mad

    mod_z_scores = deviations / scale
    anomalies = (mod_z_scores >= 3) | (mod_z_scores <= -3)
    return anomalies, mod_z_scores

In [3]:
def calculate_absolute_prediction_errors2(y_true, y_pred):
    '''
        Element-wise absolute error ``|y_pred - y_true|``.

        Unlike calculate_absolute_prediction_errors, the inputs are not
        flattened: the result has whatever shape ``y_pred - y_true`` produces.
    '''
    residuals = y_pred - y_true
    return abs(residuals)

In [2]:
def calculate_squared_prediction_errors2(y_true, y_pred):
    '''
        Element-wise squared error ``(y_pred - y_true) ** 2``.

        Unlike calculate_squared_prediction_errors, the inputs are not
        flattened: the result has whatever shape ``y_pred - y_true`` produces.
    '''
    residuals = y_pred - y_true
    return np.square(residuals)

In [None]:
# AIC calculation taken from https://ieeexplore.ieee.org/document/1100705
def calculate_aic(n, mse, num_params):
    '''
        Akaike Information Criterion computed as ``n * ln(mse) + 2 * num_params``.

        n          : multiplier applied to ln(mse) (presumably the number of
                     observations - confirm against callers).
        mse        : value whose natural log is taken (presumably the model's
                     mean squared error - confirm against callers).
        num_params : count added with weight 2 as the complexity penalty.

        Returns the AIC value, or 0 if the computation raises (best-effort
        fallback). Note that with NumPy's default error settings a non-positive
        ``mse`` does not raise: np.log yields -inf or nan, and that value is
        propagated into the result.
    '''
    try:
        return n * np.log(mse) + 2 * num_params
    except (TypeError, ValueError, ArithmeticError, AttributeError):
        # Keep the 0 fallback for bad inputs, without also trapping
        # KeyboardInterrupt / SystemExit as a bare except would.
        return 0