### Adaptive Fault Detection with VAR Models

##### Import libraries needed

In [15]:
# importing packages and libraries
from pandas import read_csv
import pandas as pd
import numpy as np
import pickle
import os
from statsmodels.tsa.api import VAR
import matplotlib.pyplot as plt

# Ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")

# Define the plot size default
from pylab import rcParams
rcParams['figure.figsize'] = (12, 5)

### Defining methods or functions

In [16]:
# Plotting multiple series
def plot_multiple_series(actual, pred, attr):
    """Show one figure per attribute comparing actual values with forecasts.

    Args:
        actual: Object indexed with ``.iloc[:, i]``; column ``i`` is drawn
            with the label "actual".
        pred: Object indexed with ``.iloc[:, i]``; column ``i`` is drawn
            with the label "forecast".
        attr: Sequence of attribute names. Its length decides how many
            figures are shown and each name is used in the figure title.
    """
    for column, name in enumerate(attr):
        # Columns are matched by position, not by name.
        actual_values = actual.iloc[:, column]
        forecast_values = pred.iloc[:, column]

        plt.title(f"Prediction of {name}")
        plt.xlabel("Timestep")
        plt.ylabel("Values")
        plt.plot(actual_values, label="actual")
        plt.plot(forecast_values, label="forecast")
        plt.legend()
        plt.show()


# Root mean squared error
def root_mse(x, y):
    """Return the root mean squared error between ``x`` and ``y``.

    Args:
        x: Sequence of numeric values (list, NumPy array, pandas object, ...).
        y: Sequence of numeric values with the same length as ``x``.

    Returns:
        The square root of the mean of the squared element-wise differences.

    Raises:
        ValueError: If ``x`` and ``y`` do not have the same length. An
            exception is raised (rather than returning an error string) so a
            mismatch can never be stored by a caller as if it were an RMSE.
    """
    if len(x) != len(y):
        raise ValueError(
            "The two arguments must have the same length "
            f"(got {len(x)} and {len(y)})"
        )
    # NOTE(review): the subtraction is delegated to np.subtract unchanged, so
    # pandas inputs are combined by pandas' own rules -- the caller in this
    # file resets the index of the expected values before calling.
    mse = np.square(np.subtract(x, y)).mean()
    return np.sqrt(mse)

# Plotting series
def plot_series(series, attr):
    """Show one figure per attribute of ``series``.

    Args:
        series: Object indexed with ``.iloc[:, i]``; column ``i`` is plotted
            in the i-th figure.
        attr: Sequence of attribute names. Each name is used both in the
            figure title and as the y-axis label.
    """
    for column, name in enumerate(attr):
        # Columns are selected by position, not by name.
        values = series.iloc[:, column]

        plt.title(f"Plot of {name!s}")
        plt.xlabel("Timestep")
        plt.ylabel(name)
        plt.plot(values)
        plt.show()

# Normalisation of time series
def normalise_timeseries(data, return_stats=False):
    """Standardise every feature (column) to zero mean and unit deviation.

    The mean and the standard deviation are computed along axis 0 with
    ``np.mean`` / ``np.std`` and each feature is transformed as
    ``(value - mean) / std``.

    A feature whose standard deviation is exactly 0 (a constant column) is
    divided by 1 instead, so it becomes all zeros rather than NaN (0 / 0).

    Args:
        data: Two-dimensional numeric data, e.g. a ``pandas.DataFrame`` or a
            NumPy array, with one feature per column.
        return_stats: When True, also return the statistics that
            ``denormalise_timeseries`` needs to undo the transformation.

    Returns:
        The normalised data as a ``pandas.DataFrame``. If ``return_stats`` is
        True, a tuple ``(normalised_data, means, stds)`` where ``stds`` holds
        the unmodified standard deviations (zeros included).
    """
    # Calculate the mean and standard deviation for each feature
    means = np.mean(data, axis=0)
    stds = np.std(data, axis=0)

    # Replace zero deviations by 1 for the division only; the true statistics
    # are kept untouched so they can be returned to the caller.
    if isinstance(stds, pd.Series):
        divisors = stds.mask(stds == 0, 1.0)
    else:
        divisors = np.where(np.asarray(stds) == 0, 1.0, stds)

    # Normalise each feature using standard deviation
    normalised_data = pd.DataFrame((data - means) / divisors)

    if return_stats:
        return normalised_data, means, stds
    return normalised_data


# Denormalisation of time series
def denormalise_timeseries(data, means, stds):
    """Map normalised values back to their original scale.

    Args:
        data: Normalised values.
        means: Per-feature offsets added back after rescaling.
        stds: Per-feature scale factors the data is multiplied by.

    Returns:
        ``data * stds + means`` wrapped in a ``pandas.DataFrame``.
    """
    rescaled = data * stds
    restored = rescaled + means
    return pd.DataFrame(restored)


# Augmented Dickey-Fuller Test
def adf_test(series, title=''):
    '''
    Hypothesis Test for Stationarity

    Run the Augmented Dickey-Fuller test on a time series and print a report:
    the test statistic, p-value, number of lags, number of observations, the
    critical values, and a verdict based on a 0.05 p-value cut-off.

    Args:
        series: Time series to test; missing values are dropped first via
            ``series.dropna()``.
        title: Optional label printed in the report header.

    Returns:
        None. The report is printed, not returned.
    '''
    # Imported here because the notebook's import cell does not bring
    # ``adfuller`` into scope; without this the call below is a NameError.
    from statsmodels.tsa.stattools import adfuller

    print(f'Augmented Dickey-Fuller Test: {title}')
    result = adfuller(series.dropna(), autolag='AIC')
    # use help(adfuller) to understand why these labels are chosen
    labels = ['ADF test statistics', 'p-value', '#lags', '#observations']

    outcome = pd.Series(result[0:4], index=labels)

    # result[4] maps each significance level to its critical value
    for key, val in result[4].items():
        outcome[f'critical value ({key})'] = val

    print(outcome.to_string())  # this will not print the line 'dtype:float64'

    # Ho: the data is not stationary (see help(adfuller))
    if result[1] <= 0.05:
        print('Strong evidence against the null hypothesis')
        print('Reject the null hypothesis')
        print('Data is Stationary')
    else:
        print('Weak evidence against the Null hypothesis')
        print('Fail to reject the null hypothesis')
        print('Data has a unit root and is non stationary')


# Loading expert models in a dictionary
def load_expert_models(expert_path):
    """Load every pickled model found directly inside ``expert_path``.

    Args:
        expert_path: Directory containing ``*.pkl`` files.

    Returns:
        A dict mapping each file name without its ``.pkl`` extension to the
        unpickled object. Files are visited in sorted name order so the
        dict order is reproducible across runs.

    Raises:
        OSError: If ``expert_path`` cannot be listed or a file cannot be read.
    """
    models = {}

    for file_name in sorted(os.listdir(expert_path)):
        if not file_name.endswith('.pkl'):
            continue

        # os.listdir returns bare names; they must be joined with the
        # directory, otherwise loading only works when the current working
        # directory happens to be ``expert_path``.
        full_path = os.path.join(expert_path, file_name)
        if not os.path.isfile(full_path):
            continue

        # splitext strips only the final extension, so names that contain
        # dots (e.g. "model.v2.pkl") keep distinct keys.
        model_name = os.path.splitext(file_name)[0]

        # SECURITY: pickle.load can execute arbitrary code. Only point this
        # at model files that come from a trusted source.
        with open(full_path, 'rb') as f:
            models[model_name] = pickle.load(f)

    return models


In [52]:
# Assigning variables
file = 'test_series_reduced.csv'
expert_path = 'expert_models'
# The first CSV column is used as the row index, so it is not a feature.
df_raw = read_csv(file, header=0, index_col=0)
# Feature names are taken from the frame that is already loaded instead of
# parsing the CSV a second time just to read its header.
attr = list(df_raw.columns)
# First 40000 rows; only used by the optional plot below.
series = df_raw.iloc[:40000,:]
# plot_series(series, attr)
nobs = 3000     # number of trailing rows held out as the test split
steps = 15      # forecast horizon passed to the models' forecast() calls
begin = 2000    # first row (inclusive) of the window fed to the models
finish = 2200   # end row (exclusive) of that window
normalised_data = normalise_timeseries(df_raw)
testData = normalised_data.copy()
train = testData.iloc[:-nobs]
test = testData.iloc[-nobs:]
# len(train), len(test)
# Observations handed to each model as the history to forecast from.
input1 = testData.iloc[begin:finish,:]
# plot_series(input1, attr)
# Filled later with one list of per-feature RMSE values per model name.
prediction_error = {}

In [29]:
# Threshold prediction error value.
# NOTE(review): initialised to 0 and not read anywhere in the visible part of
# this notebook -- presumably tuned and used by a later cell (TODO confirm).
threshold = 0

In [49]:
# Load the expert models: every pickle in `expert_path`, keyed by file name
# without its extension.
my_experts = load_expert_models(expert_path)
# print(my_experts)
# Sanity check: fails with KeyError if this particular model was not loaded.
print(my_experts['var_ctrl_stuckat0_perm_reduced'])


<statsmodels.tsa.vector_ar.var_model.VARResultsWrapper object at 0x0000021B8BFA0190>


In [50]:
# Forecast `steps` rows ahead with one expert model, using the input window's
# raw values as the history, and label the result with the feature names.
pred = my_experts['var_ctrl_stuckat0_perm_reduced'].forecast(input1.values, steps=steps)
pred_df = pd.DataFrame(pred, columns=input1.columns)
print(pred_df)

    Tank2OutFlow  Tank2.puddle  Tank3OutFlow  Tank2.level  wt3_valve
0       0.167070     -1.384515     -0.463802    -0.241040  -0.905951
1       0.141919     -1.383891     -0.721586    -0.194511  -0.866651
2       0.125196     -1.383256     -0.843049    -0.098893  -0.833221
3       0.142421     -1.382417     -0.884744    -0.013308  -0.802514
4       0.172772     -1.381530     -0.949448     0.097097  -0.757113
5       0.164618     -1.380516     -0.942693     0.197030  -0.733433
6       0.127007     -1.379444     -0.979027     0.312421  -0.678512
7       0.121581     -1.378168     -0.919090     0.415575  -0.645130
8       0.151352     -1.376834     -0.915884     0.523388  -0.629673
9       0.167571     -1.375457     -0.884227     0.637498  -0.942391
10      0.147800     -1.374225     -1.295399     0.790190  -1.088435
11      0.121310     -1.373215     -1.472037     0.976467  -1.175066
12      0.122841     -1.372202     -1.544801     1.167983  -1.203015
13      0.147034     -1.371339    

In [53]:
# The rows that immediately follow the input window are the ground truth for
# every model, so slice them once. The index is reset so the rows line up
# with the 0..steps-1 index of each forecast frame.
expected = testData.iloc[finish:finish+steps,:].reset_index(drop=True)

for model_name, model in my_experts.items():
    # Forecast `steps` rows ahead from the shared input window.
    predictions = model.forecast(input1.values, steps=steps)
    predictions_df = pd.DataFrame(predictions, columns=input1.columns)

    # One RMSE per feature, in the same order as `attr`.
    RMSE = [root_mse(predictions_df[feature], expected[feature]) for feature in attr]
    prediction_error[model_name] = RMSE

print(prediction_error)

{'var_ctrl_stuckat0_perm_reduced': [0.007102182269043727, 0.008426043640523146, 0.734199579539484, 0.9042479663906864, 0.29496410176651794], 'var_ctrl_stuckat1_perm_reduced': [0.006846122248261372, 0.009497363321898862, 0.16098232318874026, 0.29123533230704285, 0.1680542097506921], 'var_ctrl_valueFlip_perm_reduced': [0.005517063708583547, 0.006751388299625716, 0.8405323572154951, 1.2433226706807265, 0.7744986923149207], 'var_golden_model_reduced': [0.012114486610909534, 0.006626271854935035, 0.3489223646683037, 0.6767479842540826, 0.2109692138410525]}
