### Using the model from the experiment that provided the best MSE value
Look up the results in MLflow for the best experiment and retrieve the model name and the MLflow `run_id`, then fill them in the appropriate cells of this notebook.

In [None]:
import os
# Process-wide environment settings for TensorFlow and Ray. They are set in the
# first cell so they are in place before the libraries are imported further down.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
os.environ["RAY_IGNORE_UNHANDLED_ERRORS"] = "1"
os.environ["RAY_memory_monitor_refresh_ms"] = "0" # do not kill raylet if low on memory
os.environ["RAY_TASK_MAX_RETRIES"] = "2"

In [None]:
import ray
import mlflow
import uuid

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Bidirectional,LSTM, Dense, Dropout, Flatten, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.regularizers import l1, l2, l1_l2

In [None]:
import numpy as np
import pandas as pd

In [None]:
import pytz, datetime, time
from datetime import timedelta, datetime
local_tz = pytz.timezone('America/Toronto') # Set local timezone

In [None]:
import matplotlib
import matplotlib.dates as md
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.rcParams['timezone'] = 'America/Toronto'

In [None]:
from keras.models import load_model, save_model
from sklearn.metrics import r2_score, mean_absolute_percentage_error, mean_squared_error, mean_absolute_error

In [None]:
%run '../lib/utils_anomaly_detection.ipynb'

In [None]:
from pathlib import Path
import os, sys

# Input files are paired by position further down (dataFileList[i] goes with
# dataLatencyFileList[i]), so both lists are built the same way and sorted.
# pathlib is used instead of `!ls` so that "no match" yields an empty list
# rather than whatever text `ls` printed, and so the cell does not need a shell.
RQ2_VALID_DIR = Path('../../data/rq2-valid')
dataFileList = sorted(str(p) for p in RQ2_VALID_DIR.glob('*msg-w-spikes*.csv'))
dataLatencyFileList = sorted(str(p) for p in RQ2_VALID_DIR.glob('*avg-latency*.csv'))

# Fail early on an incomplete data directory instead of mis-pairing files later.
if len(dataFileList) != len(dataLatencyFileList):
    raise ValueError(
        f'Expected one latency file per message file in {RQ2_VALID_DIR}, '
        f'found {len(dataFileList)} message file(s) and {len(dataLatencyFileList)} latency file(s)'
    )

In [None]:
# Show up to 5000 rows and every column when a DataFrame is displayed.
pd.set_option('display.max_rows', 5000)
# Use the public pd.set_option; `pd.pandas` is not a documented entry point.
pd.set_option('display.max_columns', None)

In [None]:
# Address of the MLflow server; used below as both the tracking and the registry URI.
MLFLOW_URI='http://localhost:8080/'

In [None]:
# FILL IN: run_id of the best experiment run. Its parameters are read below and
# the evaluation artifacts are logged under this run.
mlflow_run_id = 'de58401dd5a546359785695d5f867c66'

In [None]:
# FILL IN: name of the registered model; version 1 of it is loaded by the evaluation task.
model_name = 'CNN_valueStdScaled-T_15-LY_1-DLY_1-NN_32-LR_0.001-epochs_200-d056827f705b4f54aeaa6f4f15beddce.keras'

In [None]:
mlflow.set_tracking_uri(MLFLOW_URI)
mlflow.set_registry_uri(MLFLOW_URI)

In [None]:
#model = mlflow.tensorflow.load_model(f'models:/{model_name}/1')

# Parameters logged for the selected run (a dict of parameter name -> value).
model_params = mlflow.get_run(mlflow_run_id).to_dictionary()['data']['params']

In [None]:
# NOTE(review): not referenced anywhere else in the visible part of this notebook.
modelTypeName = "CNN_valueStdScaled"

In [None]:
# Column of the prepared data frame that is fed to prepare_dataset().
dataColumnName = 'valueStdScaled'

In [None]:
model_params

In [None]:
# Input window length, taken from the run's 'time_window' parameter.
T = int(model_params['time_window'])

In [None]:
"""
 This function uses one Tensor formatted input from the X_test dataset in order to predict ahead by a number
 of given steps. Then it readjusts by using another true value from X_test (as the nex index) before starting
 a new prediction cycle
"""
def generate_nsteps_forecast(x_test, nn_model, pred_ahead):
    max_len = x_test.shape[0]
    y_pred = []
    last_x = x_test[0]
    index = 0
    while len(y_pred) < max_len:
        sequence = 0
        while sequence < pred_ahead:
            try:
                x_crt_input = last_x.reshape(1, -1, 1)
                p_vector = nn_model.predict(x_crt_input, verbose=0)
                p = p_vector[0,0] # 1x1 array -> scalar
            except:
                print(f'Prediction error for x={x_crt_input} at sequence={sequence} for start index={index} when pred_ahead={pred_ahead}')
                print(f'Model config was:{nn_model.get_config()}')
                p = 0
                
            # update the predictions list
            y_pred.append(p)

            # make the new input
            last_x = np.roll(last_x, -1)
            last_x[-1] = p
            
            # increase index for the next run
            sequence += 1

        index += sequence
        if index < max_len:
            last_x = x_test[index]
            #print(f"Arrived at index = {index} of {max_len} with value X={last_x}")
    
    if len(y_pred) > max_len:
        # predicted too much, cutoff the tail
        y_pred = y_pred[0:max_len]
        
    return np.array(y_pred)

In [None]:
@ray.remote
def run_n_step_evaluation(model_name, run_id, predict_ahead, trial_fname):
    import mlflow
    mlflow.set_tracking_uri(MLFLOW_URI)
    mlflow.set_registry_uri(MLFLOW_URI)
    #model_params = mlflow.get_run(mlflow_run_id).to_dictionary()['data']['params']
    model = mlflow.tensorflow.load_model(f'models:/{model_name}/1')

    n_neurons = model_params['n_layer_size']
    learning_rate = model_params['learning_rate']
    n_epochs = model_params['epochs']
    model_exp = model_params['model_exp']
    #T = model_params['time_window']

    X_train, Y_train, X_test, Y_test = prepare_dataset(dataFrame[dataColumnName], T)

    y_predict = model.predict(X_test, verbose=0)
    y_pred_nsteps = generate_nsteps_forecast(X_test, model, predict_ahead)

    n_step_metrics = {}
    one_step_metrics = {}

    #one-step-prediction computations
    errors_ae = calculate_absolute_prediction_errors(Y_test, y_predict)
    anomalies_ae = calculate_3sigma_anomalies(errors_ae)
    errors_se = calculate_squared_prediction_errors(Y_test, y_predict)
    anomalies_se = calculate_3sigma_anomalies(errors_se)
    anomalies_3sigma_Y_test = calculate_3sigma_anomalies(Y_test)
    anomalies_3sigma_y_predict = calculate_3sigma_anomalies(y_predict)
    
    anomalies_Y_test, z_scores_Y_test = calculate_zscore_anomalies(Y_test)
    anomalies_y_predict, z_scores_y_predict = calculate_zscore_anomalies(y_predict)
    anomalies_errors_ae, z_scores_errors_ae = calculate_zscore_anomalies(errors_ae)
    anomalies_errors_se, z_scores_errors_se = calculate_zscore_anomalies(errors_se)
    
    anomalies_Y_test_mod, z_scores_Y_test_mod = calculate_modified_zscore_anomalies(Y_test)
    anomalies_y_predict_mod, z_scores_y_predict_mod = calculate_modified_zscore_anomalies(y_predict)
    anomalies_errors_ae_mod, z_scores_errors_ae_mod = calculate_modified_zscore_anomalies(errors_ae)
    anomalies_errors_se_mod, z_scores_errors_se_mod = calculate_modified_zscore_anomalies(errors_se)
    
    try:
        r2 = r2_score(Y_test, y_predict)
    except:
        r2 = 110
    if np.isnan(r2):
        r2 = 110

    try:
        mae = mean_absolute_error(Y_test, y_predict)
    except:
        mae = 100
    if np.isnan(mae):
        mae = 100

    try:
        mape = mean_absolute_percentage_error(Y_test, y_predict)
    except:
        mape = 100
    if np.isnan(mape):
        mape = 100

    try:
        mse = mean_squared_error(Y_test, y_predict)
    except:
        mse = 100
    if np.isnan(mse):
        mse = 100
    
    try:
        pcc = np.corrcoef(Y_test, y_predict.flatten())[0,1]
    except:
        pcc = 100
    if np.isnan(pcc):
        pcc = 100

    one_step_metrics= {
                        'r2_1Step': r2,
                        'mae_1Step': mae,
                        'mape_1Step': mape,
                        'mse_1Step': mse,
                        'pcc_1Step': pcc
    }

    #n-step predictions computations
    errors_ae2 = calculate_absolute_prediction_errors(Y_test, y_pred_nsteps)
    anomalies_ae2 = calculate_3sigma_anomalies(errors_ae2)        
    errors_se2 = calculate_squared_prediction_errors(Y_test, y_pred_nsteps)
    anomalies_se2 = calculate_3sigma_anomalies(errors_se2)
    anomalies_y_pred_nsteps_mod, z_scores_y_pred_nsteps_mod = calculate_modified_zscore_anomalies(y_pred_nsteps)
    anomalies_errors_ae2_mod, z_scores_errors_ae2_mod = calculate_modified_zscore_anomalies(errors_ae2)
    anomalies_errors_se2_mod, z_scores_errors_se2_mod = calculate_modified_zscore_anomalies(errors_se2)

    try:
        r2_nStep = r2_score(Y_test, y_pred_nsteps)
    except:
        r2_nStep = 100

    try:
        mae_nStep = mean_absolute_error(Y_test, y_pred_nsteps)
    except:
        mae_nStep = 100

    try:
        mape_nStep = mean_absolute_percentage_error(Y_test, y_pred_nsteps)
    except:
        mape_nStep = 100

    try:
        mse_nStep = mean_squared_error(Y_test, y_pred_nsteps)
    except:
        mse_nStep = 100

    try:
        pcc_nStep = np.corrcoef(Y_test, y_pred_nsteps.flatten())[0,1]
    except:
        pcc_nStep = 100

    crt_step = f'predict_ahead_{predict_ahead}'
    n_step_metrics = {
        'r2_nStep': r2_nStep,
        'mae_nStep': mae_nStep,
        'mape_nStep': mape_nStep,
        'mse_nStep': mse_nStep,
        'pcc_nStep': pcc_nStep
    }
    anomalies_3sigma_y_pred_nsteps = calculate_3sigma_anomalies(y_pred_nsteps)
    anomalies_y_pred_nsteps, z_scores_y_pred_nsteps = calculate_zscore_anomalies(y_pred_nsteps)
    anomalies_errors_ae2, z_scores_errors_ae2 = calculate_zscore_anomalies(errors_ae2)
    anomalies_errors_se2, z_scores_errors_se2 = calculate_zscore_anomalies(errors_se2)

    result = {
        'one_step_metrics' : one_step_metrics,
        'n_step_metrics' : n_step_metrics
    }
    
    with mlflow.start_run(run_id=run_id, nested=True):
        fig = plt.figure(figsize=(20,15))
        title = f'Anomalies Y_test for {trial_fname}'
        plt.title(title)
        plt.plot(Y_test,label="Original Data", alpha=0.6, c='gray')
        plt.scatter(np.where(anomalies_Y_test==True)[0], Y_test[np.where(anomalies_Y_test==1)], 
                    alpha=0.8, color='green', s=250, label="Z-Score Anomalies")
        plt.scatter(np.where(anomalies_3sigma_Y_test==True)[0], Y_test[np.where(anomalies_3sigma_Y_test==1)], 
                    alpha=0.8, color='red', s=150, label="3-Sigma Anomalies")
        plt.scatter(np.where(anomalies_Y_test_mod==True)[0], Y_test[np.where(anomalies_Y_test_mod==1)], 
                    alpha=0.8, color='blue', s=100, label="Modified Z-Score Anomalies")    
        plt.legend()
        figName = f"Y_test_anomalies-T_{T}-fname-{trial_fname}.png"
        mlflow.log_figure(fig, figName)
        fig.clf()
        plt.close()

        fig = plt.figure(figsize=(20,15))
        title = "Predict Anomalies T=" + str(T) + " with predict 1 on "+ str(model_exp) + "for " + str(trial_fname)
        plt.title(title)
        plt.plot(y_predict,label="Predict 1-step Forecast", alpha=0.6, c='red', linewidth=3)
        plt.plot(Y_test,label="Original Data", alpha=0.6, c='black')
        plt.scatter(np.where(anomalies_ae==True), y_predict[np.where(anomalies_ae==True)], 
                    alpha=0.8, color='green', s=350, label="3-Sigma Anomalies AE")
        plt.scatter(np.where(anomalies_se==True), y_predict[np.where(anomalies_se==True)], 
                    alpha=0.8, color='magenta', s=300, label = "3-Sigma Anomalies SE")
        plt.scatter(np.where(anomalies_errors_ae==True), y_predict[np.where(anomalies_errors_ae==True)], 
                    alpha=0.8, color='blue', s=250, label = "Z-score Anomalies AE")
        plt.scatter(np.where(anomalies_errors_se==True), y_predict[np.where(anomalies_errors_se==True)], 
                    alpha=0.8, color='cyan', s=200, label = "Z-score Anomalies SE")
        plt.scatter(np.where(anomalies_errors_ae_mod==True), y_predict[np.where(anomalies_errors_ae_mod==True)], 
                    alpha=0.8, color='lightgreen', s=150, label = "Modified Z-score Anomalies AE")
        plt.scatter(np.where(anomalies_errors_se_mod==True), y_predict[np.where(anomalies_errors_se_mod==True)], 
                    alpha=0.8, color='orange', s=50, label = "Modified Z-score Anomalies SE")    
        plt.legend()    
        figName = f"Y_predict-1-step-anomalies-T_{T}-fname-{trial_fname}.png"
        mlflow.log_figure(fig, figName)
        fig.clf()
        plt.close()

        fig = plt.figure(figsize=(20,15))
        title = "Compare forecasts T=" + str(T) + " predict_ahead=" + str(predict_ahead) + " with predict 1" + \
                 "for " + str(trial_fname)
        plt.title(title)
        plt.plot(Y_test,label="Original Data", alpha=0.6, c='red',linewidth=2)
        plt.plot(y_predict,label="Predicted Data 1-step", alpha=0.6, c='black', linewidth=2)
        plt.plot(y_pred_nsteps,label="Predicted Data " + str(predict_ahead) + "-steps", alpha=0.6, c='blue', linewidth=2)
        plt.legend()
        figName = f"compare-forecasts-1_{predict_ahead}-fname-{trial_fname}.png"
        mlflow.log_figure(fig, figName)
        fig.clf()
        plt.close()
        
        fig = plt.figure(figsize=(20,15)) 
        title = "Predict Anomalies T=" + str(T) + " with predict " + str(predict_ahead) + " on " + str(model_exp) + "for " + str(trial_fname)
        plt.title(title)
        plt.plot(y_pred_nsteps,label="Predict " + str(predict_ahead) + "-step Forecast", alpha=0.6, c='red', linewidth=3)
        plt.plot(Y_test,label="Original Data", alpha=0.6, c='black')
        plt.scatter(np.where(anomalies_ae2==True), y_pred_nsteps[np.where(anomalies_ae2==True)], 
                    alpha=0.8, color='green', s=350, label="Anomalies AE")
        plt.scatter(np.where(anomalies_se2==True), y_pred_nsteps[np.where(anomalies_se2==True)], 
                    alpha=0.8, color='magenta', s=300, label = "Anomalies SE")
        plt.scatter(np.where(anomalies_errors_ae2==True), y_pred_nsteps[np.where(anomalies_errors_ae2==True)], 
                    alpha=0.8, color='blue', s=250, label = "Z-score Anomalies AE")
        plt.scatter(np.where(anomalies_errors_se2==True), y_pred_nsteps[np.where(anomalies_errors_se2==True)], 
                    alpha=0.8, color='cyan', s=200, label = "Z-score Anomalies SE")
        plt.scatter(np.where(anomalies_errors_ae2_mod==True), y_pred_nsteps[np.where(anomalies_errors_ae2_mod==True)], 
                    alpha=0.8, color='lime', s=150, label = "Modified Z-score Anomalies AE")
        plt.scatter(np.where(anomalies_errors_se2_mod==True), y_pred_nsteps[np.where(anomalies_errors_se2_mod==True)], 
                    alpha=0.8, color='orange', s=50, label = "Modified Z-score Anomalies SE")        
        plt.legend();    
        figName = f"Y-predict-anomalies-step-{predict_ahead}-with-T_{T}-fname-{trial_fname}.png"
        mlflow.log_figure(fig, figName)
        fig.clf()
        plt.close()    
        
        fname = f'Results-trial-{trial_fname}-{predict_ahead}-step-ahead.json'
        mlflow.log_dict(result, fname)        
    
    return result

In [None]:
import os

In [None]:
ray.shutdown()
ray.init()

In [None]:
results_trials = {}
for filePos in range(len(dataFileList)):
    
    data = pd.read_csv(dataFileList[filePos], index_col='EventDateTime', parse_dates=['EventDateTime'])
    dataLatency = pd.read_csv(dataLatencyFileList[filePos], index_col='EventDateTime', parse_dates=['EventDateTime'])
    print(f'Processing files at position {filePos} in list')
    %run '../lib/prepareDataSet.ipynb'

    trial_fname = os.path.basename(dataFileList[filePos])
    results_predict_ahead = {}
    for predict_ahead in [5, 10, 15, 30, 60, 90, 120]:
        res = run_n_step_evaluation.remote(model_name, mlflow_run_id, predict_ahead, trial_fname)
        tag = f'predict_ahead_{predict_ahead}'
        results_predict_ahead[tag] = res
        
    results_trials[trial_fname] = results_predict_ahead    

In [None]:
# Resolve the pending Ray results into plain dictionaries.
# data_trials maps file name -> {'predict_ahead_<n>': result dict, or None if the task failed}.
data_trials = {}
for trial, pending_results in results_trials.items():
    data_trial = {}
    for item, result_ref in pending_results.items():
        try:
            res = ray.get(result_ref)
        except Exception as exc:
            # keep collecting the other results, but say what went wrong;
            # `except Exception` lets a kernel interrupt still stop the loop
            print(f'Error getting results for key:{item}')
            print(f'  trial={trial}: {type(exc).__name__}: {exc}')
            res = None
        data_trial[item] = res

    data_trials[trial] = data_trial

In [None]:
data_trials

In [None]:
ray.shutdown()

In [None]:
def find_best_result(trials=None):
    """
    Find the evaluation with the lowest one-step MSE.

    Parameters
    ----------
    trials : dict, optional
        Mapping ``trial name -> {predict_ahead tag -> result}``, where a result
        is a dict holding ``['one_step_metrics']['mse_1Step']`` or ``None`` for
        an evaluation that failed. Defaults to the notebook-level ``data_trials``.

    Returns
    -------
    tuple
        ``(result, tag)`` where ``tag`` is ``'<predict_ahead tag>-<trial name>'``.
        ``({}, '')`` is returned when there is no usable result. On ties the
        first result encountered wins.
    """
    if trials is None:
        trials = data_trials

    # start from +inf so that the best result is found whatever the MSE scale is
    min_mse = float('inf')
    tag = ''
    result = {}
    for trial, crt_trial in trials.items():
        for predict_ahead, crt_result in crt_trial.items():
            if crt_result is None:
                # failed evaluations are stored as None by the collection cell
                continue
            crt_mse = crt_result['one_step_metrics']['mse_1Step']
            if crt_mse < min_mse:
                min_mse = crt_mse
                tag = str(predict_ahead) + '-' + str(trial)
                result = crt_result

    return result, tag
                

In [None]:
find_best_result()