### Imports

In [None]:
# Import AEMpy library
import aempy

# Import Extra libraries
import mxnet as mx
from mxnet import gluon
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json, datetime
import seaborn as sns

### Connect to AEM System Console and get the Error logs

In [None]:
# Create the AEMpy System object; the log retrieval, parsing and plotting
# calls in the following cells all go through it.
system = aempy.System()

In [None]:
# Request the error logs through the System object
errorlogs = system.log_error()

In [None]:
# AEMpy has built-in parsing that converts the retrieved error logs into a pandas DataFrame
dfErrors = system.error_log_to_df(errorlogs)

### Question: What is the distribution of the message levels?

In [None]:
# Count how many log records were written at each message level
(
    dfErrors
    .groupby("level")["level"]
    .count()
)

In [None]:
# Plot the messages over time.
# NOTE(review): no frequency argument is passed here; presumably system.plot
# falls back to a 1 min default — confirm against the AEMpy implementation.
system.plot(dfErrors)

### Analyze a more complex error log file

In [None]:
# Parse a local log file; the result replaces the dfErrors built from the live logs
dfErrors = system.error_logfile_to_df("error.log.2020-07-29")

In [None]:
# Count how many records of the local log file were written at each message level
(
    dfErrors
    .groupby("level")["level"]
    .count()
)

In [None]:
# Plot the messages of the local log file over time.
# NOTE(review): no frequency argument is passed here; presumably system.plot
# falls back to a 1 min default — confirm against the AEMpy implementation.
system.plot(dfErrors)

#### Filter and plot the errors

In [None]:
# Keep only the rows whose message level is exactly "ERROR"
df_ERROR = dfErrors.loc[dfErrors["level"].eq("ERROR")]

In [None]:
# Preview the first 10 rows of the ERROR-only frame
df_ERROR.head(10)

In [None]:
# Plot the ERROR messages with an explicit 1 min frequency.
# Other pandas offset aliases are listed here:
# https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases
system.plot(df_ERROR, "1min")  # alternative previously tried: "1ms"

#### Group the errors per minute

In [None]:
# Number of errors per minute.
# Each row gets a helper column `counter` = 1; summing it per
# (1-minute bin, level) group yields the record count of that group.
# Only `counter` is selected before summing, so the result is always a Series
# regardless of which other columns df_ERROR carries (summing the whole frame
# and squeezing breaks as soon as another column survives the aggregation,
# or when the result has a single row and squeezes down to a scalar).
t = (df_ERROR.assign(counter=1)
             .set_index('date')
             .groupby([pd.Grouper(freq="1min"), 'level'])['counter']
             .sum()
             .unstack())  # levels become columns, one row per minute

t.head()

### Detect Anomalies and predict errors

In [None]:
# Flatten the per-minute table into a plain two-column frame:
# the bin timestamp and the ERROR count of that bin.
df_logs = pd.DataFrame(
    {
        "logdate": t.index.to_pydatetime(),
        "error_count": t['ERROR'].values,
    },
    columns=["logdate", "error_count"],
)
df_logs.head()

#### Set the length of the prediction in time steps (minutes, since the data is binned per minute)

In [None]:
# Forecast horizon in time steps. The series is binned and the estimator is
# configured with a "1min" frequency, so 10 steps correspond to 10 minutes.
prediction_length = 10

#### Format the data into a dataset that fits the Machine Learning Framework (GluonTS, built on Apache MXNet Gluon)

In [None]:
# Cut df_logs into windows that start every `prediction_length` rows. Each
# window becomes one dataset entry: a float 'target' array of error counts
# and a 'start' timestamp taken from the window's first row.
# NOTE(review): `.loc[x:x+prediction_length]` slices by label and includes
# BOTH endpoints, so with df_logs' default integer index each window holds up
# to prediction_length + 1 rows and shares its last row with the next window.
# Confirm this overlap is intended before changing it: the evaluation step
# cuts `prediction_length` points off these windows.
train_ds = []
for x in range(0,len(df_logs),prediction_length):
    log_window = df_logs.loc[x:x+prediction_length]
    # NOTE(review): the `freq` argument of pd.Timestamp is deprecated in recent
    # pandas releases — verify against the pandas version this notebook pins.
    ts = pd.Timestamp(log_window['logdate'].iloc[0], freq="min")
    train_ds.append({'target':log_window['error_count'].astype('float').values, 'start':ts})

#### Import GluonTS

In [None]:
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.trainer import Trainer
from gluonts.evaluation.backtest import make_evaluation_predictions

#### Instantiate the Machine Learning model for prediction

In [None]:
# Feed-forward forecaster with a single hidden layer of 10 units, producing
# `prediction_length` steps at 1-minute frequency from a 100-step context.
# Training runs on CPU for 50 epochs of 200 batches each.
estimator = SimpleFeedForwardEstimator(
    freq="1min",
    prediction_length=prediction_length,
    context_length=100,
    num_hidden_dimensions=[10],
    trainer=Trainer(
        ctx="cpu",
        epochs=50,
        num_batches_per_epoch=200,
        learning_rate=0.001,
        hybridize=True,
    ),
)

#### Train the model

In [None]:
# Fit the estimator on the windowed series; the returned predictor is what
# the evaluation cell below uses to produce forecasts.
predictor = estimator.train(train_ds)

#### Predict

In [None]:
# Produce forecasts for the last two windows of the training data (no separate
# held-out set is used). Returns two iterators — forecasts and the matching
# series — which are materialised into lists in the next cell.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=train_ds[-2:],  # test dataset: the last two training windows
    predictor=predictor,  # the trained predictor
    num_samples=100,  # number of sample paths we want for evaluation
)

In [None]:
# Exhaust both iterators (forecasts first, then series) so they can be indexed
forecasts, tss = list(forecast_it), list(ts_it)

#### Plot the anomaly detection and prediction

In [None]:
# Draw the tail of the first evaluated series together with its forecast
plot_length = 150  # number of trailing observations to show
prediction_intervals = (50.0, 90.0)

# Legend entries: the two fixed labels, then the intervals in reverse order
legend = ["observations", "median prediction"]
legend += [f"{k}% prediction interval" for k in reversed(prediction_intervals)]

fig, ax = plt.subplots(figsize=(10, 7))
tss[0][-plot_length:].plot(ax=ax)  # observed time series
forecasts[0].plot(prediction_intervals=prediction_intervals, color="g")
plt.grid(which="both")
plt.legend(legend, loc="upper left")
plt.show()