<a href="https://colab.research.google.com/github/abstractguy/lstm_autoencoder_classifier/blob/master/lstm_autoencoder_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LSTM Autoencoder for Rare Event Binary Classification

This is a continuation of the regular autoencoder for rare event classification presented in
https://towardsdatascience.com/extreme-rare-event-classification-using-autoencoders-in-keras-a565b386f098
and code present in
https://github.com/cran2367/autoencoder_classifier/blob/master/autoencoder_classifier.ipynb
Here we will show an implementation of building a binary classifier using LSTM Autoencoders. 
Similar to the previous post, the purpose is to show the implementation steps. The Autoencoder tuning for performance improvement can be done.

LSTM requires closer attention to preparing the data. Here we have all the steps, and few tests to validate the data preparation.

The dataset used here is taken from here,

**Dataset: Rare Event Classification in Multivariate Time Series** https://arxiv.org/abs/1809.10717 (please cite this article, if using the dataset).

In [0]:
# Hardcode non-shared parameters.
AI = 'LSTM_autoencoder'
main_ticker = 'AAPL'
tickers_list = ['INTC', 'AAPL', 'NVDA', 'CSCO', 'AMD', 'AMZN', 'GOOG', 'MSFT', 'S', 'BAC', 'XLNX', 'WFC', '^DJI', '^GSPC', '^NYA', '^IXIC']
unit_scaler = 3.4
length = 9
random_seed = 123 # Used to help randomly select the data points.
test_size = 0.2
gain = 0.07

In [0]:
# Hardcode shared parameters.
if AI == 'LSTM':
    trends = True
    batch_size = 64
    epochs = 200
    learning_rate = 0.0000013

elif AI == 'LSTM_autoencoder':
    trends = False
    batch_size = 32
    epochs = 200
    learning_rate = 0.0002

In [38]:
!pip install pytrends



In [0]:
%matplotlib inline

from collections import OrderedDict
from math import sqrt
from os import chdir
from os.path import exists
from datetime import datetime
from tqdm import tqdm
from numpy.random import seed
from numpy import append, array, concatenate, count_nonzero, empty, empty_like, expand_dims, mean, nan, power, var, where, zeros
from pandas import concat, DataFrame, date_range, read_csv, Series
from pandas_datareader.data import DataReader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import auc, classification_report, confusion_matrix, f1_score, mean_squared_error, precision_recall_curve, precision_recall_fscore_support, recall_score, roc_curve
from tensorflow import set_random_seed
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import load_model, Model, Sequential
from keras.layers import Dense, Dropout, LSTM, RepeatVector, TimeDistributed
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard, TerminateOnNaN
from keras.optimizers import Adam
from keras.utils import plot_model
from keras import optimizers, Sequential
from google.colab.drive import mount
from tensorboardcolab import TensorBoardColab, TensorBoardColabCallback
from pytrends.request import TrendReq
from pylab import rcParams

import matplotlib.pyplot as plt
import seaborn as sns

In [40]:
path = '/content/gdrive/'
mount(path)
path = path + 'My Drive/LSTM_autoencoder/'
chdir(path)

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [0]:
sign = lambda x: (1, -1)[x < 0]

def curve_shift(dataset, shift_by):
    '''
    This function will shift the binary labels in a dataframe.
    The curve shift will be with respect to the 1s. 
    For example, if shift is -2, the following process
    will happen: if row n is labeled as 1, then
    - Make row (n+shift_by):(n+shift_by-1) = 1.
    - Remove row n.
    i.e. the labels will be shifted up to 2 rows up.
    
    Inputs:
    dataset  A pandas dataframe with a binary labeled column. 
             This labeled column should be named as 'y'.
    shift_by An integer denoting the number of rows to shift.
    
    Output
    dataset  A dataframe with the binary labels shifted by shift.
    '''

    vector = dataset['y'].copy()
    for s in range(abs(shift_by)):
        tmp = vector.shift(sign(shift_by))
        tmp = tmp.fillna(0)
        vector += tmp
    labelcol = 'y'
    # Add vector to the DataFrame.
    dataset.insert(loc=0, column=labelcol+'tmp', value=vector)
    # Remove the rows with labelcol == 1.
    dataset = dataset.drop(dataset[dataset[labelcol] == 1].index)
    # Drop labelcol and rename the tmp col as labelcol.
    dataset = dataset.drop(labelcol, axis=1)
    dataset = dataset.rename(columns={labelcol+'tmp': labelcol})
    # Make the labelcol binary.
    dataset.loc[dataset[labelcol] > 0, labelcol] = 1

    return dataset

def add_ticker_to_tickers(symbol, ticker, tickers):
    minimum_date = ticker.index.min()
    overall_minimum_date = tickers.index.min()
    if minimum_date > overall_minimum_date:
        start = overall_minimum_date
        tickers = tickers[tickers.index >= minimum_date]

    tickers[symbol][tickers.index.isin(ticker.index)] = ticker

    return tickers

def get_stock(symbols, path=path, trends=True):
    csv_path_X = path + 'dataset_X.csv'
    csv_path_y = path + 'dataset_y.csv'

    if exists(csv_path_X) and exists(csv_path_y):
        tickers_X = read_csv(csv_path_X)
        tickers_y = read_csv(csv_path_y)
    else:
        start = datetime(1970, 1, 1)
        end = datetime.now()
        dates = date_range(start=start, end=end)
        columns = concatenate([symbols, [(symbol + '_trend') for symbol in symbols]]) if trends else symbols
        tickers_X = DataFrame(nan, index=dates, columns=columns)
        tickers_y = DataFrame(nan, index=dates, columns=columns)
        tickers_X.index.name = 'Date'
        tickers_y.index.name = 'Date'
        if trends:
            pytrends = TrendReq(hl='en-US', tz=360)

        for symbol in tqdm(symbols, unit='symbol'):
            try:
                ticker = DataReader(symbol, 'yahoo', start=start, end=end)
                ticker_Open = ticker.Open
                ticker_Close = ticker.Close
                ticker_X = (ticker_Close - ticker_Open).dropna()
                ticker_y = (ticker_Close / ticker_Open).dropna()
                tickers_X = add_ticker_to_tickers(symbol, ticker_X, tickers_X)
                tickers_y = add_ticker_to_tickers(symbol, ticker_y, tickers_y)
                if trends:
                    pytrends.build_payload([symbol], timeframe='today 5-y')
                    ticker = pytrends.interest_over_time()[symbol]
                    tickers_X = add_ticker_to_tickers(symbol + '_trend', ticker.copy(), tickers_X)
                    tickers_y = add_ticker_to_tickers(symbol + '_trend', ticker.copy(), tickers_y)
            except:
                pass

        tickers_X = tickers_X.fillna(method='backfill').dropna()
        tickers_y = tickers_y.fillna(method='backfill').dropna()

        tickers_X.to_csv(csv_path_X)
        tickers_y.to_csv(csv_path_y)

    return tickers_X, tickers_y

def delta_time_series(data):
    return data[1:] - data[:-1]

def plot_dataset(dataset):
    plt.plot(dataset)
    plt.xlabel('Days')
    plt.ylabel('Derivatives')
    plt.show()

def get_y_from_generator(generator):
    """Get all targets y from a TimeseriesGenerator instance."""
    y = None

    for i in range(len(generator)):
        batch_y = generator[i][1]

        if y is None:
            y = batch_y
        else:
            y = append(y, batch_y)

    y = y.reshape((-1, 1))
    print(y.shape)
    return y

def binary_accuracy(a, b, name='training'):
    """Helper function to compute the match score of two binary numpy arrays."""
    a = a[:,0] > 0
    b = b[:,0] > 0
    assert len(a) == len(b)
    print('Binary accuracy (' + name + ' data):', (a == b).sum() / len(a))

def compute_units(X_train):
    sample_size, feature_size = X_train.shape
    units = int(((sample_size / unit_scaler) / length) - feature_size)
    print('Units:', units)
    return units

In [0]:
tickers_list = [main_ticker] + tickers_list
tickers_list = list(OrderedDict((ticker, True) for ticker in tickers_list).keys())
dataset_csv_X, dataset_csv_y = get_stock(tickers_list, trends=trends)

In [0]:
dataset = dataset_csv_y.copy()

In [44]:
# Ground truth.
dataset['y'] = (dataset[main_ticker] > (1.0 + (gain / 2.0))).astype(int)
print('Percentage of ones (keep less than 5%):', count_nonzero(dataset.y) / dataset.y.size)

Percentage of ones (keep less than 5%): 0.026330224904004388


In [0]:
dataset = dataset.reset_index()

In [0]:
seed(7)
set_random_seed(11)
rcParams['figure.figsize'] = 8, 6
LABELS = ['Normal', 'Break']

In [47]:
print('Before shifting') # Positive labeled rows before shifting.
one_indexes = dataset.index[dataset['y'] == 1]
display(dataset.iloc[(one_indexes[0]-3):(one_indexes[0]+2), 0:5].head(n=5))
dataset = curve_shift(dataset, shift_by=-1)

print('After shifting') # Validating if the shift happened correctly.
display(dataset.iloc[(one_indexes[0]-4):(one_indexes[0]+1), 0:5].head(n=5))  

Before shifting


Unnamed: 0,index,Date,AAPL,INTC,NVDA
3,3,2004-08-22,1.007129,1.003208,1.005578
4,4,2004-08-23,1.007129,1.003208,1.005578
5,5,2004-08-24,1.022073,0.977006,0.949219
6,6,2004-08-25,1.037025,1.011987,1.020129
7,7,2004-08-26,1.049031,0.995883,1.003953


After shifting


Unnamed: 0,y,index,Date,AAPL,INTC
2,0.0,2,2004-08-21,1.007129,1.003208
3,0.0,3,2004-08-22,1.007129,1.003208
4,0.0,4,2004-08-23,1.007129,1.003208
5,1.0,5,2004-08-24,1.022073,0.977006
8,0.0,8,2004-08-27,0.990484,1.007319


In [0]:
dataset = dataset.drop(columns=['Date'])

In [0]:
# Converts the DataFrame to a numpy array.
input_X = dataset.loc[:,dataset.columns != 'y'].values
input_y = dataset['y'].values

n_features = input_X.shape[1] # Number of features.

In [0]:
def temporalize(X, y, lookback):
    output_X = []
    output_y = []
    for i in range(len(X)-lookback-1):
        t = []
        for j in range(1,lookback+1):
            # Gather past records upto the lookback period.
            t.append(X[[(i+j+1)], :])
        output_X.append(t)
        output_y.append(y[i+lookback+1])
    return output_X, output_y

In [51]:
# Test: The 3D tensors (arrays) for LSTM are forming correctly.
print('First instance of y = 1 in the original data')
display(dataset.iloc[(where(array(input_y) == 1)[0][0]-5):(where(array(input_y) == 1)[0][0]+1), ])

# Equivalent to 5 days of past data.
lookback = 5
# Temporalize the data.
X, y = temporalize(X = input_X, y = input_y, lookback = lookback)

print('For the same instance of y = 1, we are keeping past 5 samples in the 3D predictor array, X.')
display(DataFrame(concatenate(X[where(array(y) == 1)[0][0]], axis=0))) 

First instance of y = 1 in the original data


Unnamed: 0,y,index,AAPL,INTC,NVDA,CSCO,AMD,AMZN,GOOG,MSFT,S,BAC,XLNX,WFC,^DJI,^GSPC,^NYA,^IXIC
0,0.0,0,0.974611,0.992783,1.00258,1.010582,0.98989,0.959513,1.0034,0.991591,1.005288,1.003549,0.995074,1.000173,0.995838,0.996402,0.997792,0.996703
1,0.0,1,1.002931,0.986314,1.064655,0.996306,1.018723,1.027301,1.07227,1.00258,0.985492,1.011313,0.988037,1.000862,1.006905,1.006525,1.006597,1.010156
2,0.0,2,1.007129,1.003208,1.005578,1.013207,1.010717,0.98897,0.98781,0.9989,1.003168,0.998304,1.003197,0.998103,0.996237,0.997569,0.99544,0.997239
3,0.0,3,1.007129,1.003208,1.005578,1.013207,1.010717,0.98897,0.98781,0.9989,1.003168,0.998304,1.003197,0.998103,0.996237,0.997569,0.99544,0.997239
4,0.0,4,1.007129,1.003208,1.005578,1.013207,1.010717,0.98897,0.98781,0.9989,1.003168,0.998304,1.003197,0.998103,0.996237,0.997569,0.99544,0.997239
5,1.0,5,1.022073,0.977006,0.949219,0.991118,0.962158,0.983132,0.942736,0.994161,1.010526,1.004417,0.975026,1.000516,1.002356,1.000465,0.999541,0.994564


For the same instance of y = 1, we are keeping past 5 samples in the 3D predictor array, X.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,8.0,0.990484,1.007319,1.018096,1.014062,1.008432,0.989583,0.981961,0.998545,1.001028,1.00747,0.997872,0.997619,1.002058,1.002425,1.002443,1.003806
1,9.0,1.003529,0.982265,0.97052,0.9855,0.969064,0.960632,0.96894,1.0,1.008763,0.995088,0.979694,0.998461,0.993005,0.992219,0.993652,0.989605
2,10.0,1.003529,0.982265,0.97052,0.9855,0.969064,0.960632,0.96894,1.0,1.008763,0.995088,0.979694,0.998461,0.993005,0.992219,0.993652,0.989605
3,11.0,1.003529,0.982265,0.97052,0.9855,0.969064,0.960632,0.96894,1.0,1.008763,0.995088,0.979694,0.998461,0.993005,0.992219,0.993652,0.989605
4,12.0,1.012328,0.99486,0.992038,0.979634,0.993913,0.993747,1.000684,1.000366,1.009231,1.00514,0.999636,1.006165,1.005132,1.004631,1.006186,1.000305


The two tables are the same. This testifies that we are correctly taking 5 samples (= lookback), X(t):X(t-5) to predict y(t).

In [0]:
X_train, X_test, y_train, y_test = train_test_split(array(X), array(y), test_size=test_size, random_state=random_seed)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=test_size, random_state=random_seed)

In [53]:
X_train.shape

(3404, 5, 1, 17)

In [0]:
X_train_y0 = X_train[y_train==0]
X_train_y1 = X_train[y_train==1]
X_valid_y0 = X_valid[y_valid==0]
X_valid_y1 = X_valid[y_valid==1]

In [55]:
X_train_y0.shape

(3351, 5, 1, 17)

#### Reshaping the data
The tensors we have here are 4-dimensional. We will reshape them into the desired 3-dimensions corresponding to sample x lookback x features.

In [0]:
X_train = X_train.reshape(X_train.shape[0], lookback, n_features)
X_train_y0 = X_train_y0.reshape(X_train_y0.shape[0], lookback, n_features)
X_train_y1 = X_train_y1.reshape(X_train_y1.shape[0], lookback, n_features)

X_test = X_test.reshape(X_test.shape[0], lookback, n_features)

X_valid = X_valid.reshape(X_valid.shape[0], lookback, n_features)
X_valid_y0 = X_valid_y0.reshape(X_valid_y0.shape[0], lookback, n_features)
X_valid_y1 = X_valid_y1.reshape(X_valid_y1.shape[0], lookback, n_features)

### Standardize the data
It is usually better to use a standardized data (transformed to Gaussian, mean 0 and sd 1) for autoencoders.

One common mistake is: we normalize the entire data and then split into train-test. This is not correct. Test data should be completely unseen to anything during the modeling. We should normalize the test data using the feature summary statistics computed from the training data. For normalization, these statistics are the mean and variance for each feature. 

The same logic should be used for the validation set. This makes the model more stable for a test data.

To do this, we will require two UDFs.

- `flatten`: This function will re-create the original 2D array from which the 3D arrays were created. This function is the inverse of `temporalize`, meaning `X = flatten(temporalize(X))`.
- `scale`: This function will scale a 3D array that we created as inputs to the LSTM.

In [0]:
def flatten(X):
    '''
    Flatten a 3D array.
    Input        X            A 3D array for lstm, where the array is sample x timesteps x features.
    Output       flattened_X  A 2D array, sample x features.
    '''
    flattened_X = empty((X.shape[0], X.shape[2])) # Sample x features array.
    for i in range(X.shape[0]):
        flattened_X[i] = X[i, (X.shape[1]-1), :]
    return(flattened_X)

def scale(X, scaler):
    '''
    Scale 3D array.
    Inputs       X            A 3D array for lstm, where the array is sample x timesteps x features.
                 scaler       A scaler object, e.g., sklearn.preprocessing.StandardScaler, sklearn.preprocessing.normalize
    Output       X            Scaled 3D array.
    '''
    for i in range(X.shape[0]):
        X[i, :, :] = scaler.transform(X[i, :, :])
        
    return X

In [0]:
# Initialize a scaler using the training data.
scaler = StandardScaler().fit(flatten(X_train_y0))
X_train_y0_scaled = scale(X_train_y0, scaler)
X_train_y1_scaled = scale(X_train_y1, scaler)
X_train_scaled = scale(X_train, scaler)

In [59]:
# Test scaling validity.
a = flatten(X_train_y0_scaled)
print('Column-wise mean (should be all zeros):', mean(a, axis=0).round(6))
print('Column-wise variance (should be all ones):', var(a, axis=0))

Column-wise mean (should be all zeros): [ 0. -0.  0.  0.  0.  0. -0. -0.  0. -0. -0.  0. -0. -0.  0.  0.  0.]
Column-wise variance (should be all ones): [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [0]:
# Scale test and validation sets.
X_valid_scaled = scale(X_valid, scaler)
X_valid_y0_scaled = scale(X_valid_y0, scaler)
X_test_scaled = scale(X_test, scaler)

## LSTM Autoencoder training

First we will initialize the Autoencoder architecture. We are building a simple autoencoder. More complex architectures and other configurations should be explored.

In [0]:
timesteps =  X_train_y0_scaled.shape[1] # Equal to the lookback.
n_features =  X_train_y0_scaled.shape[2] # 59.

In [62]:
lstm_autoencoder = Sequential()
# Encoder.
lstm_autoencoder.add(LSTM(32, activation='relu', input_shape=(timesteps, n_features), return_sequences=True))
lstm_autoencoder.add(LSTM(16, activation='relu', return_sequences=False))
lstm_autoencoder.add(RepeatVector(timesteps))
# Decoder.
lstm_autoencoder.add(LSTM(16, activation='relu', return_sequences=True))
lstm_autoencoder.add(LSTM(32, activation='relu', return_sequences=True))
lstm_autoencoder.add(TimeDistributed(Dense(n_features)))

lstm_autoencoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 5, 32)             6400      
_________________________________________________________________
lstm_6 (LSTM)                (None, 16)                3136      
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 5, 16)             0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 5, 16)             2112      
_________________________________________________________________
lstm_8 (LSTM)                (None, 5, 32)             6272      
_________________________________________________________________
time_distributed_2 (TimeDist (None, 5, 17)             561       
Total params: 18,481
Trainable params: 18,481
Non-trainable params: 0
_________________________________________________________________


As a rule-of-thumb, look at the number of parameters. If not using any regularization, keep this less than the number of samples. If using regularization, depending on the degree of regularization you can let more parameters in the model that is greater than the sample size. For example, if using dropout with 0.5, you can have up to double the sample size (loosely speaking).

In [0]:
# Path to model weights (saved periodically).
filepath = path + 'LSTM_autoencoder.h5'

# Gradient descent optimization.
optimizer = Adam(lr=learning_rate, clipnorm=1., clipvalue=0.5)

# Training configuration.
lstm_autoencoder.compile(loss='mean_squared_error', optimizer=optimizer)

# Save model weights after each epoch if validation loss decreased.
checkpointer = ModelCheckpoint(filepath=filepath, save_best_only=True, verbose=1)

# Control learning rate schedule when validation is not improving.
reduce_lr = ReduceLROnPlateau(factor=0.1, patience=5, verbose=1, min_lr=learning_rate / 1000)

# Various graphics.
tbc = TensorBoardColab()

# Shouldn't happen.
term_on_NaN = TerminateOnNaN()

lstm_autoencoder_history = lstm_autoencoder.fit(X_train_y0_scaled, 
                                                X_train_y0_scaled, 
                                                epochs=epochs, 
                                                batch_size=batch_size, 
                                                validation_data=(X_valid_y0_scaled, 
                                                                 X_valid_y0_scaled), 
                                                callbacks=[checkpointer, 
                                                           reduce_lr, 
                                                           TensorBoardColabCallback(tbc), 
                                                           term_on_NaN], 
                                                verbose=1).history

Wait for 8 seconds...
TensorBoard link:
https://40d0e554.ngrok.io
Train on 3351 samples, validate on 839 samples
Epoch 1/200

Epoch 00001: val_loss improved from inf to 1.07687, saving model to /content/gdrive/My Drive/LSTM_autoencoder/LSTM_autoencoder.h5
Epoch 2/200

Epoch 00002: val_loss improved from 1.07687 to 1.04784, saving model to /content/gdrive/My Drive/LSTM_autoencoder/LSTM_autoencoder.h5
Epoch 3/200

Epoch 00003: val_loss improved from 1.04784 to 0.96710, saving model to /content/gdrive/My Drive/LSTM_autoencoder/LSTM_autoencoder.h5
Epoch 4/200

Epoch 00004: val_loss improved from 0.96710 to 0.93157, saving model to /content/gdrive/My Drive/LSTM_autoencoder/LSTM_autoencoder.h5
Epoch 5/200

Epoch 00005: val_loss improved from 0.93157 to 0.89889, saving model to /content/gdrive/My Drive/LSTM_autoencoder/LSTM_autoencoder.h5
Epoch 6/200

Epoch 00006: val_loss improved from 0.89889 to 0.86889, saving model to /content/gdrive/My Drive/LSTM_autoencoder/LSTM_autoencoder.h5
Epoch 7/2

In [0]:
plt.plot(lstm_autoencoder_history['loss'], linewidth=2, label='Train')
plt.plot(lstm_autoencoder_history['val_loss'], linewidth=2, label='Valid')
plt.legend(loc='upper right')
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.show()

### Sanity check
Doing a sanity check by validating the reconstruction error 
on the train data. Here we will reconstruct the entire train 
data with both 0 and 1 labels.

**Expectation**: the reconstruction error of 0 labeled data should
be smaller than 1.

**Caution**: do not use this result for model evaluation. It may
result into overfitting issues.

In [0]:
train_x_predictions = lstm_autoencoder.predict(X_train_scaled)
mse = mean(power(flatten(X_train_scaled) - flatten(train_x_predictions), 2), axis=1)

error_df = DataFrame({'Reconstruction_error': mse,
                       'True_class': y_train.tolist()})

groups = error_df.groupby('True_class')
fig, ax = plt.subplots()

for name, group in groups:
    ax.plot(group.index, group.Reconstruction_error, marker='o', ms=3.5, linestyle='',
            label = 'Break' if name == 1 else 'Normal')
ax.legend()
plt.title('Reconstruction error for different classes')
plt.ylabel('Reconstruction error')
plt.xlabel('Data point index')
plt.show();

## Predictions using the Autoencoder

In [0]:
valid_x_predictions = lstm_autoencoder.predict(X_valid_scaled)
mse = mean(power(flatten(X_valid_scaled) - flatten(valid_x_predictions), 2), axis=1)

error_df = DataFrame({'Reconstruction_error': mse,
                        'True_class': y_valid.tolist()})

precision_rt, recall_rt, threshold_rt = precision_recall_curve(error_df.True_class, error_df.Reconstruction_error)
plt.plot(threshold_rt, precision_rt[1:], label='Precision', linewidth=5)
plt.plot(threshold_rt, recall_rt[1:], label='Recall', linewidth=5)
plt.title('Precision and recall for different threshold values')
plt.xlabel('Threshold')
plt.ylabel('Precision/Recall')
plt.legend()
plt.show()

In [0]:
test_x_predictions = lstm_autoencoder.predict(X_test_scaled)
mse = mean(power(flatten(X_test_scaled) - flatten(test_x_predictions), 2), axis=1)

error_df = DataFrame({'Reconstruction_error': mse,
                      'True_class': y_test.tolist()})

threshold_fixed = 0.3
groups = error_df.groupby('True_class')
fig, ax = plt.subplots()

for name, group in groups:
    ax.plot(group.index, group.Reconstruction_error, marker='o', ms=3.5, linestyle='',
            label= 'Break' if name == 1 else 'Normal')
ax.hlines(threshold_fixed, ax.get_xlim()[0], ax.get_xlim()[1], colors='r', zorder=100, label='Threshold')
ax.legend()
plt.title('Reconstruction error for different classes')
plt.ylabel('Reconstruction error')
plt.xlabel('Data point index')
plt.show();

In [0]:
pred_y = [1 if e > threshold_fixed else 0 for e in error_df.Reconstruction_error.values]

In [0]:
conf_matrix = confusion_matrix(error_df.True_class, pred_y)

plt.figure(figsize=(6, 6))
sns.heatmap(conf_matrix, xticklabels=LABELS, yticklabels=LABELS, annot=True, fmt='d');
plt.title('Confusion matrix')
plt.ylabel('True class')
plt.xlabel('Predicted class')
plt.show()

In [0]:
false_pos_rate, true_pos_rate, thresholds = roc_curve(error_df.True_class, error_df.Reconstruction_error)
roc_auc = auc(false_pos_rate, true_pos_rate,)

plt.plot(false_pos_rate, true_pos_rate, linewidth=5, label='AUC = %0.3f'% roc_auc)
plt.plot([0,1],[0,1], linewidth=5)

plt.xlim([-0.01, 1])
plt.ylim([0, 1.01])
plt.legend(loc='lower right')
plt.title('Receiver operating characteristic curve (ROC)')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()