<a href="https://colab.research.google.com/github/fastforwardlabs/anomaly_detection/blob/master/seq2seq/Seq2SeqAnomalyDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!wget http://www.timeseriesclassification.com/Downloads/ECG5000.zip

!mkdir data
!mkdir models
!mv ECG5000.zip data/ECG5000.zip
!unzip data/ECG5000.zip -d data/

Archive:  data/ECG5000.zip
  inflating: data/ECG5000.txt        
  inflating: data/ECG5000_TEST.arff  
  inflating: data/ECG5000_TEST.ts    
  inflating: data/ECG5000_TEST.txt   
  inflating: data/ECG5000_TRAIN.arff  
  inflating: data/ECG5000_TRAIN.ts   
  inflating: data/ECG5000_TRAIN.txt  


In [3]:
from keras.layers import Input, LSTM, Dense, Bidirectional, Concatenate
from keras.models import Model
from keras.optimizers import Adam
import numpy as np

def model(n_features, encoder_dim = [20], decoder_dim = [20], dropout=0., learning_rate=.001, 
          loss='mean_squared_error', output_activation='sigmoid'):
    """ Build seq2seq model.
    
    Arguments:
        - n_features (int): number of features in the data
        - encoder_dim (list): list with number of units per encoder layer
        - decoder_dim (list): list with number of units per decoder layer
        - dropout (float): dropout for LSTM units
        - learning_rate (float): learning rate used during training
        - loss (str): loss function used
        - output_activation (str): activation type for the dense output layer in the decoder
    """
    
    enc_dim = len(encoder_dim)
    dec_dim = len(decoder_dim)
    
    # seq2seq = encoder + decoder
    # encoder
    encoder_hidden = encoder_inputs = Input(shape=(None, n_features), name='encoder_input')
    
    # add encoder hidden layers
    encoder_lstm = []
    for i in range(enc_dim-1):
        encoder_lstm.append(Bidirectional(LSTM(encoder_dim[i], dropout=dropout, 
                                               return_sequences=True,name='encoder_lstm_' + str(i))))
        encoder_hidden = encoder_lstm[i](encoder_hidden)
    
    encoder_lstm.append(Bidirectional(LSTM(encoder_dim[-1], dropout=dropout, return_state=True, 
                                           name='encoder_lstm_' + str(enc_dim-1))))
    encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder_lstm[-1](encoder_hidden)
    
    # only need to keep encoder states
    state_h = Concatenate()([forward_h, backward_h])
    state_c = Concatenate()([forward_c, backward_c])
    encoder_states = [state_h, state_c]
    
    # decoder
    decoder_hidden = decoder_inputs = Input(shape=(None, n_features), name='decoder_input')
    
    # add decoder hidden layers
    # check if dimensions are correct
    dim_check = [(idx,dim) for idx,dim in enumerate(decoder_dim) if dim!=encoder_dim[-1]*2]
    if len(dim_check)>0:
        raise ValueError('\nDecoder (layer,units) {0} is not compatible with encoder hidden ' \
                         'states. Units should be equal to {1}'.format(dim_check,encoder_dim[-1]*2))
    
    # initialise decoder states with encoder states
    decoder_lstm = []
    for i in range(dec_dim):
        decoder_lstm.append(LSTM(decoder_dim[i], dropout=dropout, return_sequences=True,
                                 return_state=True, name='decoder_lstm_' + str(i)))
        decoder_hidden, _, _ = decoder_lstm[i](decoder_hidden, initial_state=encoder_states)
    
    # add linear layer on top of LSTM
    decoder_dense = Dense(n_features, activation=output_activation, name='dense_output')
    decoder_outputs = decoder_dense(decoder_hidden)
    
    # define seq2seq model
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    optimizer = Adam(lr=learning_rate)
    model.compile(optimizer=optimizer, loss=loss)
    
    # define encoder model returning encoder states
    encoder_model = Model(encoder_inputs, encoder_states * dec_dim)

    # define decoder model
    # need state inputs for each LSTM layer
    decoder_states_inputs = []
    for i in range(dec_dim):
        decoder_state_input_h = Input(shape=(decoder_dim[i],), name='decoder_state_input_h_' + str(i))
        decoder_state_input_c = Input(shape=(decoder_dim[i],), name='decoder_state_input_c_' + str(i))
        decoder_states_inputs.append([decoder_state_input_h, decoder_state_input_c])
    decoder_states_inputs = [state for states in decoder_states_inputs for state in states]
    
    decoder_inference = decoder_inputs
    decoder_states = []
    for i in range(dec_dim):
        decoder_inference, state_h, state_c = decoder_lstm[i](decoder_inference, 
                                                              initial_state=decoder_states_inputs[2*i:2*i+2])
        decoder_states.append([state_h,state_c])
    decoder_states = [state for states in decoder_states for state in states]
    
    decoder_outputs = decoder_dense(decoder_inference)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                          [decoder_outputs] + decoder_states)
    
    return model, encoder_model, decoder_model

Using TensorFlow backend.


In [0]:
import argparse
from keras.callbacks import ModelCheckpoint
import numpy as np
import pandas as pd
import pickle
import random
from scipy.io import arff

# from model import model

np.random.seed(2018)
np.random.RandomState(2018)
random.seed(2018)

# default args
DATASET = './data/ECG5000_TEST.arff'
SAVE_PATH = './models/'
MODEL_NAME = 'seq2seq'
DATA_RANGE = [0,2627]

# data preprocessing
STANDARDIZED = False
MINMAX = False
CLIP = [99999]

# architecture
TIMESTEPS = 140 # length of 1 ECG
ENCODER_DIM = [20]
DECODER_DIM = [40]
OUTPUT_ACTIVATION = 'sigmoid'

# training
EPOCHS = 100
BATCH_SIZE = 32
LEARNING_RATE = .005
LOSS = 'mean_squared_error'
DROPOUT = 0.
VALIDATION_SPLIT = 0.2
SAVE = True
PRINT_PROGRESS = True
CONTINUE_TRAINING = False
LOAD_PATH = SAVE_PATH

def train(model,X,args):
    """ Train seq2seq-LSTM model. """
    
    # clip data per feature
    for col,clip in enumerate(args.clip):
        X[:,:,col] = np.clip(X[:,:,col],-clip,clip)
    
    # apply scaling and save data preprocessing method
    axis = (0,1) # scaling per feature over all observations
    if args.standardized:
        print('\nStandardizing data')
        mu, sigma = np.mean(X,axis=axis), np.std(X,axis=axis)
        X = (X - mu) / (sigma + 1e-10)
        
        with open(args.save_path + 'preprocess_' + args.model_name + '.pickle', 'wb') as f:
            pickle.dump(['standardized',args.clip,axis,mu,sigma], f)
    
    if args.minmax:
        print('\nMinmax scaling of data')
        xmin, xmax = X.min(axis=axis), X.max(axis=axis)
        min, max = 0, 1
        X = ((X - xmin) / (xmax - xmin)) * (max - min) + min
        
        with open(args.save_path + 'preprocess_' + args.model_name + '.pickle', 'wb') as f:
            pickle.dump(['minmax',args.clip,axis,xmin,xmax,min,max], f)
            
    # define inputs
    encoder_input_data = X
    decoder_input_data = X
    decoder_target_data = np.roll(X, -1, axis=1) # offset decoder_input_data by 1 across time axis

    # set training arguments
    if args.print_progress:
        verbose = 1
    else:
        verbose = 0

    kwargs = {}
    kwargs['epochs'] = args.epochs
    kwargs['batch_size'] = args.batch_size
    kwargs['shuffle'] = True
    kwargs['validation_split'] = args.validation_split
    kwargs['verbose'] = verbose

    if args.save: # create callback
        print('\nSave stuff')
        checkpointer = ModelCheckpoint(filepath=args.save_path + args.model_name + '_weights.h5',verbose=0,
                                       save_best_only=True,save_weights_only=True)
        kwargs['callbacks'] = [checkpointer]
        
        # save model architecture
        with open(args.save_path + args.model_name + '.pickle', 'wb') as f:
            pickle.dump([X.shape[1],X.shape[2],args.encoder_dim,
                         args.decoder_dim,args.output_activation],f)
    
    model.fit([encoder_input_data, decoder_input_data], decoder_target_data, **kwargs)

def run(args):
    """ Load data, generate training batch, initiate and train model. """
    
    print('\nLoad dataset')
    data = arff.loadarff(args.dataset)
    data = pd.DataFrame(data[0])
    data.drop(columns='target',inplace=True)
    
    print('\nGenerate training batch')
    args.n_features = 1 # only 1 feature in the ECG dataset
    X = data.values[args.data_range[0]:args.data_range[1],:]
    X = np.reshape(X, (X.shape[0],X.shape[1],args.n_features))
    
    print('\nInitiate outlier detector model')
    s2s, enc, dec = model(args.n_features,encoder_dim=args.encoder_dim,decoder_dim=args.decoder_dim,
                          dropout=args.dropout,learning_rate=args.learning_rate,loss=args.loss,
                          output_activation=args.output_activation)
    
    if args.continue_training:
        print('\nLoad pre-trained model')
        s2s.load_weights(args.load_path + args.model_name + '_weights.h5') # load pretrained model weights
    
    if args.print_progress:
        s2s.summary()
    
    print('\nTrain outlier detector')
    train(s2s,X,args)
    
# if __name__ == '__main__':
import sys; sys.argv=['']; del sys
    
parser = argparse.ArgumentParser(description="Train seq2seq-LSTM outlier detector.")
parser.add_argument('--dataset',type=str,choices=DATASET,default=DATASET)
parser.add_argument('--data_range',type=int,nargs=2,default=DATA_RANGE)
parser.add_argument('--timesteps',type=int,default=TIMESTEPS)
parser.add_argument('--encoder_dim',type=int,nargs='+',default=ENCODER_DIM)
parser.add_argument('--decoder_dim',type=int,nargs='+',default=DECODER_DIM)
parser.add_argument('--output_activation',type=str,default=OUTPUT_ACTIVATION)
parser.add_argument('--dropout',type=float,default=DROPOUT)
parser.add_argument('--learning_rate',type=float,default=LEARNING_RATE)
parser.add_argument('--loss',type=str,default=LOSS)
parser.add_argument('--validation_split',type=float,default=VALIDATION_SPLIT)
parser.add_argument('--epochs',type=int,default=EPOCHS)
parser.add_argument('--batch_size',type=int,default=BATCH_SIZE)
parser.add_argument('--clip',type=float,nargs='+',default=CLIP)
parser.add_argument('--standardized', default=STANDARDIZED, action='store_true')
parser.add_argument('--minmax', default=MINMAX, action='store_true')
parser.add_argument('--print_progress', default=PRINT_PROGRESS, action='store_true')
parser.add_argument('--save', default=SAVE, action='store_true')
parser.add_argument('--save_path',type=str,default=SAVE_PATH)
parser.add_argument('--load_path',type=str,default=LOAD_PATH)
parser.add_argument('--model_name',type=str,default=MODEL_NAME)
parser.add_argument('--continue_training', default=CONTINUE_TRAINING, action='store_true')
args = parser.parse_args()

run(args)

In [9]:
for col,clip in enumerate(args.clip):
  print(col,clip)

0 99999


In [0]:
s2s, enc, dec = model(args.n_features,encoder_dim=args.encoder_dim,decoder_dim=args.decoder_dim,
                          dropout=args.dropout,learning_rate=args.learning_rate,loss=args.loss,
                          output_activation=args.output_activation)
s2s.load_weights(SAVE_PATH + MODEL_NAME + '_weights.h5')



In [15]:
data = arff.loadarff(args.dataset)
data = pd.DataFrame(data[0])
data.drop(columns='target',inplace=True)

print('\nGenerate training batch')
args.n_features = 1 # only 1 feature in the ECG dataset
X = data.values[args.data_range[0]:args.data_range[1],:]
X = np.reshape(X, (X.shape[0],X.shape[1],args.n_features))
s2s.predict(X[0])


Generate training batch


ValueError: ignored