# Feature Engineering through LSTM Autoencoders

In [1]:
import tensorflow as tf
import numpy as np

## Static Feature Extraction

In [2]:
# Toy input: one sample holding nine evenly spaced values, shape (1, 9).
# It is reshaped into [samples, timesteps, features] in a later cell.
sequence = np.array(
    [[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]]
)

In [3]:
# Length of each series (size of axis 1). Despite the name, this is used as the
# number of *timesteps* by the reshape and the model input; each timestep then
# carries a single feature.
n_features = sequence.shape[1]

In [4]:
# Add a trailing axis of size 1 so the array is laid out as
# [samples, timesteps, features], matching the (n_features, 1) model input.
sequence = np.reshape(sequence, (sequence.shape[0], n_features, 1))

In [5]:
# Sanity check of the layout after the reshape: (samples, timesteps, features).
sequence.shape

(1, 9, 1)

## Encoder-Decoder Definition

In [7]:
# Size of the embedding: used as the LSTM unit count in both the encoder and
# the decoder, and as the length of the decoder's input vector.
n_emb = 100

In [8]:
# Fix TensorFlow's global seed before the first layer is built. Weight
# initialisation is random, so without a seed the trained reconstruction and
# the embedding change on every Restart & Run All. (Bit-exact results are still
# only expected on the same TensorFlow version and hardware.)
tf.random.set_seed(42)

# Encoder: takes a series of n_features timesteps with one value each and
# returns the LSTM's final output, a fixed-size vector of n_emb values that
# serves as the embedding of the whole sequence.
x = tf.keras.Input(shape=(n_features, 1,))
y = tf.keras.layers.LSTM(n_emb, activation='relu')(x)
encoder = tf.keras.models.Model(inputs=[x], outputs=[y], name='encoder')

In [9]:
# List the encoder's layers with their output shapes and parameter counts.
encoder.summary()

Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 9, 1)]            0         
_________________________________________________________________
lstm (LSTM)                  (None, 100)               40800     
Total params: 40,800
Trainable params: 40,800
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Decoder: turns an embedding of n_emb values back into a series.
#   1. RepeatVector copies the embedding once per timestep.
#   2. The LSTM emits an output at every timestep (return_sequences=True).
#   3. A Dense(1) layer, applied to each timestep via TimeDistributed,
#      projects every step to a single value.
x = tf.keras.Input(shape=(n_emb,))
y = tf.keras.layers.RepeatVector(n=n_features)(x)
y = tf.keras.layers.LSTM(
    units=n_emb,
    activation='relu',
    return_sequences=True,
)(y)
y = tf.keras.layers.TimeDistributed(
    layer=tf.keras.layers.Dense(units=1),
)(y)
decoder = tf.keras.Model(inputs=[x], outputs=[y], name='decoder')

In [12]:
# List the decoder's layers with their output shapes and parameter counts.
decoder.summary()

Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 100)]             0         
_________________________________________________________________
repeat_vector (RepeatVector) (None, 9, 100)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 9, 100)            80400     
_________________________________________________________________
time_distributed (TimeDistri (None, 9, 1)              101       
Total params: 80,501
Trainable params: 80,501
Non-trainable params: 0
_________________________________________________________________


In [13]:
# End-to-end autoencoder: the encoder's output is fed straight into the decoder.
# It is built from the existing encoder/decoder objects, so training `model`
# also trains the layers that `encoder` and `decoder` use on their own.
model = tf.keras.Model(
    inputs=[encoder.input],
    outputs=[decoder(encoder.output)],
    name='encoder-decoder',
)

In [14]:
# Summary of the combined model; the decoder appears as a single nested layer.
model.summary()

Model: "encoder-decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 9, 1)]            0         
_________________________________________________________________
lstm (LSTM)                  (None, 100)               40800     
_________________________________________________________________
decoder (Model)              (None, 9, 1)              80501     
Total params: 121,301
Trainable params: 121,301
Non-trainable params: 0
_________________________________________________________________


## Model Training

In [16]:
# Number of training epochs passed to model.fit below.
n_epochs = 300

In [17]:
# Minimise the mean squared error between input and reconstruction with Adam.
model.compile(loss='mse', optimizer='adam')

In [18]:
# Autoencoder training: the same array is used as both input and target, so
# the model learns to reproduce the sequence through the n_emb-sized embedding.
model.fit(x=sequence, y=sequence, epochs=n_epochs, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x2b904e689b0>

## Model Prediction

In [19]:
# Reconstruct the training sequence with the full encoder-decoder model.
yrec = model.predict(sequence, verbose=0)

In [20]:
# First sample, every timestep, the single feature: compare with the input
# values 0.1 ... 0.9.
print(yrec[0, :, 0])

[0.11004128 0.20612499 0.30250552 0.3993719  0.49703354 0.59586954
 0.69636166 0.79909474 0.90476537]


## Example Encoder Usage

In [21]:
# Run only the encoder to obtain the embedding (engineered features) of the sequence.
yemb = encoder.predict(sequence, verbose=0)

In [22]:
# Embedding vector of the first (and only) sample.
print(yemb[0])

[4.05242480e-02 0.00000000e+00 0.00000000e+00 0.00000000e+00
 1.08890153e-01 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 3.81078832e-02 1.09215647e-01 0.00000000e+00 0.00000000e+00
 1.33222342e-01 0.00000000e+00 0.00000000e+00 3.65813039e-02
 0.00000000e+00 0.00000000e+00 1.03376940e-01 0.00000000e+00
 4.70779603e-03 0.00000000e+00 0.00000000e+00 7.33726174e-02
 0.00000000e+00 0.00000000e+00 0.00000000e+00 8.87544975e-02
 8.15993920e-02 0.00000000e+00 0.00000000e+00 3.36888507e-02
 6.12602532e-02 8.11658055e-02 0.00000000e+00 0.00000000e+00
 0.00000000e+00 2.01341640e-02 0.00000000e+00 7.55051970e-02
 2.41826512e-02 9.24694240e-02 0.00000000e+00 0.00000000e+00
 4.63189408e-02 0.00000000e+00 1.55089656e-04 1.27178490e-01
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 1.16371885e-01 0.00000000e+00 9.65901744e-03 0.00000000e+00
 0.00000000e+00 1.68051049e-02 2.29653828e-02 0.00000000e+00
 0.00000000e+00 1.645142