# Sequence Classification with LSTM

In [1]:
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Flatten
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

Using Theano backend.


In [2]:
# Pin NumPy's global random generator so repeated runs of the notebook
# draw the same pseudo-random numbers.
seed = 7
np.random.seed(seed=seed)

In [4]:
# Settings for the data set: vocabulary cap passed to the loader and the
# fixed length every review sequence is padded/truncated to.
top_words = 5000
maxlen = 500

# Load the train/validation splits, restricted to the `top_words` vocabulary.
(X_train, Y_train), (X_val, Y_val) = imdb.load_data(nb_words=top_words)

# Bring both splits to the same sequence length.
X_train, X_val = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (X_train, X_val)
)

In [5]:
# Turn each label array into a column of shape (n_samples, 1).
# -1 lets NumPy infer the row count from the array being reshaped, so the
# validation labels are sized from Y_val itself rather than from Y_train
# (the two only happen to have the same length for this data set).
Y_train = Y_train.reshape(-1, 1)
Y_val = Y_val.reshape(-1, 1)

In [13]:
# define and build a model
def create_model():
    """Build and compile the Embedding -> LSTM -> Dense binary classifier.

    Reads the notebook-level ``top_words`` (embedding vocabulary size) and
    ``maxlen`` (padded input length) settings, prints the layer summary and
    returns the compiled ``Sequential`` model.
    """
    model = Sequential()
    # 32-dimensional word embeddings with 20% dropout.
    model.add(Embedding(top_words, 32, input_length=maxlen, dropout=0.2))
    # return_sequences=True keeps one 64-unit output per timestep; that
    # (None, maxlen, 64) tensor is what Flatten collapses further down.
    model.add(LSTM(64, stateful=False, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))
    # Single sigmoid unit for the binary label, matching binary_crossentropy.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line to the output.
    model.summary()
    return model

lstm = create_model()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_5 (Embedding)          (None, 500, 32)       160000      embedding_input_5[0][0]          
____________________________________________________________________________________________________
lstm_5 (LSTM)                    (None, 500, 64)       24832       embedding_5[0][0]                
____________________________________________________________________________________________________
dropout_2 (Dropout)              (None, 500, 64)       0           lstm_5[0][0]                     
____________________________________________________________________________________________________
flatten_2 (Flatten)              (None, 32000)         0           dropout_2[0][0]                  
___________________________________________________________________________________________

In [14]:
# Fit the classifier for three passes over the training data in
# mini-batches of 64, reporting metrics on the validation split each epoch.
lstm.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    batch_size=64,
    nb_epoch=3,
    verbose=1
)

Train on 25000 samples, validate on 25000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x11146f750>

In [16]:
# Evaluate the model. The model was compiled with metrics=['accuracy'];
# index 1 of the result is reported as the accuracy.
# NOTE(review): X_val/Y_val were also passed as validation_data during
# training, so this is not a score on an untouched test set.
scores = lstm.evaluate(X_val, Y_val)
accuracy_pct = scores[1] * 100
print("Acc: %.2f%%" % accuracy_pct)

Acc: 87.46%
