In [1]:
### Limit the amount of memory for each session

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Cap TensorFlow at 10% of GPU memory per process, then register a session
# built from that configuration as the one the Keras backend uses.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
)
set_session(tf.Session(config=config))


Using TensorFlow backend.


In [2]:
'''Trains an LSTM on the IMDB sentiment classification task.
The dataset is actually too small for LSTM to be of any advantage
compared to simpler, much faster methods such as TF-IDF + LogReg.
Notes:
- RNNs are tricky. Choice of batch size is important,
choice of loss and optimizer is critical, etc.
Some configurations won't converge.
- LSTM loss decrease patterns during training can be quite different
from what you see with CNNs/MLPs/etc.
'''
from __future__ import print_function

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Dropout, Activation, Conv1D, GlobalMaxPooling1D,GlobalAveragePooling1D
from keras.datasets import imdb

In [3]:
import keras
# Show the installed Keras version as the cell output, recording which
# release the results below were produced with.
keras.__version__

'2.0.8'

In [8]:
# Settings reused by the model cells further down.
max_features = 20000   # vocabulary cap passed to imdb.load_data (top most-common words)
embedding_dims = 128   # width of the word-embedding vectors
maxlen = 80            # cut texts after this number of words
batch_size = 32

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print('{} train sequences'.format(len(x_train)))
print('{} test sequences'.format(len(x_test)))

print('Pad sequences (samples x time)')
# Bring every review to exactly maxlen token ids; both splits are handled
# by the same call so they cannot drift apart.
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)
print('x_train shape: {}'.format(x_train.shape))
print('x_test shape: {}'.format(x_test.shape))

Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 80)
x_test shape: (25000, 80)


In [10]:
# Keep only the first 5000 examples of each split (features and labels
# sliced together so they stay aligned).
x_train, y_train = x_train[:5000], y_train[:5000]
x_test, y_test = x_test[:5000], y_test[:5000]

In [11]:
print('Build model...')
# LSTM sentiment classifier: token ids -> embeddings -> LSTM -> sigmoid score.
model = Sequential()
# Use the shared embedding_dims setting rather than repeating the literal 128,
# so this model stays consistent with the other models built in this notebook.
model.add(Embedding(max_features, embedding_dims))
# 128 LSTM units; dropout is applied to the layer inputs and, separately,
# to the recurrent connections.
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
# One sigmoid unit produces the binary sentiment prediction.
model.add(Dense(1, activation='sigmoid'))



Build model...


In [12]:
# Binary cross-entropy matches the single sigmoid output; the optimizer
# (and its configuration) is a good knob to experiment with.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, None, 128)         2560000   
_________________________________________________________________
lstm_3 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 129       
Total params: 2,691,713
Trainable params: 2,691,713
Non-trainable params: 0
_________________________________________________________________


In [14]:
print('Train...')
# Fit for 5 epochs, reporting loss/accuracy on the test split after each one.
# The call is left as the cell's final expression, so the returned History
# object is shown as the cell output.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test)
)

Train...
Train on 5000 samples, validate on 25000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x11cada400>

In [15]:
# With metrics=['accuracy'] compiled in, evaluate() yields the loss followed
# by the accuracy on the given data.
score, acc = model.evaluate(
    x_test,
    y_test,
    batch_size=batch_size
)
print('Test score:', score)
print('Test accuracy:', acc)

Test score: 0.790746094265
Test accuracy: 0.78088


In [16]:
# Grab the first layer of the current model for inspection.
d = model.layers[0]

In [17]:
# Print the built-in documentation for the layer object stored in `d`.
help(d)

Help on Embedding in module keras.layers.embeddings object:

class Embedding(keras.engine.topology.Layer)
 |  Turns positive integers (indexes) into dense vectors of fixed size.
 |  eg. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]
 |  
 |  This layer can only be used as the first layer in a model.
 |  
 |  # Example
 |  
 |  ```python
 |    model = Sequential()
 |    model.add(Embedding(1000, 64, input_length=10))
 |    # the model will take as input an integer matrix of size (batch, input_length).
 |    # the largest integer (i.e. word index) in the input should be no larger than 999 (vocabulary size).
 |    # now model.output_shape == (None, 10, 64), where None is the batch dimension.
 |  
 |    input_array = np.random.randint(1000, size=(32, 10))
 |  
 |    model.compile('rmsprop', 'mse')
 |    output_array = model.predict(input_array)
 |    assert output_array.shape == (32, 10, 64)
 |  ```
 |  
 |  # Arguments
 |    input_dim: int > 0. Size of the vocabulary,
 |        i.e. maximum in

In [18]:
# Fetch the IMDB vocabulary mapping (word -> integer index).
wd = imdb.get_word_index()

Downloading data from https://s3.amazonaws.com/text-datasets/imdb_word_index.json

In [19]:
# Invert the vocabulary so an integer index can be looked up to recover its word.
revwd = {index: word for word, index in wd.items()}

In [20]:
# Turn the padded test review at position 10 back into text. Ids below 3 are
# skipped and the rest are shifted down by 3 before the lookup (presumably
# because the loader reserves the lowest ids for padding/start/unknown
# markers -- confirm against imdb.load_data).
" ".join(revwd[token - 3] for token in x_test[10] if token >= 3)

"plays the role so deliciously nolan plays the other woman of the house the housekeeper who herself on her talents and sloppy and often typical behavior with his cigar martin landau plays identical twins in this one each who accuse the other of murdering their uncle for money well you'll just have to watch and see the outcome but i can assure you that it's always worth watching this one for the cast and the crew"

In [21]:
# Hyperparameters of the convolutional classifier.
filters = 250       # number of convolution filters
kernel_size = 3     # consecutive words covered by each filter
hidden_dims = 250   # units in the dense hidden layer

# Layers are listed in the order they are applied.
model = Sequential([
    # Map vocabulary indices to embedding_dims-dimensional vectors.
    Embedding(input_dim=max_features,
              output_dim=embedding_dims,
              input_length=maxlen),
    Dropout(0.2),

    # 1D convolution that learns word-group filters spanning kernel_size words.
    Conv1D(filters=filters,
           kernel_size=kernel_size,
           strides=1,
           padding='valid',
           activation='relu'),
    # Keep the strongest response of every filter across the sequence.
    GlobalMaxPooling1D(),

    # Plain hidden layer; dropout sits between the linear map and the ReLU.
    Dense(hidden_dims),
    Dropout(0.2),
    Activation('relu'),

    # Project to one unit and squash it with a sigmoid.
    Dense(1),
    Activation('sigmoid'),
])



In [22]:
# Hyperparameters of the two-convolution classifier.
filters = 250       # number of filters in each convolution
kernel_size = 3     # width of the first convolution, in words
hidden_dims = 250   # units in the dense hidden layer

# Layers are listed in the order they are applied.
model = Sequential([
    # Map vocabulary indices to embedding_dims-dimensional vectors.
    Embedding(input_dim=max_features,
              output_dim=embedding_dims,
              input_length=maxlen),
    Dropout(0.2),

    # First convolution: word-group filters spanning kernel_size words.
    Conv1D(filters=filters,
           kernel_size=kernel_size,
           strides=1,
           padding='valid',
           activation='relu'),
    Dropout(0.2),
    # Second convolution of width 2, stacked on the first one's feature maps.
    Conv1D(filters=filters,
           kernel_size=2,
           strides=1,
           padding='valid',
           activation='relu'),
    # Keep the strongest response of every filter across the sequence.
    GlobalMaxPooling1D(),

    # Plain hidden layer; dropout sits between the linear map and the ReLU.
    Dense(hidden_dims),
    Dropout(0.2),
    Activation('relu'),

    # Project to one unit and squash it with a sigmoid.
    Dense(1),
    Activation('sigmoid'),
])

In [23]:
print('Build model...')
# Bag-of-embeddings classifier; layers are listed in the order they are applied.
model = Sequential([
    # Map vocabulary indices to embedding_dims-dimensional vectors.
    Embedding(input_dim=max_features,
              output_dim=embedding_dims,
              input_length=maxlen),
    # Average the embeddings of all words in the document.
    GlobalAveragePooling1D(),
    # Single sigmoid unit for the binary prediction.
    Dense(1, activation='sigmoid'),
])


Build model...


In [24]:
# Print each layer's output shape and parameter count for the model
# currently bound to `model`.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, 80, 128)           2560000   
_________________________________________________________________
global_average_pooling1d_1 ( (None, 128)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 129       
Total params: 2,560,129
Trainable params: 2,560,129
Non-trainable params: 0
_________________________________________________________________
