In [3]:
import numpy
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence
# fix random seed for reproducibility
import tensorflow as tf
numpy.random.seed(7)

In [4]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only allocate 2 GB of memory on the first GPU
  try:
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],
        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2000)]) # limit in megabytes
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)

1 Physical GPUs, 1 Logical GPUs


In [5]:
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [6]:

# truncate and pad input sequences
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

In [7]:
y_train.shape

(25000,)

In [16]:
X_train.shape
X_train[3]


array([ 687,   23,    4,    2,    2,    6, 3693,   42,   38,   39,  121,
         59,  456,   10,   10,    7,  265,   12,  575,  111,  153,  159,
         59,   16, 1447,   21,   25,  586,  482,   39,    4,   96,   59,
        716,   12,    4,  172,   65,    9,  579,   11,    2,    4, 1615,
          5,    2,    7,    2,   17,   13,    2,   12,   19,    6,  464,
         31,  314,   11,    2,    6,  719,  605,   11,    8,  202,   27,
        310,    4, 3772, 3501,    8, 2722,   58,   10,   10,  537, 2116,
        180,   40,   14,  413,  173,    7,  263,  112,   37,  152,  377,
          4,  537,  263,  846,  579,  178,   54,   75,   71,  476,   36,
        413,  263, 2504,  182,    5,   17,   75, 2306,  922,   36,  279,
        131, 2895,   17, 2867,   42,   17,   35,  921,    2,  192,    5,
       1219, 3890,   19,    2,  217, 4122, 1710,  537,    2, 1236,    5,
        736,   10,   10,   61,  403,    9,    2,   40,   61, 4494,    5,
         27, 4494,  159,   90,  263, 2311, 4319,  3

In [9]:
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 500, 32)           160000    
_________________________________________________________________
lstm (LSTM)                  (None, 100)               53200     
_________________________________________________________________
dense (Dense)                (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f440ef7c580>

In [10]:

# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Accuracy: 87.04%
