In [1]:
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding
from keras.layers import LSTM, SpatialDropout1D
from keras.datasets import imdb

Using TensorFlow backend.


In [2]:
np.random.seed(42)

In [3]:
# Максимальное кол-во слов по частоте использования
max_features = 5000
# Максимальная длина рецензии в словах
maxlen = 80

In [4]:
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

## Предварительная обработка рецензий

In [5]:
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

## Создаем нейронную сеть

In [6]:
model = Sequential()
# Слой для векторного представления слов
model.add(Embedding(max_features, 32))
model.add(SpatialDropout1D(0.2))
# Слой долго-краткосрочной памяти
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2)) 
# Полносвязный слой
model.add(Dense(1, activation="sigmoid"))

In [7]:
model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

In [8]:
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 32)          160000    
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, None, 32)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None


In [10]:
model.fit(X_train, y_train, batch_size=64, epochs=7,
         validation_data=(X_test, y_test), verbose=1)

Train on 25000 samples, validate on 25000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.History at 0x230077e7dd8>

In [12]:
scores = model.evaluate(X_test, y_test,
                        batch_size=64)
print("Точность на тестовых данных: %.2f%%" % (scores[1] * 100))

Точность на тестовых данных: 83.09%


In [13]:
model_json = model.to_json()
with open("imdb_model.json", "w") as f:
    f.write(model_json)

model.save_weights("imdb_model.h5")