In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Embedding, Dropout, LSTM
from tensorflow.keras.datasets import imdb
import numpy as np
from keras.datasets import reuters
from keras.utils.np_utils import to_categorical

import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

In [2]:
# Load the Reuters newswire dataset using load_data()'s default arguments.
# For reference, the explicit argument values previously noted in this cell:
#   path="reuters.npz", num_words=None, skip_top=0, maxlen=None,
#   test_split=0.2, seed=113, start_char=1, oov_char=2, index_from=3
train_pair, test_pair = reuters.load_data()

# Each pair is (sequences, labels).
X_train_seq, Y_train = train_pair
X_test_seq, Y_test = test_pair

In [3]:
# Number of timesteps every sequence is padded/truncated to.
MAX_SEQUENCE_LENGTH = 100

# Vocabulary size = largest word index found in either split, plus one
# (it is used as the Embedding layer's input_dim, which must exceed every
# index). `default=-1` mirrors the original -1 starting value and keeps max()
# from raising ValueError on an empty sequence or an empty split.
vocabulary_size = 1 + max(
    (
        max(seq, default=-1)
        for split in (X_train_seq, X_test_seq)
        for seq in split
    ),
    default=-1,
)

# Pad/truncate every sequence to a fixed length so they form a 2-D array.
X_train = sequence.pad_sequences(X_train_seq, maxlen=MAX_SEQUENCE_LENGTH)
X_test = sequence.pad_sequences(X_test_seq, maxlen=MAX_SEQUENCE_LENGTH)

# One-hot encode the integer labels.
# The ndim guard makes this cell safe to re-run: labels that are already
# one-hot (2-D) are left alone instead of being encoded a second time.
if np.ndim(Y_train) == 1:
    # Use one shared class count for both splits; letting to_categorical infer
    # it per array would give mismatched widths if a split lacked the highest
    # class id.
    num_classes = int(max(np.max(Y_train), np.max(Y_test))) + 1
    Y_train = to_categorical(Y_train, num_classes=num_classes)
    Y_test = to_categorical(Y_test, num_classes=num_classes)

In [4]:
# Sanity check: shapes of the raw sequences and labels, then of the padded inputs.
raw_shapes = [np.shape(arr) for arr in (X_train_seq, Y_train, X_test_seq, Y_test)]
print(*raw_shapes)

padded_shapes = [np.shape(arr) for arr in (X_train, X_test)]
print(*padded_shapes)

(8982,) (8982, 46) (2246,) (2246, 46)
(8982, 100) (2246, 100)


In [5]:
# Text classifier: embedding -> two stacked LSTMs -> per-timestep dense layer
# -> dropout -> flatten -> 46-way softmax.
model = Sequential([
    Embedding(input_dim=vocabulary_size, output_dim=300, input_length=100),
    # return_sequences=True: emit the output of every timestep, not just the last.
    LSTM(256, return_sequences=True),
    LSTM(128, return_sequences=True),
    # Dense is applied to each timestep independently (input is still 3-D here).
    Dense(1024, activation='relu'),
    Dropout(0.5),
    # Collapse the (timesteps, features) axes into one vector per sample.
    Flatten(),
    Dense(46, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 100, 300)          9294600   
_________________________________________________________________
lstm (LSTM)                  (None, 100, 256)          570368    
_________________________________________________________________
lstm_1 (LSTM)                (None, 100, 128)          197120    
_________________________________________________________________
dense (Dense)                (None, 100, 1024)         132096    
_________________________________________________________________
dropout (Dropout)            (None, 100, 1024)         0         
_________________________________________________________________
flatten (Flatten)            (None, 102400)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 46)                4710446

In [6]:
# Train on the padded sequences for 10 epochs with mini-batches of 128.
# The returned History object is kept so that per-epoch loss/accuracy
# (history.history) can be inspected or plotted in later cells, rather than
# being discarded after its repr is echoed.
history = model.fit(X_train, Y_train, epochs=10, batch_size=128)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9a4f0bc9b0>

In [7]:
# Loss and accuracy on the training split (displayed as the cell's output).
model.evaluate(x=X_train, y=Y_train)



[0.09753095358610153, 0.9654865264892578]

In [8]:
# Loss and accuracy on the held-out test split (displayed as the cell's output).
model.evaluate(x=X_test, y=Y_test)



[1.8723424673080444, 0.7003561854362488]