In [1]:
%env KERAS_BACKEND tensorflow
import keras
from keras.datasets import reuters
from keras.layers import Activation, Dense, Dropout
from keras.models import Sequential
from keras.preprocessing.text import Tokenizer
import numpy

env: KERAS_BACKEND=tensorflow


Using TensorFlow backend.


In [2]:
# Hyper-parameters shared by the cells below, assigned in one place.
max_words, batch_size, epochs = (
    2048,  # vocabulary cap: used for load_data(num_words=...), the Tokenizer and the input width
    64,    # mini-batch size for both fit() and evaluate()
    8,     # number of passes over the training data
)

In [3]:
print('loading data ...')
# Load the Reuters newswire set as lists of word indices, capping the
# vocabulary at `max_words` and holding out 30% of the articles for testing.
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words = max_words, test_split = 0.3)
print('train sequences: %d; test sequences: %d' % (len(x_train), len(x_test)))
# Labels are integer topic ids starting at 0. Size the label space from BOTH
# splits: a topic id that only occurs in the test split would otherwise be out
# of range when the labels are one-hot encoded with `num_classes` columns.
# int() turns the numpy scalar into a plain Python int for downstream use.
num_classes = int(max(numpy.max(y_train), numpy.max(y_test))) + 1
print('classes: %d\n' % (num_classes))

loading data ...
train sequences: 7859; test sequences: 3369
classes: 46



In [4]:
# Sanity-check the raw data before any preprocessing: at this point each x
# entry is a variable-length list of word indices and each y entry is an
# integer topic id (see the recorded output below).
x_shapes = (x_train.shape, x_test.shape)
print('x train shape: %s; x test shape: %s' % x_shapes)
first_train_doc = x_train[0]
print('x train head: %s' % first_train_doc)
first_test_doc = x_test[0]
print('x test head: %s' % first_test_doc)
y_shapes = (y_train.shape, y_test.shape)
print('y train shape: %s; y test shape: %s' % y_shapes)
# Only the first three labels are shown to keep the output short.
print('y train head: %s' % y_train[:3])
print('y test head: %s' % y_test[:3])

x train shape: (7859,); x test shape: (3369,)
x train head: [1, 2, 2, 8, 43, 10, 447, 5, 25, 207, 270, 5, 2, 111, 16, 369, 186, 90, 67, 7, 89, 5, 19, 102, 6, 19, 124, 15, 90, 67, 84, 22, 482, 26, 7, 48, 4, 49, 8, 864, 39, 209, 154, 6, 151, 6, 83, 11, 15, 22, 155, 11, 15, 7, 48, 9, 2, 1005, 504, 6, 258, 6, 272, 11, 15, 22, 134, 44, 11, 15, 16, 8, 197, 1245, 90, 67, 52, 29, 209, 30, 32, 132, 6, 109, 15, 17, 12]
x test head: [1, 275, 492, 26, 14, 492, 26, 384, 219, 93, 102, 146, 94, 321, 17, 12]
y train shape: (7859,); y test shape: (3369,)
y train head: [3 4 3]
y test head: [3 3 9]


In [5]:
print('vectorizing sequence data ...')
# The tokenizer is never fitted on text here; it is only used as a converter
# from index sequences to fixed-width rows of `max_words` columns.
tokenizer = Tokenizer(num_words=max_words)
# 'binary' mode yields one 0/1 row per article. The train split is converted
# first, then the test split; both names are rebound to the resulting matrices.
x_train, x_test = (
    tokenizer.sequences_to_matrix(sequences, mode='binary')
    for sequences in (x_train, x_test)
)
matrix_shapes = (x_train.shape, x_test.shape)
print('x train shape: %s; x test shape: %s' % matrix_shapes)
print('x train head: %s' % x_train[0, :])
print('x test head: %s' % x_test[0, :])

vectorizing sequence data ...
x train shape: (7859, 2048); x test shape: (3369, 2048)
x train head: [ 0.  1.  1. ...,  0.  0.  0.]
x test head: [ 0.  1.  0. ...,  0.  0.  0.]


In [6]:
print('converting class vector to binary class matrix ...')
# One-hot encode the integer topic ids into rows of `num_classes` columns,
# the target format expected by the categorical cross-entropy loss used below.
# Train labels are converted first, then test labels.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)
onehot_shapes = (y_train.shape, y_test.shape)
print('y train shape: %s; y test shape: %s' % onehot_shapes)
print('y train head: %s' % y_train[0, :])
print('y test head: %s' % y_test[0, :])

converting class vector to binary class matrix ...
y train shape: (7859, 46); y test shape: (3369, 46)
y train head: [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
y test head: [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


In [7]:
print('making model ...')
# A single-hidden-layer perceptron over the 0/1 bag-of-words rows, declared as
# a layer list instead of successive add() calls (same layers, same order).
model = Sequential([
    Dense(512, input_shape=(max_words,)),  # hidden layer fed by the max_words-wide input
    Activation('relu'),
    Dropout(0.5),                          # drop half of the hidden units during training
    Dense(num_classes),                    # one output unit per topic
    Activation('softmax'),                 # normalise outputs into class probabilities
])
model.compile(
    loss='categorical_crossentropy',  # matches the one-hot encoded targets
    optimizer='adam',
    metrics=['accuracy'],
)

making model ...


In [8]:
print('training ...')
# Gather the fit options in one place. validation_split = 0.1 holds back 10%
# of the training rows for validation (7073 train / 786 validate in the
# recorded output); verbose = 1 shows per-epoch progress.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_split=0.1,
)
# `history` keeps the object returned by fit() for later inspection.
history = model.fit(x_train, y_train, **fit_options)

training ...
Train on 7073 samples, validate on 786 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [9]:
print('evaluating ...')
# Evaluate on the held-out test split. Because the model was compiled with
# metrics = ['accuracy'], the result is indexed as [loss, accuracy].
score = model.evaluate(x_test, y_test, batch_size = batch_size, verbose = 1)
# Use %-formatting like every other cell: print('label:', value) is parsed as
# a print statement with a tuple under Python 2 and shows "('label:', value)"
# (as in the recorded output), whereas this form prints the same clean text
# on both Python 2 and Python 3.
print('score: %s' % score[0])
print('accuracy: %s' % score[1])

evaluating ...
('accuracy:', 0.80617393878349108)
