In [21]:
from tensorflow.keras.datasets import reuters
# Load the Reuters newswire dataset, already split into train/test, with the vocabulary capped via num_words=10000.
(train_x, train_y), (test_x, test_y) = reuters.load_data(num_words=10000)

In [22]:
# Inspect the raw training samples: each one is a variable-length list of integer word indices.
train_x

array([list([1, 2, 2, 8, 43, 10, 447, 5, 25, 207, 270, 5, 3095, 111, 16, 369, 186, 90, 67, 7, 89, 5, 19, 102, 6, 19, 124, 15, 90, 67, 84, 22, 482, 26, 7, 48, 4, 49, 8, 864, 39, 209, 154, 6, 151, 6, 83, 11, 15, 22, 155, 11, 15, 7, 48, 9, 4579, 1005, 504, 6, 258, 6, 272, 11, 15, 22, 134, 44, 11, 15, 16, 8, 197, 1245, 90, 67, 52, 29, 209, 30, 32, 132, 6, 109, 15, 17, 12]),
       list([1, 3267, 699, 3434, 2295, 56, 2, 7511, 9, 56, 3906, 1073, 81, 5, 1198, 57, 366, 737, 132, 20, 4093, 7, 2, 49, 2295, 2, 1037, 3267, 699, 3434, 8, 7, 10, 241, 16, 855, 129, 231, 783, 5, 4, 587, 2295, 2, 2, 775, 7, 48, 34, 191, 44, 35, 1795, 505, 17, 12]),
       list([1, 53, 12, 284, 15, 14, 272, 26, 53, 959, 32, 818, 15, 14, 272, 26, 39, 684, 70, 11, 14, 12, 3886, 18, 180, 183, 187, 70, 11, 14, 102, 32, 11, 29, 53, 44, 704, 15, 14, 19, 758, 15, 53, 959, 47, 1013, 15, 14, 19, 132, 15, 39, 965, 32, 11, 14, 147, 72, 11, 180, 183, 187, 44, 11, 14, 102, 19, 11, 123, 186, 90, 67, 960, 4, 78, 13, 68, 467, 511, 110,

In [23]:
# Encode the data as multi-hot vectors before feeding it to the network, because the network expects contiguous, fixed-size batches of data.
# We convert each list of integers into one row of a (samples, 10000) float matrix whose entry j is 1 if word index j occurs in that sample and 0 otherwise.
import numpy as np
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode a collection of integer index sequences.

    Args:
        sequences: Sized iterable of sequences; each inner sequence holds the
            integer indices (e.g. word ids) present in one sample.
        dimension: Width of each output row. Every index must be a valid
            column position for this width.

    Returns:
        A float ``numpy.ndarray`` of shape ``(len(sequences), dimension)``
        where ``results[i, j] == 1.0`` if index ``j`` occurs in sample ``i``
        and ``0.0`` otherwise. Repeated indices still yield a single 1, so
        counts and ordering are discarded (multi-hot, not one-hot).

    Raises:
        IndexError: If an index is out of bounds for ``dimension``.
    """
    # Start from an all-zero matrix: one row per sample, one column per index.
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # A single fancy-indexed assignment sets every listed column of row i
        # at once, instead of looping over the indices one by one in Python.
        results[i, list(sequence)] = 1.
    return results
# Swap the raw index lists for their vectorized matrices.
# NOTE: this overwrites train_x/test_x, so re-run the data-loading cell before re-running this one.
train_x, test_x = vectorize_sequences(train_x), vectorize_sequences(test_x)

In [24]:
# Inspect the vectorized training matrix (one row of 0./1. flags per sample).
train_x

array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.]])

In [25]:
#encode the labels using one-hot encoding
from tensorflow.keras.utils import to_categorical
# Pin the one-hot width to 46 (the size of the model's softmax layer) so both
# splits get identical label shapes even if one of them lacks the highest class id.
train_y = to_categorical(train_y, num_classes=46)
test_y = to_categorical(test_y, num_classes=46)

In [26]:
# Inspect the one-hot encoded training labels.
train_y

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [27]:
from tensorflow import keras
from tensorflow.keras import layers  # `layers` is used below; import it so this cell runs on a fresh kernel

# Two 64-unit ReLU hidden layers on top of the 10000-wide input vectors,
# followed by a 46-way softmax (must match the width of the one-hot labels).
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax')
])

In [28]:
# One-hot targets pair with categorical cross-entropy; accuracy is tracked as the metric.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# Set aside the first VAL_SIZE training samples as a validation set;
# the remainder is what the model actually trains on.
VAL_SIZE = 1000
val_x, partial_train_x = train_x[:VAL_SIZE], train_x[VAL_SIZE:]
val_y, partial_train_y = train_y[:VAL_SIZE], train_y[VAL_SIZE:]


In [29]:
# Train on the reduced training set, scoring the held-out validation split after every epoch.
history = model.fit(
    partial_train_x,
    partial_train_y,
    epochs=9,
    batch_size=512,
    validation_data=(val_x, val_y),
)

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


In [30]:
# Final check on the test split; with the compile settings used above the result is [loss, accuracy].
model.evaluate(test_x, test_y)



[0.9570677876472473, 0.7831701040267944]