In [5]:
# Loading Dependencies

from tensorflow import keras

In [16]:
# Loading Reuters Dataset
from tensorflow.keras.datasets import reuters
(train_data, train_labels),(test_data, test_labels) = reuters.load_data(num_words = 10000)

In [17]:
len(train_data)


8982

In [18]:
len(test_data)

2246

In [21]:
train_data[10]

[1,
 245,
 273,
 207,
 156,
 53,
 74,
 160,
 26,
 14,
 46,
 296,
 26,
 39,
 74,
 2979,
 3554,
 14,
 46,
 4689,
 4329,
 86,
 61,
 3499,
 4795,
 14,
 61,
 451,
 4329,
 17,
 12]

In [22]:
max([max(sequence) for sequence in train_data])

9999

In [26]:
# Decoding reviews back to English
word_index = keras.datasets.reuters.get_word_index()
reverse_word_index = dict([(value,key) for (key,value) in word_index.items()])
decode_reviews = ' '.join([reverse_word_index.get(i - 3,',') for i in train_data[1]])
print(decode_reviews)
train_labels[1]

, generale de banque sa lt , br and lt heller overseas corp of chicago have each taken 50 pct stakes in , company sa , factors generale de banque said in a statement it gave no financial details of the transaction sa , , turnover in 1986 was 17 5 billion belgian francs reuter 3


4

In [28]:
# Preparing the data by Encoding it.

import numpy as np
# Vectorizing training and test data
def vectorize_sequence(sequences, dimension = 10000):
    results = np.zeros((len(sequences),dimension))
    for i , sequence in enumerate(sequences):
        results[i,sequence] = 1.
        return results
x_train = vectorize_sequence(train_data)
x_test  = vectorize_sequence(test_data)

In [29]:
x_train    #Vectorised training data

array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [30]:
x_test   #Vectorised testing data

array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [31]:
# Vectorizing training and test labels
def to_one_hot(labels,dimention = 46):
    results = np.zeros((len(labels),dimention))
    for i , label in enumerate(labels):
        results[i,label] = 1.
    return results
one_hot_train_labels = to_one_hot(train_labels)
one_hot_test_labels  = to_one_hot(test_labels)

In [32]:
one_hot_train_labels

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [33]:
one_hot_test_labels

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
# Vectorizing training and test labels using Keras( Alternate way)
import numpy as np
from tensorflow.keras.utils.np_utils import to_catedgorical

one_hot_train_labels = to_catedgorical(train_data)
one_hot_test_labels  = to_catedgorical(test_data)

# ModuleNotFoundError: No module named 'tensorflow.keras.utils.np_utils'

In [39]:
# Model Definition

from tensorflow.keras import layers
from tensorflow.keras import models

model = models.Sequential()
model.add(layers.Dense(64, activation = 'relu', input_shape = (10000,)))
model.add(layers.Dense(64, activation = 'relu'))
model.add(layers.Dense(46,  activation = 'softmax'))

In [40]:
# Model Compilation

model.compile(
            optimizer = 'rmsprop',
            loss      = 'categorical_crossentropy',
            metrics   = ['accuracy'])

In [41]:
# Validation approach by setting apart 1000 samples in the training data as validation set.
x_val = x_train[:10000]
partial_x_train = x_train[10000:]

y_val = one_hot_train_labels[:10000]
partial_y_train = one_hot_train_labels[10000:]


In [None]:
# Training the Model

history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs = 20,
                    batch_size= 512,
                    validation_data=(x_val,y_val))