In [1]:
# Reuters data classifications
import numpy as np 
np.random.seed(2)  # for reproducibility
from tensorflow.keras.datasets import reuters
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout , Activation
from tensorflow.keras import utils
from tensorflow.keras.preprocessing.text import Tokenizer

from sklearn import metrics

In [8]:
# defining parameters
max_words = 1000
batch_size = 32
epoch = 5

# Load dataset
print('loading dataset...')
(x_train , y_train) , (x_test , y_test) =  reuters.load_data(num_words= max_words , test_split= 0.2)

print(len(x_train) , 'Train sequence')
print(len(x_test), 'Test sequence')

loading dataset...
8982 Train sequence
2246 Test sequence


In [5]:
# see first data
x_train[0]

[1,
 2,
 2,
 8,
 43,
 10,
 447,
 5,
 25,
 207,
 270,
 5,
 2,
 111,
 16,
 369,
 186,
 90,
 67,
 7,
 89,
 5,
 19,
 102,
 6,
 19,
 124,
 15,
 90,
 67,
 84,
 22,
 482,
 26,
 7,
 48,
 4,
 49,
 8,
 864,
 39,
 209,
 154,
 6,
 151,
 6,
 83,
 11,
 15,
 22,
 155,
 11,
 15,
 7,
 48,
 9,
 2,
 2,
 504,
 6,
 258,
 6,
 272,
 11,
 15,
 22,
 134,
 44,
 11,
 15,
 16,
 8,
 197,
 2,
 90,
 67,
 52,
 29,
 209,
 30,
 32,
 132,
 6,
 109,
 15,
 17,
 12]

In [9]:
# Vectorize the data

print('vectorizing the data....')
tokenizer = Tokenizer(num_words= max_words)
x_train = tokenizer.sequences_to_matrix(x_train , mode='binary')
x_test = tokenizer.sequences_to_matrix(x_test , mode='binary')


vectorizing the data....


In [10]:
print('x_train shape' , x_train.shape)
print('x_test shape' , x_test.shape)

x_train shape (8982, 1000)
x_test shape (2246, 1000)


In [11]:
# Number pf classes
num_classes = np.max(y_train) +1
print(num_classes , 'Number of classes')

46 Number of classes


In [12]:
print('Converting class vector to binary class matrix (for use with categorical_crossentropy)')

y_train = utils.to_categorical(y_train , num_classes= num_classes)
y_test = utils.to_categorical(y_test , num_classes= num_classes)

Converting class vector to binary class matrix (for use with categorical_crossentropy)


In [13]:
print('Y train shape :', y_train.shape)
print('Y test shape :' , y_test.shape)

Y train shape : (8982, 46)
Y test shape : (2246, 46)


# Build model

In [14]:
print('Building model...')

model = Sequential()
model.add(Dense(512 , input_shape=(max_words,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

Building model...


In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               512512    
_________________________________________________________________
activation (Activation)      (None, 512)               0         
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 46)                23598     
_________________________________________________________________
activation_1 (Activation)    (None, 46)                0         
Total params: 536,110
Trainable params: 536,110
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Compiling the model

model.compile(
    loss = 'categorical_crossentropy',
    optimizer= 'adam',
    metrics=['accuracy']
)

history  = model.fit( x_train , y_train , epochs=epoch , batch_size= batch_size , validation_split=0.1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [18]:
# Evaluating the model
score = model.evaluate(x_test , y_test , verbose= 2 , batch_size = batch_size)

print(' Test score : ' ,score)

71/71 - 0s - loss: 0.8634 - accuracy: 0.7956
 Test score :  [0.8634244799613953, 0.7956367135047913]


In [33]:
y_pred = model.predict_classes(x_test)
metrics.accuracy_score(np.argmax(y_test , axis=-1) , y_pred)

0.7956366874443455

In [34]:
cm = metrics.confusion_matrix(np.argmax(y_test , axis=-1) , y_pred)

In [35]:
cm[0 , :]

array([8, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0], dtype=int64)

In [36]:
cr = metrics.classification_report(np.argmax(y_test , axis=-1) , y_pred)

In [None]:
print(cr)