<a href="https://colab.research.google.com/github/appliedcode/mthree-c422/blob/mthree-c422-Likhitha/Reuters_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import reuters
import numpy as np

In [None]:
vocab_size = 10000
maxlen = 200

(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=vocab_size)
x_train = pad_sequences(x_train, maxlen=maxlen, padding='post', truncating='post')
x_test  = pad_sequences(x_test,  maxlen=maxlen, padding='post', truncating='post')
num_classes = np.max(y_train) + 1

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
[1m2110848/2110848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32, input_length=maxlen),
    GlobalAveragePooling1D(),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()




In [None]:
history = model.fit(
    x_train, y_train,
    epochs=10,
    batch_size=512,
    validation_split=0.2
)


Epoch 1/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - accuracy: 0.2896 - loss: 3.4549 - val_accuracy: 0.3612 - val_loss: 2.5944
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.3715 - loss: 2.5699 - val_accuracy: 0.3662 - val_loss: 2.4597
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 0.3710 - loss: 2.4670 - val_accuracy: 0.3712 - val_loss: 2.3620
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.3726 - loss: 2.3620 - val_accuracy: 0.3767 - val_loss: 2.2752
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.3910 - loss: 2.2552 - val_accuracy: 0.4068 - val_loss: 2.1993
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.4155 - loss: 2.1607 - val_accuracy: 0.4380 - val_loss: 2.1464
Epoch 7/10
[1m15/15[0m [32m━━━━

In [None]:
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5038 - loss: 1.9788
Test Accuracy: 0.4973


In [None]:
sample_indices = np.random.choice(len(x_test), 5, replace=False)
word_index = reuters.get_word_index()
reverse_index = {v:k for k,v in word_index.items()}
for idx in sample_indices:
    decoded = ' '.join([reverse_index.get(i-3,'?') for i in x_test[idx] if i>3])
    pred = np.argmax(model.predict(x_test[idx:idx+1]))
    print(f"Article {idx}: Predicted topic {pred}, True topic {y_test[idx]}\nText excerpt: {decoded[:200]}...\n")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters_word_index.json
[1m550378/550378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
Article 228: Predicted topic 4, True topic 20
Text excerpt: pegasus gold inc said it cut its long term debt to 85 mln dlrs from 100 mln dlrs when its european voluntarily converted some of their debt holdings into 1 5 mln shares of pegasus common the company s...

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Article 2032: Predicted topic 4, True topic 13
Text excerpt: new zealand ports reopened at hrs local time gmt march 22 after being closed since march 19 because of a strike over pay claims by a federation spokesman said but industrial action by other port worke...

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Article 1856: Predicted topic 3, True topic 3
Text excerpt: shr 88