# Reuters News Topic Classification on Google Colab

**Problem Statement:**  
Build and train a text-classification model to categorize Reuters news articles into topics using the built-in Reuters dataset—no external API keys needed.


In [None]:
# 1. Import Libraries
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import reuters
import numpy as np


In [None]:
# 2. Load & Preprocess Data
# Vocabulary cap passed to the loader and the fixed token length of every article.
vocab_size = 10000
maxlen = 200

(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=vocab_size)

# Both splits share the same padding rules: pad and cut at the END of each
# sequence, so the opening tokens of long articles are the ones that survive.
pad_options = dict(maxlen=maxlen, padding='post', truncating='post')
x_train = pad_sequences(x_train, **pad_options)
x_test = pad_sequences(x_test, **pad_options)

# Labels are integer topic ids, so the class count is the largest id plus one.
num_classes = y_train.max() + 1


In [None]:
# 3. Define the Model
# Bag-of-embeddings classifier: embed each token, average over the sequence,
# then classify with a small dense head.
#
# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument: that argument is deprecated in Keras 3
# (it only triggers a warning and is ignored), which would leave the model
# unbuilt and make `model.summary()` print no output shapes or parameter
# counts. An explicit Input works on both Keras 2 and Keras 3.
model = Sequential([
    tf.keras.Input(shape=(maxlen,), dtype='int32'),  # padded token-id sequences
    Embedding(input_dim=vocab_size, output_dim=32),
    GlobalAveragePooling1D(),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax'),  # one probability per topic
])
model.compile(
    optimizer='adam',
    # Labels are integer class ids (not one-hot), hence the sparse loss.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# 4. Train the Model
# `history` keeps the per-epoch metrics returned by fit() for later inspection.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=512,
    epochs=10,
    # Keras holds out the last 20% of the training arrays for validation.
    validation_split=0.2,
)


In [None]:
# 5. Evaluate on Test Set
# evaluate() returns the loss followed by each compiled metric; the model was
# compiled with a single metric (accuracy), so exactly two values come back.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)
print(f"Test Accuracy: {test_acc:.4f}")


In [None]:
# 6. Sample Predictions
# Pick up to five distinct test articles (fewer if the test set is smaller, so
# sampling without replacement cannot fail on a tiny split).
num_samples = min(5, len(x_test))
sample_indices = np.random.choice(len(x_test), num_samples, replace=False)

# word -> id mapping shipped with the dataset, inverted to id -> word.
word_index = reuters.get_word_index()
reverse_index = {token_id: word for word, token_id in word_index.items()}

# Run ONE batched forward pass for all sampled articles instead of calling
# predict() once per article inside the loop; verbose=0 keeps Keras' progress
# bar from interleaving with the printed report.
sample_probs = model.predict(x_test[sample_indices], verbose=0)
sample_preds = np.argmax(sample_probs, axis=1)

for idx, pred in zip(sample_indices, sample_preds):
    # With load_data's default index_from=3, token ids are shifted up by 3 and
    # the low ids are reserved (padding / start / out-of-vocabulary markers),
    # so ids <= 3 are skipped and 3 is subtracted before the lookup.
    decoded = ' '.join(reverse_index.get(i - 3, '?') for i in x_test[idx] if i > 3)
    print(f"Article {idx}: Predicted topic {pred}, True topic {y_test[idx]}\nText excerpt: {decoded[:200]}...\n")
