In [1]:
# Task 3
#-----------------------------------------
# Step 0: Import Required Libraries
# -----------------------------------------
import numpy as np
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# -----------------------------------------
# Step 1: Load the Reuters Dataset
# -----------------------------------------
# The Reuters dataset contains short news articles classified into 46 categories.
# Limit the vocabulary to the 10,000 most common words for simplicity.
max_words = 10000  # Vocabulary size
maxlen = 200       # Sequence length after padding (truncate/extend to 200 words)

print("Loading Reuters dataset...")
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words)

print(f"Training samples: {len(x_train)}")
print(f"Test samples: {len(x_test)}")
print(f"Number of categories: {np.max(y_train) + 1}")  # should be 46

# -----------------------------------------
# Step 2: Pad Sequences to the Same Length
# -----------------------------------------
# LSTM models require fixed-length input.
# pad_sequences will ensure each news article has exactly 200 tokens.
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# -----------------------------------------
# Step 3: One-Hot Encode the Labels
# -----------------------------------------
# Labels are integers (0–45). We need to convert them to one-hot vectors
# for multi-class classification with 'categorical_crossentropy' loss.
num_classes = np.max(y_train) + 1  # 46 classes
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

# -----------------------------------------
# Step 4: Build the LSTM Model
# -----------------------------------------
model = Sequential()

# Layer 1: Embedding
# - Turns word indices into dense vectors of length 128
# - Input dimension: vocabulary size (max_words)
# - Output dimension: embedding size (128)
# - Input length: length of each padded sequence (maxlen)
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=maxlen))

# Layer 2: LSTM Layer
# - 64 units to capture sequential dependencies in text
model.add(LSTM(64))

# Layer 3: Output Layer
# - Dense layer with softmax activation to output probabilities for each of 46 classes
model.add(Dense(num_classes, activation='softmax'))

# -----------------------------------------
# Step 5: Compile the Model
# -----------------------------------------
# Loss: categorical_crossentropy (multi-class classification)
# Optimizer: Adam (efficient and adaptive)
# Metric: accuracy (how often predictions match labels)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Print the model architecture
print(model.summary())

# -----------------------------------------
# Step 6: Train the Model
# -----------------------------------------
# - epochs: number of times the model sees the entire dataset
# - batch_size: number of samples per gradient update
# - validation_split: percentage of training data used for validation
history = model.fit(x_train, y_train,
                    epochs=5,
                    batch_size=64,
                    validation_split=0.2,
                    verbose=1)

# -----------------------------------------
# Step 7: Evaluate the Model on Test Data
# -----------------------------------------
# This gives us a final accuracy score on unseen data
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f"Test Accuracy: {test_acc:.4f}")


Loading Reuters dataset...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
[1m2110848/2110848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training samples: 8982
Test samples: 2246
Number of categories: 46




None
Epoch 1/5
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 254ms/step - accuracy: 0.3542 - loss: 2.8876 - val_accuracy: 0.4780 - val_loss: 2.0819
Epoch 2/5
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 238ms/step - accuracy: 0.5068 - loss: 2.0022 - val_accuracy: 0.5426 - val_loss: 1.7936
Epoch 3/5
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 259ms/step - accuracy: 0.5684 - loss: 1.7102 - val_accuracy: 0.5899 - val_loss: 1.6280
Epoch 4/5
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 249ms/step - accuracy: 0.6064 - loss: 1.5193 - val_accuracy: 0.6127 - val_loss: 1.5335
Epoch 5/5
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 246ms/step - accuracy: 0.6354 - loss: 1.4261 - val_accuracy: 0.6422 - val_loss: 1.4792
Test Accuracy: 0.6233
