In [None]:
# Text Classification with LSTM and GRU Networks
# A Complete Tutorial with Real-World Examples

## Overview
# This notebook demonstrates text classification using LSTM and GRU networks
# We'll use the IMDB movie reviews dataset for sentiment analysis
# You'll learn to build, train, and compare both architectures

# Required installations (run in terminal):
# pip install tensorflow numpy pandas matplotlib seaborn scikit-learn wordcloud

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, GRU, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import warnings
# Silence library warnings so the tutorial output stays readable
warnings.filterwarnings(action='ignore')

print("TensorFlow version:", tf.__version__)

# Seed NumPy and TensorFlow with one shared value for reproducibility
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
## 1. Data Loading and Exploration

# Load the IMDB dataset - 50,000 movie reviews (25k train, 25k test),
# restricted to the 10,000 most frequent words.
vocab_size = 10000
max_length = 500  # Maximum sequence length

print("Loading IMDB dataset...")
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")
print(f"Vocabulary size: {vocab_size}")

# Peek at the raw structure: each review is a list of integer token ids
print(f"\nFirst review (encoded): {X_train[0][:20]}...")
print(f"First review label: {y_train[0]} (0=negative, 1=positive)")

# Measure every review once, then report the extremes
review_lengths = [len(review) for review in X_train]
print(f"Review lengths - Min: {min(review_lengths)}, Max: {max(review_lengths)}")

# Build the inverse vocabulary (id -> word) so encoded reviews can be read back
word_index = imdb.get_word_index()
reverse_word_index = {idx: word for word, idx in word_index.items()}

def decode_review(encoded_review):
    """Turn a sequence of dataset token ids into a space-separated string.

    Each id is shifted down by 3 before the lookup; ids with no entry in
    `reverse_word_index` are rendered as '?'.
    """
    decoded_words = []
    for token_id in encoded_review:
        decoded_words.append(reverse_word_index.get(token_id - 3, '?'))
    return ' '.join(decoded_words)

print(f"\nDecoded first review: {decode_review(X_train[0])[:200]}...")

In [None]:
## 2. Data Preprocessing

# Bring every review to exactly `max_length` tokens (padding is added at the end)
print("Padding sequences...")
X_train_padded, X_test_padded = (
    pad_sequences(reviews, maxlen=max_length, padding='post')
    for reviews in (X_train, X_test)
)

print(f"Training data shape: {X_train_padded.shape}")
print(f"Testing data shape: {X_test_padded.shape}")

# Visualize sequence lengths and class balance side by side
fig, (ax_lengths, ax_classes) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: how long the raw (unpadded) reviews are, with the padding cutoff marked
lengths = [len(x) for x in X_train]
ax_lengths.hist(lengths, bins=50, alpha=0.7)
ax_lengths.axvline(max_length, color='red', linestyle='--', label=f'Max length: {max_length}')
ax_lengths.set_xlabel('Review Length')
ax_lengths.set_ylabel('Frequency')
ax_lengths.set_title('Distribution of Review Lengths')
ax_lengths.legend()

# Right panel: one bar per class, centred on its label value.
# A 2-bin histogram over [0, 1] would centre the bars at 0.25 and 0.75, so the
# tick labels placed at 0 and 1 would not line up with the bars.
class_counts = np.bincount(y_train, minlength=2)
ax_classes.bar([0, 1], class_counts, alpha=0.7)
ax_classes.set_xlabel('Sentiment')
ax_classes.set_ylabel('Frequency')
ax_classes.set_title('Class Distribution')
ax_classes.set_xticks([0, 1])
ax_classes.set_xticklabels(['Negative', 'Positive'])

fig.tight_layout()
plt.show()

In [None]:
## 3. Model Architecture Comparison

# Define function to create LSTM model
def create_lstm_model(vocab_size, embedding_dim=128, lstm_units=64, dropout_rate=0.5,
                      sequence_length=None):
    """Build an uncompiled Embedding -> LSTM -> Dense binary classifier.

    Args:
        vocab_size: Number of distinct token ids (rows of the embedding table).
        embedding_dim: Width of each learned word vector.
        lstm_units: Size of the LSTM hidden state.
        dropout_rate: Rate applied to the LSTM inputs, its recurrent state, and
            the Dropout layer in front of the output unit.
        sequence_length: Number of tokens per input sequence. When omitted, the
            notebook-level `max_length` is used.

    Returns:
        A `Sequential` model ending in a single sigmoid unit.
    """
    if sequence_length is None:
        sequence_length = max_length

    model = Sequential([
        # An explicit Input replaces Embedding's `input_length` argument (deprecated
        # in newer Keras) and still builds the model up front, so summary() works
        # before training.
        tf.keras.Input(shape=(sequence_length,), dtype='int32'),
        # mask_zero=True: id 0 is the padding value, so the recurrent layer skips
        # padded timesteps instead of treating them as real words.
        Embedding(vocab_size, embedding_dim, mask_zero=True),
        LSTM(lstm_units, dropout=dropout_rate, recurrent_dropout=dropout_rate),
        Dense(32, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')
    ])
    return model

# Define function to create GRU model
def create_gru_model(vocab_size, embedding_dim=128, gru_units=64, dropout_rate=0.5,
                     sequence_length=None):
    """Build an uncompiled Embedding -> GRU -> Dense binary classifier.

    Args:
        vocab_size: Number of distinct token ids (rows of the embedding table).
        embedding_dim: Width of each learned word vector.
        gru_units: Size of the GRU hidden state.
        dropout_rate: Rate applied to the GRU inputs, its recurrent state, and
            the Dropout layer in front of the output unit.
        sequence_length: Number of tokens per input sequence. When omitted, the
            notebook-level `max_length` is used.

    Returns:
        A `Sequential` model ending in a single sigmoid unit.
    """
    if sequence_length is None:
        sequence_length = max_length

    model = Sequential([
        # An explicit Input replaces Embedding's `input_length` argument (deprecated
        # in newer Keras) and still builds the model up front, so summary() works
        # before training.
        tf.keras.Input(shape=(sequence_length,), dtype='int32'),
        # mask_zero=True: id 0 is the padding value, so the recurrent layer skips
        # padded timesteps instead of treating them as real words.
        Embedding(vocab_size, embedding_dim, mask_zero=True),
        GRU(gru_units, dropout=dropout_rate, recurrent_dropout=dropout_rate),
        Dense(32, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')
    ])
    return model

# Define function to create Bidirectional LSTM model
def create_bilstm_model(vocab_size, embedding_dim=128, lstm_units=64, dropout_rate=0.5,
                        sequence_length=None):
    """Build an uncompiled Embedding -> Bidirectional(LSTM) -> Dense binary classifier.

    Args:
        vocab_size: Number of distinct token ids (rows of the embedding table).
        embedding_dim: Width of each learned word vector.
        lstm_units: Hidden-state size of the LSTM wrapped by `Bidirectional`.
        dropout_rate: Rate applied to the LSTM inputs, its recurrent state, and
            the Dropout layer in front of the output unit.
        sequence_length: Number of tokens per input sequence. When omitted, the
            notebook-level `max_length` is used.

    Returns:
        A `Sequential` model ending in a single sigmoid unit.
    """
    if sequence_length is None:
        sequence_length = max_length

    model = Sequential([
        # An explicit Input replaces Embedding's `input_length` argument (deprecated
        # in newer Keras) and still builds the model up front, so summary() works
        # before training.
        tf.keras.Input(shape=(sequence_length,), dtype='int32'),
        # mask_zero=True: id 0 is the padding value, so both directions skip
        # padded timesteps instead of treating them as real words.
        Embedding(vocab_size, embedding_dim, mask_zero=True),
        Bidirectional(LSTM(lstm_units, dropout=dropout_rate, recurrent_dropout=dropout_rate)),
        Dense(32, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')
    ])
    return model


In [None]:
## 4. Training Setup

# Hold out 20% of the padded training reviews for validation, keeping the
# positive/negative ratio the same in both parts
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_train_padded,
    y_train,
    test_size=0.2,
    stratify=y_train,
    random_state=42,
)

print(f"Training set: {X_train_split.shape[0]} samples")
print(f"Validation set: {X_val_split.shape[0]} samples")

# Both callbacks watch the validation loss:
# - stop after 3 epochs without improvement and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=3,
                               restore_best_weights=True, verbose=1)
# - halve the learning rate after 2 epochs without improvement (floor: 1e-6)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=2, factor=0.5,
                              min_lr=1e-6, verbose=1)

callbacks = [early_stopping, reduce_lr]

In [None]:
## 5. Train and Compare Models

def train_model(title, build_fn, batch_size=128, epochs=10):
    """Build, compile and fit one architecture on the train/validation split.

    Args:
        title: Human-readable name shown in the progress banner.
        build_fn: Model factory called as `build_fn(vocab_size)`.
        batch_size: Mini-batch size passed to `fit`.
        epochs: Upper bound on training epochs (the shared callbacks may stop earlier).

    Returns:
        A `(model, history)` tuple: the fitted model and the object returned by `fit`.
    """
    print("\n" + "="*50)
    print(f"Training {title} Model")
    print("="*50)

    model = build_fn(vocab_size)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # summary() prints the table itself and returns None; wrapping it in print()
    # would add a stray "None" line to the output.
    model.summary()

    history = model.fit(
        X_train_split, y_train_split,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_val_split, y_val_split),
        callbacks=callbacks,
        verbose=1
    )
    return model, history

# (results key, banner title, model factory) - every model is trained the same way
model_specs = [
    ('LSTM', 'LSTM', create_lstm_model),
    ('GRU', 'GRU', create_gru_model),
    ('BiLSTM', 'Bidirectional LSTM', create_bilstm_model),
]

models = {}
histories = {}
for spec_key, spec_title, spec_builder in model_specs:
    models[spec_key], histories[spec_key] = train_model(spec_title, spec_builder)

# Keep the individual variable names available alongside the dictionaries
lstm_model, gru_model, bilstm_model = models['LSTM'], models['GRU'], models['BiLSTM']
history_lstm, history_gru, history_bilstm = (
    histories['LSTM'], histories['GRU'], histories['BiLSTM']
)

In [None]:
## 6. Visualization of Training Progress

def plot_training_history(histories):
    """Draw a 2x2 grid of learning curves, one line per model in each panel.

    Args:
        histories: Mapping of model name -> object whose `.history` dict holds
            the 'loss', 'val_loss', 'accuracy' and 'val_accuracy' series.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # (grid position, key in history.history, panel title, y-axis label)
    panels = (
        ((0, 0), 'loss', 'Training Loss', 'Loss'),
        ((0, 1), 'val_loss', 'Validation Loss', 'Loss'),
        ((1, 0), 'accuracy', 'Training Accuracy', 'Accuracy'),
        ((1, 1), 'val_accuracy', 'Validation Accuracy', 'Accuracy'),
    )

    for position, metric, title, y_label in panels:
        ax = axes[position]
        ax.set_title(title)
        for name, history in histories.items():
            ax.plot(history.history[metric], label=f'{name}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

plot_training_history(histories)

In [None]:
## 7. Model Evaluation

def evaluate_model(model, X_test, y_test, model_name):
    """Report test metrics for one trained model and show its confusion matrix.

    Prints the test accuracy and loss plus a per-class classification report,
    displays a confusion-matrix heatmap, and returns the test accuracy.
    """
    class_names = ['Negative', 'Positive']

    print(f"\n{model_name} Evaluation:")
    print("-" * 30)

    # Hard labels: a sigmoid output above 0.5 counts as positive
    probabilities = model.predict(X_test, verbose=0)
    predicted_labels = (probabilities.flatten() > 0.5).astype(int)

    # Loss and accuracy as computed by the compiled model
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {test_accuracy:.4f}")
    print(f"Test Loss: {test_loss:.4f}")

    # Per-class precision / recall / F1
    print("\nClassification Report:")
    print(classification_report(y_test, predicted_labels, target_names=class_names))

    # Confusion matrix rendered as an annotated heatmap
    matrix = confusion_matrix(y_test, predicted_labels)

    plt.figure(figsize=(6, 4))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title(f'{model_name} - Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()

    return test_accuracy

# Run the evaluation for each trained model, keeping its test accuracy by name
results = {}
for name, model in models.items():
    results[name] = evaluate_model(model, X_test_padded, y_test, name)

# Side-by-side accuracy summary
print("\n" + "="*50, "MODEL COMPARISON SUMMARY", "="*50, sep="\n")
for name, accuracy in results.items():
    print(f"{name}: {accuracy:.4f}")

In [None]:
## 8. Real-World Application Example

def predict_sentiment(text, model, word_index, max_length=500, num_words=None, index_from=3):
    """Predict the sentiment of a raw text string.

    The text is encoded following the defaults of `imdb.load_data`: a start
    marker (id 1) comes first, every word's rank from `word_index` is shifted by
    `index_from`, and id 2 stands in for words that are unknown or whose shifted
    id is not below `num_words`. Looking words up without that shift would feed
    the model ids that mean different words than the ones it was trained on.

    Args:
        text: Review text to classify.
        model: Trained model exposing `predict`.
        word_index: Mapping of word -> frequency rank (as from `imdb.get_word_index()`).
        max_length: Sequence length the model expects.
        num_words: Vocabulary limit used at training time. When omitted, the
            notebook-level `vocab_size` is used.
        index_from: Offset between `word_index` ranks and dataset token ids.

    Returns:
        `(sentiment, confidence)`: "Positive" or "Negative", and the probability
        assigned to that label as a float.
    """
    import re  # local import keeps this cell self-contained

    if num_words is None:
        num_words = vocab_size

    start_id, oov_id = 1, 2

    # Split on anything that is not a letter, digit or apostrophe so that
    # "fantastic!" is looked up as "fantastic".
    # NOTE(review): assumes vocabulary keys are lowercase words without
    # surrounding punctuation - confirm against the dataset's tokenization.
    tokens = re.findall(r"[a-z0-9']+", text.lower())

    sequence = [start_id]
    for token in tokens:
        rank = word_index.get(token)
        token_id = oov_id if rank is None else rank + index_from
        sequence.append(token_id if token_id < num_words else oov_id)

    # Pad sequence
    padded = pad_sequences([sequence], maxlen=max_length, padding='post')

    # Predict
    score = float(model.predict(padded, verbose=0)[0][0])

    is_positive = score > 0.5
    sentiment = "Positive" if is_positive else "Negative"
    confidence = score if is_positive else 1.0 - score

    return sentiment, confidence

# Hand-written reviews covering clearly positive, clearly negative and mixed opinions
test_reviews = [
    "This movie was absolutely fantastic! The acting was superb and the plot was engaging.",
    "Terrible movie. Waste of time. Poor acting and boring storyline.",
    "The movie was okay, nothing special but not bad either.",
    "Amazing cinematography and outstanding performances by all actors. Highly recommended!",
    "Worst movie I've ever seen. Couldn't even finish watching it."
]

print("\n" + "="*50, "CUSTOM REVIEW PREDICTIONS", "="*50, sep="\n")

# Pick whichever model scored the highest test accuracy above
best_model_name = max(results, key=lambda model_name: results[model_name])
best_model = models[best_model_name]

print(f"Using {best_model_name} model (accuracy: {results[best_model_name]:.4f})")
print("-" * 50)

for review_number, review in enumerate(test_reviews, start=1):
    sentiment, confidence = predict_sentiment(review, best_model, word_index)
    print(f"Review {review_number}: {review}")
    print(f"Prediction: {sentiment} (confidence: {confidence:.4f})")
    print("-" * 50)

In [None]:
## 9. Key Takeaways and Next Steps

# General observations about the architectures; this text is fixed and is not
# derived from the metrics computed earlier in the notebook.
takeaways = """
1. LSTM vs GRU Performance:
   - Both models show similar performance on this task
   - GRU is slightly faster to train due to fewer parameters
   - LSTM has separate forget and input gates, potentially better for complex sequences

2. Bidirectional Models:
   - BiLSTM often performs better as it processes sequences in both directions
   - Captures context from both past and future words

3. Model Architecture Insights:
   - Embedding layer converts words to dense vectors
   - Dropout prevents overfitting
   - Dense layers add classification capacity

4. Performance Factors:
   - Sequence length impacts performance
   - Vocabulary size affects model complexity
   - Early stopping prevents overfitting

NEXT STEPS FOR IMPROVEMENT:
- Try pre-trained embeddings (Word2Vec, GloVe)
- Experiment with attention mechanisms
- Use larger models or ensemble methods
- Try different preprocessing techniques
- Implement cross-validation for robust evaluation
"""

print("\n" + "="*50, "KEY TAKEAWAYS", "="*50, sep="\n")
print(takeaways)


In [None]:
## 10. Production-Ready Prediction Function

class SentimentClassifier:
    """Wrap a trained sentiment model together with its text preprocessing.

    Raw text is encoded following the defaults of `imdb.load_data`: a start
    marker (id 1) comes first, every word's rank from `word_index` is shifted by
    `index_from`, and id 2 stands in for words that are unknown or whose shifted
    id is not below `vocab_size`.
    """

    START_ID = 1  # marker placed at the beginning of every encoded text
    OOV_ID = 2    # placeholder for out-of-vocabulary words

    def __init__(self, model, word_index, vocab_size=10000, max_length=500, index_from=3):
        """Store the model and the encoding parameters.

        Args:
            model: Trained model exposing `predict`.
            word_index: Mapping of word -> frequency rank.
            vocab_size: Vocabulary limit used at training time.
            max_length: Sequence length the model expects.
            index_from: Offset between `word_index` ranks and dataset token ids.
        """
        self.model = model
        self.word_index = word_index
        self.vocab_size = vocab_size
        self.max_length = max_length
        self.index_from = index_from

    def encode_text(self, text):
        """Convert raw text into a list of token ids (unpadded)."""
        import re  # local import keeps this cell self-contained

        # Split on anything that is not a letter, digit or apostrophe so that
        # "fantastic!" is looked up as "fantastic".
        # NOTE(review): assumes vocabulary keys are lowercase words without
        # surrounding punctuation - confirm against the dataset's tokenization.
        tokens = re.findall(r"[a-z0-9']+", text.lower())

        sequence = [self.START_ID]
        for token in tokens:
            rank = self.word_index.get(token)
            token_id = self.OOV_ID if rank is None else rank + self.index_from
            sequence.append(token_id if token_id < self.vocab_size else self.OOV_ID)
        return sequence

    def preprocess_text(self, text):
        """Preprocess text for prediction: encode it and pad to `max_length`."""
        return pad_sequences([self.encode_text(text)], maxlen=self.max_length, padding='post')

    @staticmethod
    def _to_result(score):
        """Turn one sigmoid output into the result dictionary returned to callers."""
        score = float(score)
        is_positive = score > 0.5
        return {
            'sentiment': "Positive" if is_positive else "Negative",
            'confidence': score if is_positive else 1.0 - score,
            'raw_score': score
        }

    def predict(self, text):
        """Predict sentiment with confidence score"""
        processed_text = self.preprocess_text(text)
        prediction = self.model.predict(processed_text, verbose=0)[0][0]
        return self._to_result(prediction)

    def batch_predict(self, texts):
        """Predict sentiment for multiple texts with a single model call.

        Returns a list of result dictionaries in the same order as `texts`
        (an empty list when no texts are given).
        """
        texts = list(texts)
        if not texts:
            return []

        batch = pad_sequences(
            [self.encode_text(text) for text in texts],
            maxlen=self.max_length,
            padding='post'
        )
        scores = self.model.predict(batch, verbose=0)
        return [self._to_result(row[0]) for row in scores]

# Wrap the selected model so that callers can pass raw text directly
classifier = SentimentClassifier(model=best_model, word_index=word_index)

# Example usage
sample_text = "This movie exceeded all my expectations. Brilliant storytelling!"
result = classifier.predict(sample_text)
print(f"\nProduction Classifier Result: {result}")

# Closing banner
print(
    "\n" + "="*50,
    "TUTORIAL COMPLETE!",
    "="*50,
    "You now have a complete text classification system using LSTM/GRU networks!",
    "The SentimentClassifier class is ready for production use.",
    sep="\n",
)