# INTRODUCTION
A GRU (Gated Recurrent Unit) is a type of Recurrent Neural Network (RNN) that helps the model remember long-term dependencies, but with a simpler design than an LSTM (Long Short-Term Memory) network.

It uses two gates (an update gate and a reset gate) that act like switches to decide:
- What information to keep
- What to forget
- What new information to add

In [2]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense

# 1. Load the IMDB sentiment dataset.
# Only word indices below `max_features` are kept; less frequent words are
# replaced by the out-of-vocabulary marker inside `imdb.load_data`.
max_features = 10000   # vocabulary size (also the Embedding input dimension)
maxlen = 100           # every review is padded/truncated to this many tokens

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 2. Pad sequences so all reviews have the same length.
# With its default settings, pad_sequences pads AND truncates at the start of
# each sequence, so for long reviews the LAST `maxlen` tokens are the ones kept.
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

# 3. Build a simple GRU model.
# The Embedding layer no longer receives `input_length`: that argument is
# deprecated in Keras 3 (it only triggers a warning), and the sequence length
# is inferred from the padded input the first time the model is called.
model = Sequential([
    Embedding(max_features, 128),        # word index -> 128-dim dense vector
    GRU(64, return_sequences=False),     # keep only the final hidden state
    Dense(1, activation='sigmoid'),      # probability of a positive review
])

# 4. Compile the model for binary classification.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# 5. Train the model.
# NOTE: the test split doubles as validation data here. That is fine for
# monitoring progress, but if these validation numbers are ever used to tune
# the model (epochs, layer sizes, ...), hold out part of the training set
# instead so the final test score stays an unbiased estimate.
model.fit(X_train, y_train,
          epochs=3,
          batch_size=64,
          validation_data=(X_test, y_test))

# 6. Evaluate performance on the test split.
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.3f}")

Epoch 1/3




391/391 ━━━━━━━━━━━━━━━━━━━━ 9s 22ms/step - accuracy: 0.7792 - loss: 0.4457 - val_accuracy: 0.8432 - val_loss: 0.3595
Epoch 2/3
391/391 ━━━━━━━━━━━━━━━━━━━━ 8s 22ms/step - accuracy: 0.8924 - loss: 0.2692 - val_accuracy: 0.8548 - val_loss: 0.3343
Epoch 3/3
391/391 ━━━━━━━━━━━━━━━━━━━━ 8s 22ms/step - accuracy: 0.9318 - loss: 0.1834 - val_accuracy: 0.8505 - val_loss: 0.3699
782/782 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8505 - loss: 0.3699
Test Accuracy: 0.851


In [4]:
# Print a per-layer overview of the model (output shapes and parameter counts).
model.summary()

> GRU is like a lighter, faster version of LSTM — it captures temporal patterns efficiently with fewer parameters.
>
> LSTM, however, offers finer control over memory and is often preferred when handling very long dependencies in large datasets.