In [None]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

In [3]:
# 1. Load IMDB dataset
max_features = 10000   # Vocabulary size
max_len = 200          # Cut reviews after 200 words

In [4]:
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 0us/step


In [5]:
# Pad sequences to same length
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)

In [9]:
print("Training shape:", X_train.shape, y_train.shape)
print("Vocabulary size:", max_features)

Training shape: (25000, 200) (25000,)
Vocabulary size: 10000


In [12]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Dense

In [13]:
# One to One RNN
model_one_to_one = Sequential([
    Embedding(max_features, 32, input_length=1),
    SimpleRNN(16, activation='tanh'),
    Dense(1, activation='sigmoid')
])



In [14]:
model_one_to_one.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [15]:
# Use only first word of each review
X_train_1 = X_train[:, :1]
X_test_1 = X_test[:, :1]

model_one_to_one.fit(X_train_1, y_train, epochs=2, batch_size=64, validation_split=0.2)

Epoch 1/2
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4965 - loss: 0.6933 - val_accuracy: 0.5102 - val_loss: 0.6926
Epoch 2/2
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5506 - loss: 0.6851 - val_accuracy: 0.5142 - val_loss: 0.6961


<keras.src.callbacks.history.History at 0x1d368d0b190>

In [17]:
# One to Many RNN
from tensorflow.keras.utils import to_categorical

In [33]:
seq_len = 5
X_seq, y_seq = [], []

# Build input/output pairs: input = first 5 words, output = next 5 words
for review in X_train[:5000]:
    if len(review) > 2*seq_len:
        X_seq.append(review[:seq_len])            # first 5 words
        y_seq.append(review[seq_len:2*seq_len])   # next 5 words

In [34]:
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

In [35]:
# One-hot encode y
y_seq_cat = np.array([to_categorical(seq, num_classes=max_features) for seq in y_seq])

print("X shape:", X_seq.shape)       # (samples, 5)
print("y shape:", y_seq_cat.shape)   # (samples, 5, vocab_size)

X shape: (5000, 5)
y shape: (5000, 5, 10000)


In [36]:
# Model: input sequence of 5 words → output sequence of 5 words
model_one_to_many = Sequential([
    Embedding(max_features, 32, input_length=seq_len),
    SimpleRNN(64, return_sequences=True),
    Dense(max_features, activation='softmax')
])

In [37]:
model_one_to_many.compile(optimizer='adam', loss='categorical_crossentropy')
model_one_to_many.summary()


In [38]:
# Fit model
model_one_to_many.fit(X_seq, y_seq_cat, epochs=2, batch_size=64)

Epoch 1/2
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 38ms/step - loss: 6.4315
Epoch 2/2
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 3.6136


<keras.src.callbacks.history.History at 0x1d35bf4b7d0>

In [39]:
# Many to One RNN
model_many_to_one = Sequential([
    Embedding(max_features, 64, input_length=max_len),
    LSTM(128),
    Dense(1, activation='sigmoid')
])

In [40]:
model_many_to_one.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_many_to_one.fit(X_train, y_train, epochs=3, batch_size=64, validation_split=0.2)

Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 106ms/step - accuracy: 0.7193 - loss: 0.5276 - val_accuracy: 0.8144 - val_loss: 0.4112
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 103ms/step - accuracy: 0.8752 - loss: 0.3100 - val_accuracy: 0.8568 - val_loss: 0.3272
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 103ms/step - accuracy: 0.9138 - loss: 0.2263 - val_accuracy: 0.8316 - val_loss: 0.3792


<keras.src.callbacks.history.History at 0x1d35e1b04d0>

In [41]:
loss, acc = model_many_to_one.evaluate(X_test, y_test)
print("Test Accuracy:", acc)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 25ms/step - accuracy: 0.8310 - loss: 0.3795
Test Accuracy: 0.8309999704360962


In [46]:
seq_len = 5
X_seq2, y_seq2 = [], []

# Build input/output pairs: sliding window
for review in X_train[:2000]:   # use subset for speed/memory
    for i in range(len(review) - seq_len):
        X_seq2.append(review[i:i+seq_len])      # input sequence
        y_seq2.append(review[i+1:i+seq_len+1])  # shifted target

In [47]:
X_seq2 = np.array(X_seq2)
y_seq2 = np.array(y_seq2)

In [48]:
print("X shape:", X_seq2.shape)
print("y shape:", y_seq2.shape)

X shape: (390000, 5)
y shape: (390000, 5)


In [49]:
# Model: sequence input -> sequence output
model_many_to_many = Sequential([
    Embedding(max_features, 64, input_length=seq_len),
    LSTM(128, return_sequences=True),
    Dense(max_features, activation='softmax')
])

In [50]:
# NOTE: use sparse_categorical_crossentropy
model_many_to_many.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model_many_to_many.summary()

In [51]:
# Fit (labels are integers, not one-hot!)
model_many_to_many.fit(X_seq2, y_seq2, epochs=2, batch_size=128)

Epoch 1/2
[1m3047/3047[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m383s[0m 125ms/step - loss: 4.9746
Epoch 2/2
[1m3047/3047[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m401s[0m 132ms/step - loss: 4.3295


<keras.src.callbacks.history.History at 0x1d35e8b1090>

In [53]:
# Shapes of U, W, V, b, c
for layer in model_many_to_many.layers:
    weights = layer.get_weights()
    print(layer.name, "weights shapes:", [w.shape for w in weights])

embedding_8 weights shapes: [(10000, 64)]
lstm_2 weights shapes: [(64, 512), (128, 512), (512,)]
dense_8 weights shapes: [(128, 10000), (10000,)]
