### Train a simple recurrent neural network (SimpleRNN) on the IMDB dataset for sentiment classification and export the learned word embeddings

In [3]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Dense, Embedding, Input, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [5]:
# Fetch the IMDB review dataset as integer-encoded sequences with binary
# labels. num_words=10000 caps the vocabulary at word indices below 10000.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=10000,
)

In [7]:
# Bring every review to a fixed length of 200 indices (pad_sequences pads
# short sequences with 0 and truncates long ones) so they can be batched.
x_train, x_test = (
    pad_sequences(split, maxlen=200) for split in (x_train, x_test)
)

In [9]:
# Build the model
# Architecture: integer word indices -> 50-d learned embeddings -> a
# 100-unit SimpleRNN over the sequence -> one sigmoid unit (binary output).
#
# The sequence length is declared with an explicit Input layer instead of
# Embedding's `input_length` argument, which Keras 3 deprecates and ignores.
# With the Input layer the model is built right away, so model.summary()
# can report output shapes and parameter counts before training.
# Sequential.layers does not list the Input layer, so model.layers[0] is
# still the Embedding layer.
model = Sequential()
model.add(Input(shape=(200,)))
model.add(Embedding(input_dim=10000, output_dim=50))
model.add(SimpleRNN(100))
model.add(Dense(1, activation='sigmoid'))



In [11]:
# Configure training: Adam optimizer, binary cross-entropy loss (matching
# the single sigmoid output), and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Show the model architecture: print Keras's per-layer summary of `model`.
model.summary()

In [15]:
# Fit on the padded training reviews for 3 epochs in mini-batches of 32.
# The call is left as the cell's last expression, so the returned History
# object is echoed as the cell output.
model.fit(
    x_train,
    y_train,
    epochs=3,
    batch_size=32,
)

Epoch 1/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 42ms/step - accuracy: 0.5859 - loss: 0.6514
Epoch 2/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 42ms/step - accuracy: 0.7686 - loss: 0.4884
Epoch 3/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 42ms/step - accuracy: 0.7654 - loss: 0.4875


<keras.src.callbacks.history.History at 0x1f77fbc3bf0>

In [17]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the compiled metric (accuracy), reported as a percentage.
loss, accuracy = model.evaluate(
    x_test,
    y_test,
)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 16ms/step - accuracy: 0.7678 - loss: 0.5031
Test Accuracy: 77.14%


In [19]:
# Extract the trained embedding matrix: the first layer of the model is the
# Embedding layer, and the first array in its weight list holds one vector
# per vocabulary index.
embedding_layer = model.layers[0]
embedding_weights = embedding_layer.get_weights()
word_vectors = embedding_weights[0]

In [21]:
# Build word <-> index lookup tables that line up with the encoded reviews.
# Raw ranks from get_word_index() are shifted by 3 because indices 0-3 are
# reserved for the special tokens registered below.
word_to_index = {
    token: rank + 3 for token, rank in imdb.get_word_index().items()
}
word_to_index.update({"<PAD>": 0, "<START>": 1, "<UNK>": 2, "<UNUSED>": 3})

# Invert the mapping (index -> word) for decoding.
index_to_word = dict(zip(word_to_index.values(), word_to_index.keys()))

In [23]:
# Sanity check: indices 0-3 should decode to the four special tokens.
print(*(index_to_word[special_index] for special_index in range(4)))

<PAD> <START> <UNK> <UNUSED>


In [27]:
# Save embeddings to file
# Output format: one line per vocabulary index, holding the token followed
# by its space-separated vector components.
# The path is kept in a single variable so the confirmation message always
# names the file that was actually written.
embeddings_path = "word_embeddings_ex11.txt"
with open(embeddings_path, "w", encoding="utf-8") as f:
    # Iterate over the rows the embedding matrix really has instead of a
    # hard-coded vocabulary size; row 0 is skipped because it is padding.
    for i in range(1, len(word_vectors)):
        # Fall back to "<UNK>" for any index without a known word.
        word = index_to_word.get(i, "<UNK>")
        vec = " ".join(map(str, word_vectors[i]))
        f.write(f"{word} {vec}\n")

print(f"✅ Saved word vectors to {embeddings_path}")

✅ Saved word vectors to word_embeddings.txt
