# In [None]:
# imdb_embeddings.py
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Load the IMDB review dataset, restricted to the `vocab_size` most frequent
# word indices via num_words.
vocab_size = 20000
maxlen = 200
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(
    num_words=vocab_size,
)

# Bring every review to exactly `maxlen` tokens (train split first, then test)
# so both splits match the model's fixed input shape.
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)

# Build a binary classifier on top of a trainable Embedding layer.
embedding_dim = 128
model = models.Sequential([
    # The Input layer fixes the sequence length to maxlen.
    layers.Input(shape=(maxlen,)),
    # `input_length` is intentionally not passed: it is redundant with the
    # Input layer above and deprecated in Keras 3 (it only triggers a warning).
    # The layer is named so its weights can be retrieved by name later.
    layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, name='imdb_embedding'),
    # return_sequences=False: the recurrent layer emits one vector per review
    # rather than one per timestep.
    layers.Bidirectional(layers.LSTM(64, return_sequences=False)),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    # Single sigmoid unit, matched with the binary cross-entropy loss below.
    layers.Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Fit on the training split, holding out 10% of it for validation.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=6,
    validation_split=0.1,
)

# Score the trained model on the untouched test split; evaluate() yields the
# loss followed by the compiled metric (accuracy).
loss, acc = model.evaluate(x=x_test, y=y_test)
print(f"IMDB test accuracy: {acc:.4f}")

# Look up the trained embedding layer by the name it was given at build time
# and take its first weight array, the (vocab_size, embedding_dim) matrix.
embedding_layer = model.get_layer(name='imdb_embedding')
embeddings = embedding_layer.get_weights()[0]
print("embeddings shape:", embeddings.shape)

# Persist the raw embedding matrix as a NumPy array, then the whole model.
np.save(file="imdb_embeddings.npy", arr=embeddings)
model.save("imdb_embedding_model.h5")