In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample corpus
corpus = [
    "this is a small example",
    "word embeddings are very useful",
    "we can use tensorflow to create embeddings"
]

# The embedding weights below are randomly initialised and never trained, so
# fix the global seed to make the printed values reproducible on a fresh
# kernel (Restart & Run All).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Tokenize the corpus: build the word -> integer-id vocabulary, then encode
# every sentence as a list of those ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(corpus)

# Pad sequences (zeros appended at the end) to ensure they are of the same length
padded_sequences = pad_sequences(sequences, padding='post')

# Parameters
vocab_size = len(word_index) + 1  # Add 1 because id 0 is used for padding
embedding_dim = 8  # Dimension of the embedding vector

# Define the Embedding layer.
# `input_length` is intentionally not passed: it is deprecated (Keras 3 only
# emits a warning and ignores it) and the layer infers the sequence length
# from the input it is called on.
embedding_layer = tf.keras.layers.Embedding(
    input_dim=vocab_size,
    output_dim=embedding_dim,
)

# Create a model to hold the embedding layer
model = tf.keras.Sequential([
    embedding_layer
])

# Get the embeddings: one `embedding_dim`-sized vector per token position.
# Padding positions (id 0) are looked up like any other id, so they all map
# to the same vector.
embeddings = model.predict(padded_sequences)

# Sanity check: (number of sentences, padded length, embedding dimension)
assert embeddings.shape == (len(corpus), padded_sequences.shape[1], embedding_dim)

print(embeddings)
print(len(embeddings))
print(len(embeddings[0]))
print(embeddings.shape)



[[[ 0.00548052 -0.02341092  0.0202434   0.01607502  0.04088454
   -0.03311248 -0.02566762  0.00827903]
  [-0.01644065 -0.01929971 -0.02698071  0.00988919  0.01893946
    0.0321473  -0.01953237  0.00011854]
  [-0.01835247  0.0265771   0.03485188 -0.0489451  -0.04043774
   -0.02762411 -0.02242715  0.01174999]
  [-0.0211988   0.04182937 -0.00165902  0.03964654  0.00273906
   -0.04110184 -0.0038188   0.01690391]
  [ 0.00911578  0.04142663 -0.032844    0.02366977  0.02482846
   -0.04494765 -0.02582886  0.0326056 ]
  [ 0.01295877  0.02074173 -0.02403092 -0.03012077  0.01039624
   -0.04567426 -0.01065893  0.01123551]
  [ 0.01295877  0.02074173 -0.02403092 -0.03012077  0.01039624
   -0.04567426 -0.01065893  0.01123551]]

 [[ 0.02736186 -0.00745121 -0.04172636  0.00894465  0.0408815
   -0.02503834  0.02074012 -0.0088631 ]
  [ 0.01149571  0.02406755  0.04644359 -0.02890234 -0.03422489
    0.02986237 -0.02856522  0.04309202]
  [-0.0461587   0.03075628  0.04429804  0.04135536  0.02041061
    0.010

In [2]:
# Show the embedding tensor followed by its batch size, its padded sequence
# length and its full shape, each on its own line.
print(
    embeddings,
    len(embeddings),
    len(embeddings[0]),
    embeddings.shape,
    sep="\n",
)

[[[ 0.00548052 -0.02341092  0.0202434   0.01607502  0.04088454
   -0.03311248 -0.02566762  0.00827903]
  [-0.01644065 -0.01929971 -0.02698071  0.00988919  0.01893946
    0.0321473  -0.01953237  0.00011854]
  [-0.01835247  0.0265771   0.03485188 -0.0489451  -0.04043774
   -0.02762411 -0.02242715  0.01174999]
  [-0.0211988   0.04182937 -0.00165902  0.03964654  0.00273906
   -0.04110184 -0.0038188   0.01690391]
  [ 0.00911578  0.04142663 -0.032844    0.02366977  0.02482846
   -0.04494765 -0.02582886  0.0326056 ]
  [ 0.01295877  0.02074173 -0.02403092 -0.03012077  0.01039624
   -0.04567426 -0.01065893  0.01123551]
  [ 0.01295877  0.02074173 -0.02403092 -0.03012077  0.01039624
   -0.04567426 -0.01065893  0.01123551]]

 [[ 0.02736186 -0.00745121 -0.04172636  0.00894465  0.0408815
   -0.02503834  0.02074012 -0.0088631 ]
  [ 0.01149571  0.02406755  0.04644359 -0.02890234 -0.03422489
    0.02986237 -0.02856522  0.04309202]
  [-0.0461587   0.03075628  0.04429804  0.04135536  0.02041061
    0.010