In [1]:

import tensorflow as tf
from tensorflow import convert_to_tensor, string
from tensorflow.keras.layers import TextVectorization, Embedding, Layer
from tensorflow.data import Dataset
import numpy as np
import matplotlib.pyplot as plt

In [6]:

# --- Toy corpus and vectorization settings ---------------------------------
# Each sentence sits in its own single-element list, i.e. one string per row.
sentences = [["I am a boy"], ["you too a boy"]]
vocab_size = 10             # passed as max_tokens: upper bound on vocabulary size
output_sequence_length = 5  # fixed length of every vectorized sentence

# Build the TextVectorization layer and let it learn its vocabulary
# from the corpus.
sentence_data = Dataset.from_tensor_slices(sentences)
vectorize_layer = TextVectorization(
    max_tokens=vocab_size,
    output_sequence_length=output_sequence_length,
)
vectorize_layer.adapt(sentence_data)

# Turn the raw Python strings into a string tensor, then map every word
# to its integer id in the learned vocabulary.
word_tensors = convert_to_tensor(sentences, dtype=string)
vectorized_words = vectorize_layer(word_tensors)

print("Vocabulary: ", vectorize_layer.get_vocabulary())
print("Vectorized words: ", vectorized_words)

Vocabulary:  ['', '[UNK]', 'boy', 'a', 'you', 'too', 'i', 'am']
Vectorized words:  tf.Tensor(
[[6 7 3 2 0]
 [4 5 3 2 0]], shape=(2, 5), dtype=int64)


In [8]:

# Word embedding: a lookup table holding one vector per token id.
# NOTE: `output_length` is the embedding dimension (size of each vector),
# not a sequence length.
output_length = 5
word_embedding_layer = Embedding(input_dim=vocab_size, output_dim=output_length)

# Look up a vector for every token id; the result gains a trailing axis of
# size `output_length`.
embedded_words = word_embedding_layer(vectorized_words)
print(embedded_words)

tf.Tensor(
[[[-0.01798941 -0.01141007 -0.01965236 -0.01391307  0.02580461]
  [ 0.00942267  0.02321624 -0.03564912 -0.04408618 -0.00852084]
  [ 0.04267063 -0.03018031 -0.02566581  0.01290936 -0.01181199]
  [ 0.04796818  0.04413647  0.02198202 -0.02115368 -0.04999118]
  [-0.04091095 -0.0028476  -0.00813979  0.04155176 -0.01923447]]

 [[ 0.01443329 -0.0470566  -0.00141361 -0.03441573 -0.03382297]
  [-0.04286138 -0.01530708  0.01445475  0.04435326  0.04332342]
  [ 0.04267063 -0.03018031 -0.02566581  0.01290936 -0.01181199]
  [ 0.04796818  0.04413647  0.02198202 -0.02115368 -0.04999118]
  [-0.04091095 -0.0028476  -0.00813979  0.04155176 -0.01923447]]], shape=(2, 5, 5), dtype=float32)


In [9]:

# Position embedding: a second lookup table, indexed by position in the
# sequence (0 .. output_sequence_length - 1) instead of by token id.
position_indices = tf.range(output_sequence_length)
position_embedding_layer = Embedding(
    input_dim=output_sequence_length,
    output_dim=output_length,
)

# One vector per position: shape (output_sequence_length, output_length).
embedded_indices = position_embedding_layer(position_indices)
print(embedded_indices)

tf.Tensor(
[[-0.0251071  -0.00928924 -0.02392038 -0.04352036 -0.02765547]
 [-0.03625281  0.04192827 -0.02805814  0.04814998  0.01290405]
 [-0.01811836 -0.03747717 -0.01970277  0.04699031 -0.0028955 ]
 [ 0.01793637 -0.04747498  0.01727717 -0.03960899  0.04200044]
 [ 0.04633823  0.01250258 -0.03862096 -0.04872328 -0.04257888]], shape=(5, 5), dtype=float32)


In [10]:

# Combine word and position information by element-wise addition.
# `embedded_indices` has no batch axis, so it is broadcast across every
# sentence in `embedded_words`.
final_output_embedding = tf.add(embedded_words, embedded_indices)
print("Final output: ", final_output_embedding)

Final output:  tf.Tensor(
[[[-0.04309651 -0.02069931 -0.04357274 -0.05743343 -0.00185086]
  [-0.02683014  0.0651445  -0.06370725  0.00406379  0.00438321]
  [ 0.02455227 -0.06765749 -0.04536857  0.05989967 -0.01470749]
  [ 0.06590455 -0.00333851  0.03925919 -0.06076267 -0.00799074]
  [ 0.00542729  0.00965498 -0.04676075 -0.00717152 -0.06181334]]

 [[-0.01067381 -0.05634585 -0.02533399 -0.07793609 -0.06147844]
  [-0.07911418  0.02662119 -0.01360339  0.09250323  0.05622748]
  [ 0.02455227 -0.06765749 -0.04536857  0.05989967 -0.01470749]
  [ 0.06590455 -0.00333851  0.03925919 -0.06076267 -0.00799074]
  [ 0.00542729  0.00965498 -0.04676075 -0.00717152 -0.06181334]]], shape=(2, 5, 5), dtype=float32)
