In [1]:
import tensorflow as tf
import numpy as np

In [6]:
class InputEmbeddings(tf.keras.layers.Layer):
    def __init__(self, d_model, vocab_size, **kwargs):
        """
        :param d_model: The size of each embedding vector.
        :param vocab_size: The size of the vocabulary, defining the number of unique tokens.
        :param kwargs: pass 
        """
        super(InputEmbeddings, self).__init__(**kwargs)
        self.d_model = d_model
        self.vocab_size = vocab_size
        self.embeddings= tf.keras.layers.Embedding(vocab_size, d_model)
        
    def call(self, x):
        return self.embeddings(x) * np.sqrt(self.d_model)
    
    

In [8]:
import tensorflow as tf

# Define the embedding dimensions and vocabulary size.
d_model = 128  # Dimension of each embedding vector
vocab_size = 1000  # Total number of unique tokens

# Create an instance of the InputEmbeddings class.
embedding_layer = InputEmbeddings(d_model=d_model, vocab_size=vocab_size)

# Generate some demo input: a batch of 3 sequences, each of length 10, with random token indices.
demo_input = tf.random.uniform((3, 10), minval=0, maxval=vocab_size, dtype=tf.int32)

# Get the embeddings for the demo input.
embeddings = embedding_layer(demo_input)

# Print the shape of the output embeddings.
print("Shape of output embeddings:", embeddings.shape)
# Output the embeddings to visually inspect them (optional, as it can be large).
print("Output embeddings:", embeddings.numpy())
print("Output embeddings shape:", embeddings.shape)

Shape of output embeddings: (3, 10, 128)
Output embeddings: [[[ 0.5146707   0.1895307  -0.29300112 ... -0.10862558 -0.22504583
    0.16266403]
  [ 0.06925477 -0.45865697  0.22549672 ...  0.5333863   0.05190561
   -0.26408648]
  [ 0.42520645  0.55514103 -0.13055247 ...  0.01275844  0.10262209
   -0.441018  ]
  ...
  [-0.09101738 -0.29055497 -0.01321953 ... -0.50177944 -0.01079204
   -0.13035151]
  [ 0.35089007 -0.11844276 -0.14385305 ...  0.483865   -0.29106033
   -0.41445133]
  [ 0.17892529  0.37451997  0.5143618  ... -0.012647   -0.25266746
    0.08366238]]

 [[-0.55149186 -0.01530908  0.40367183 ... -0.29592443  0.3236189
    0.32945496]
  [ 0.5608614   0.03319309 -0.33201817 ...  0.45180464  0.2990552
   -0.43874127]
  [-0.19392823 -0.36530343 -0.0024843  ... -0.5620586  -0.5649589
    0.48271498]
  ...
  [ 0.27567154 -0.14037745 -0.0081402  ... -0.48876196 -0.1506169
   -0.14031284]
  [ 0.40844035 -0.550867   -0.23408803 ...  0.24759406 -0.49529007
    0.40793145]
  [-0.54291886  0

In [11]:
embeddings.numpy()[0]

array([[ 0.5146707 ,  0.1895307 , -0.29300112, ..., -0.10862558,
        -0.22504583,  0.16266403],
       [ 0.06925477, -0.45865697,  0.22549672, ...,  0.5333863 ,
         0.05190561, -0.26408648],
       [ 0.42520645,  0.55514103, -0.13055247, ...,  0.01275844,
         0.10262209, -0.441018  ],
       ...,
       [-0.09101738, -0.29055497, -0.01321953, ..., -0.50177944,
        -0.01079204, -0.13035151],
       [ 0.35089007, -0.11844276, -0.14385305, ...,  0.483865  ,
        -0.29106033, -0.41445133],
       [ 0.17892529,  0.37451997,  0.5143618 , ..., -0.012647  ,
        -0.25266746,  0.08366238]], dtype=float32)

# Positional Encoding

The main purpose of positional encodings is to give the model some information about the order of words or the relative positions of words within a sequence. This is crucial for tasks involving language where the meaning of a sentence can change dramatically based on the order of words (e.g., "I like dogs more than cats" vs "I like cats more than dogs").



In [None]:
class PositionalEncoding(tf.keras.layers.Layer):
    def __init__(self, d_model:int, max_len:int, dropout:float,**kwargs):
        """
        Initializes the PositionalEncoding layer.

        :param d_model: The size of each embedding vector.
        :param max_len: The maximum number of positions for which embeddings will be created.
        :param droupout: The dropout rate to apply to the output of this layer.
        :param kwargs: pass
        """
        super(PositionalEncoding, self).__init__(**kwargs)
        self.d_model = d_model
        self.max_len = max_len
        self.dropout = tf.Droupout(rate=dropout)
        self.positional_encoding = self._get_positional_encoding()
        
    def _get_positional_encoding(self):
        """
        Generates the positional encodings using sinusoidal patterns.
        
        Returns:
        Tensor: A tensor containing positional encodings of shape (1, max_len, d_model).
        """
        positions = np.arange(self.max_len)[:, np.newaxis]
        div_term = np.exp(np.arange(0, self.d_model, 2) * -(np.log(10000.0) / self.d_model))  # Shape (d_model/2,)
        
        

In [13]:
np.arange(5)[:...]

TypeError: slice indices must be integers or None or have an __index__ method

In [14]:
div_term = np.exp(np.arange(0, d_model, 2) * -(np.log(10000.0) /d_model))  # Shape (d_model/2,)
d_model = 512  # Dimensionality of the embedding
max_len = 10  # Maximum length of the sequence
dropout_rate = 0.1  # Dropout rate


In [15]:
div_term

array([1.00000000e+00, 8.65964323e-01, 7.49894209e-01, 6.49381632e-01,
       5.62341325e-01, 4.86967525e-01, 4.21696503e-01, 3.65174127e-01,
       3.16227766e-01, 2.73841963e-01, 2.37137371e-01, 2.05352503e-01,
       1.77827941e-01, 1.53992653e-01, 1.33352143e-01, 1.15478198e-01,
       1.00000000e-01, 8.65964323e-02, 7.49894209e-02, 6.49381632e-02,
       5.62341325e-02, 4.86967525e-02, 4.21696503e-02, 3.65174127e-02,
       3.16227766e-02, 2.73841963e-02, 2.37137371e-02, 2.05352503e-02,
       1.77827941e-02, 1.53992653e-02, 1.33352143e-02, 1.15478198e-02,
       1.00000000e-02, 8.65964323e-03, 7.49894209e-03, 6.49381632e-03,
       5.62341325e-03, 4.86967525e-03, 4.21696503e-03, 3.65174127e-03,
       3.16227766e-03, 2.73841963e-03, 2.37137371e-03, 2.05352503e-03,
       1.77827941e-03, 1.53992653e-03, 1.33352143e-03, 1.15478198e-03,
       1.00000000e-03, 8.65964323e-04, 7.49894209e-04, 6.49381632e-04,
       5.62341325e-04, 4.86967525e-04, 4.21696503e-04, 3.65174127e-04,
      