<a href="https://colab.research.google.com/github/bhavyajethi/Deep-Learning-practice/blob/main/positional_encoding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import tensorflow as tf

In [5]:
def positional_encoding(position, d_model):
  """
  Builds the sinusoidal positional-encoding table.

  Each row corresponds to one sequence position and each column to one model
  dimension. Columns are paired: columns 2i and 2i+1 share the angle
  pos / 10000^(2i / d_model), with the even column holding its sine and the
  odd column its cosine.

  Args:
    position: Number of positions (rows) to generate, i.e. the maximum
      sequence length.
    d_model: Number of model dimensions (columns).

  Returns:
    A float32 tensor of shape (1, position, d_model).
  """
  pos_index = np.arange(position)[:, np.newaxis]  # column vector: (position, 1)
  dim_index = np.arange(d_model)[np.newaxis, :]   # row vector: (1, d_model)

  # Halving with floor division and doubling again gives columns 2i and 2i+1
  # the same exponent.
  exponents = (2 * (dim_index // 2)) / np.float32(d_model)
  angles = pos_index / np.power(10000, exponents)

  table = np.empty_like(angles)
  table[:, 0::2] = np.sin(angles[:, 0::2])  # even columns (2i)
  table[:, 1::2] = np.cos(angles[:, 1::2])  # odd columns (2i+1)

  # Prepend a size-1 leading axis, then convert to a float32 tensor.
  return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

# Demo configuration: 50 sequence positions, 512 model dimensions.
position, d_model = 50, 512
pos_encoding = positional_encoding(position=position, d_model=d_model)

# Report the shape of the returned tensor first, then the tensor itself.
print("Positional Encodings Shape:", pos_encoding.shape)
print("Positional Encodings Example:\n", pos_encoding)

Positional Encodings Shape: (1, 50, 512)
Positional Encodings Example:
 tf.Tensor(
[[[ 0.00000000e+00  1.00000000e+00  0.00000000e+00 ...  1.00000000e+00
    0.00000000e+00  1.00000000e+00]
  [ 8.41470957e-01  5.40302277e-01  8.21856201e-01 ...  1.00000000e+00
    1.03663289e-04  1.00000000e+00]
  [ 9.09297407e-01 -4.16146845e-01  9.36414719e-01 ...  1.00000000e+00
    2.07326579e-04  1.00000000e+00]
  ...
  [ 1.23573124e-01 -9.92335498e-01  9.77189839e-01 ...  9.99987245e-01
    4.87215538e-03  9.99988139e-01]
  [-7.68254638e-01 -6.40144348e-01  7.31235921e-01 ...  9.99986708e-01
    4.97581763e-03  9.99987602e-01]
  [-9.53752637e-01  3.00592542e-01 -1.44026920e-01 ...  9.99986112e-01
    5.07947942e-03  9.99987125e-01]]], shape=(1, 50, 512), dtype=float32)


In [6]:
def add_positional_encoding(embeddings, position, d_model):
  """
  Adds positional encoding to the input embeddings.

  Args:
    embeddings: The input embeddings tensor of shape
      (batch_size, sequence_length, d_model).
    position: The maximum length of the sequence. Must be at least
      sequence_length so that every timestep has its own encoding row.
    d_model: The dimension of the model.

  Returns:
    A tensor of shape (batch_size, sequence_length, d_model) with positional
    encodings added.

  Raises:
    ValueError: If the sequence length is statically known and is larger
      than `position`.
  """
  # Prefer the static sequence length so the bounds check can run before any
  # encoding is computed. It is None when the length is only known at graph
  # execution time.
  static_shape = getattr(embeddings, "shape", None)
  seq_len = static_shape[1] if static_shape is not None else None

  # Without this guard a one-row table (position == 1) would silently
  # broadcast position 0's encoding onto every timestep, and any other
  # shortfall would surface as an opaque shape-mismatch error from the add.
  if seq_len is not None and position < seq_len:
    raise ValueError(
        f"position ({position}) must be >= the sequence length ({seq_len}) "
        "of the embeddings"
    )

  pos_encoding = positional_encoding(position, d_model)

  if seq_len is None:
    # Dynamic sequence length: fall back to the runtime shape.
    seq_len = tf.shape(embeddings)[1]

  # Keep only the rows for the timesteps actually present; the leading axis of
  # size 1 broadcasts over the batch.
  return embeddings + pos_encoding[:, :seq_len, :]

# Example usage: a batch of 64 sequences, each 30 steps long and 512 wide.
batch_size, sequence_length, d_model = 64, 30, 512

# Uniform random values stand in for real embeddings.
dummy_embeddings = tf.random.uniform(shape=(batch_size, sequence_length, d_model))

# `position` is reused from the earlier demo cell.
embeddings_with_pos = add_positional_encoding(
    embeddings=dummy_embeddings, position=position, d_model=d_model
)

print("Shape of embeddings with positional encoding:", embeddings_with_pos.shape)

Shape of embeddings with positional encoding: (64, 30, 512)
