In [None]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import tensorflow as tf
from tensorflow import keras
from torch import nn


https://github.com/ajhalthor/Transformer-Neural-Network/blob/main/Positional_Encoding_in_Transformer_neural_networks.ipynb

In [None]:
#sinoisal encoding

import torch
import torch.nn as nn

class PositionalEncoding(nn.Module):

    def __init__(self, d_model, max_sequence_length):
        super().__init__()
        self.max_sequence_length = max_sequence_length
        self.d_model = d_model

    def forward(self):
        even_i = torch.arange(0, self.d_model, 2).float()
        denominator = torch.pow(10000, even_i/self.d_model)
        position = torch.arange(self.max_sequence_length).reshape(self.max_sequence_length, 1)
        even_PE = torch.sin(position / denominator)
        odd_PE = torch.cos(position / denominator)
        stacked = torch.stack([even_PE, odd_PE], dim=2)
        PE = torch.flatten(stacked, start_dim=1, end_dim=2)
        return PE



pe = PositionalEncoding(d_model=2, max_sequence_length=3)
pe.forward()

tensor([[ 0.0000,  1.0000],
        [ 0.8415,  0.5403],
        [ 0.9093, -0.4161]])

In [None]:
#for bathc
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, max_sequence_length):
        super().__init__()
        self.max_sequence_length = max_sequence_length
        self.d_model = d_model

    def forward(self, x):
        batch_size, seq_len, _ = x.size()

        # Generate the positional encoding
        even_i = torch.arange(0, self.d_model, 2).float()
        denominator = torch.pow(10000, even_i/self.d_model)
        position = torch.arange(self.max_sequence_length).reshape(self.max_sequence_length, 1)
        even_PE = torch.sin(position / denominator)
        odd_PE = torch.cos(position / denominator)
        stacked = torch.stack([even_PE, odd_PE], dim=2)
        PE = torch.flatten(stacked, start_dim=1, end_dim=2)

        # Ensure the PE matches the batch size and sequence length
        PE = PE.unsqueeze(0).expand(batch_size, seq_len, self.d_model)

        return PE + x

x = torch.randn(2, 3, 2)  # Example batch of sequences
pe = PositionalEncoding(d_model=2, max_sequence_length=3)
pe.forward(x)

tensor([[[-0.5655,  0.5307],
         [ 2.0689,  1.7320],
         [ 2.4299, -0.9372]],

        [[-1.2400,  0.7311],
         [-1.3401, -1.4478],
         [ 0.3283, -1.6179]]])

In [None]:
#rope encoding
#rope
import torch
import torch.nn.functional as F

class RoPEEmbedding(torch.nn.Module):
    def __init__(self, embedding_dim):
        super().__init__()
        assert embedding_dim % 2 == 0, "Embedding dimension must be even for RoPE"
        self.embedding_dim = embedding_dim

    def forward(self, x):
        """
        Forward pass for Rotary Position Embedding.

        Args:
        - x: Tensor of shape (batch_size, seq_len, embedding_dim)

        Returns:
        - Tensor with RoPE applied to the last two dimensions.
        """
        seq_len = x.shape[1]

        # Generate position indices
        position_ids = torch.arange(seq_len, dtype=torch.float32, device=x.device)

        # Compute the rotary angles
        freqs = 1.0 / (10000 ** (torch.arange(0, self.embedding_dim, 2, dtype=torch.float32, device=x.device) / self.embedding_dim))
        angles = torch.einsum('i,j->ij', position_ids, freqs)

        # Create the rotation matrix for sin and cos embeddings
        sin = torch.sin(angles).repeat_interleave(2, dim=-1)
        cos = torch.cos(angles).repeat_interleave(2, dim=-1)

        # Apply rotation using cos and sin embeddings
        x1 = x * cos + self.rotate_half(x) * sin
        return x1+x



    def rotate_half(self,x):
        """
        Rotate the last dimension by swapping adjacent components and negating the correct ones.
        """
        x1 = x[..., ::2]  # Elements at even positions: x1, x3, x5
        x2 = x[..., 1::2]  # Elements at odd positions: x2, x4, x6
        return torch.flatten(torch.stack([-x2, x1], dim=-1), start_dim=-2)  # Interleave and negate correctly


batch_size = 2
seq_len = 2
embedding_dim = 2

x = torch.randn(batch_size, seq_len, embedding_dim)
print("Input:\n", x)

rope_layer = RoPEEmbedding(embedding_dim)
rope_output = rope_layer(x)

print("\nOutput after RoPE:\n", rope_output)



Input:
 tensor([[[ 0.6181, -0.2385],
         [ 0.0245, -0.4912]],

        [[-0.0591, -1.5653],
         [ 0.4258, -1.4818]]])

Output after RoPE:
 tensor([[[ 1.2362, -0.4770],
         [ 0.4511, -0.7360]],

        [[-0.1183, -3.1305],
         [ 1.9028, -1.9241]]])


In [None]:
#sinuoisal pos emcioding basics

import torch
import torch.nn as nn

max_sequence_length = 10
d_model = 6

#get even odd dims
even_i = torch.arange(0, d_model, 2).float()
odd_i=torch.arange(1, d_model, 2).float()
even_i,odd_i


(tensor([0., 2., 4.]), tensor([1., 3., 5.]))

In [None]:

edenominator = torch.pow(10000, (even_i)/d_model)
odenominator=torch.pow(10000, (odd_i - 1)/d_model)
edenominator,odenominator#both arre same so just use denom

(tensor([  1.0000,  21.5443, 464.1590]),
 tensor([  1.0000,  21.5443, 464.1590]))

In [None]:

position = torch.arange(max_sequence_length, dtype=torch.float).reshape(max_sequence_length, 1)
position

tensor([[0.],
        [1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])

In [None]:
position.shape

torch.Size([10, 1])

In [None]:
denominator=edenominator
denominator.shape

torch.Size([3])

In [None]:
denominator

tensor([  1.0000,  21.5443, 464.1590])

In [None]:

even_PE = torch.sin(position / denominator)
odd_PE = torch.cos(position / denominator)
even_PE,odd_PE

(tensor([[ 0.0000,  0.0000,  0.0000],
         [ 0.8415,  0.0464,  0.0022],
         [ 0.9093,  0.0927,  0.0043],
         [ 0.1411,  0.1388,  0.0065],
         [-0.7568,  0.1846,  0.0086],
         [-0.9589,  0.2300,  0.0108],
         [-0.2794,  0.2749,  0.0129],
         [ 0.6570,  0.3192,  0.0151],
         [ 0.9894,  0.3629,  0.0172],
         [ 0.4121,  0.4057,  0.0194]]),
 tensor([[ 1.0000,  1.0000,  1.0000],
         [ 0.5403,  0.9989,  1.0000],
         [-0.4161,  0.9957,  1.0000],
         [-0.9900,  0.9903,  1.0000],
         [-0.6536,  0.9828,  1.0000],
         [ 0.2837,  0.9732,  0.9999],
         [ 0.9602,  0.9615,  0.9999],
         [ 0.7539,  0.9477,  0.9999],
         [-0.1455,  0.9318,  0.9999],
         [-0.9111,  0.9140,  0.9998]]))

In [None]:
# now we need to place one elem of even pe then odd pe then even pe then odd pe and so
stacked=torch.stack([even_PE,odd_PE],dim=2)#dim=2 mean put odd pe in 2nd value or eve 2snd pos
stacked

tensor([[[ 0.0000,  1.0000],
         [ 0.0000,  1.0000],
         [ 0.0000,  1.0000]],

        [[ 0.8415,  0.5403],
         [ 0.0464,  0.9989],
         [ 0.0022,  1.0000]],

        [[ 0.9093, -0.4161],
         [ 0.0927,  0.9957],
         [ 0.0043,  1.0000]],

        [[ 0.1411, -0.9900],
         [ 0.1388,  0.9903],
         [ 0.0065,  1.0000]],

        [[-0.7568, -0.6536],
         [ 0.1846,  0.9828],
         [ 0.0086,  1.0000]],

        [[-0.9589,  0.2837],
         [ 0.2300,  0.9732],
         [ 0.0108,  0.9999]],

        [[-0.2794,  0.9602],
         [ 0.2749,  0.9615],
         [ 0.0129,  0.9999]],

        [[ 0.6570,  0.7539],
         [ 0.3192,  0.9477],
         [ 0.0151,  0.9999]],

        [[ 0.9894, -0.1455],
         [ 0.3629,  0.9318],
         [ 0.0172,  0.9999]],

        [[ 0.4121, -0.9111],
         [ 0.4057,  0.9140],
         [ 0.0194,  0.9998]]])

In [None]:
stacked.shape

torch.Size([10, 3, 2])

In [None]:
#now flatten is
torch.flatten(stacked)

tensor([ 0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  1.0000,  0.8415,  0.5403,
         0.0464,  0.9989,  0.0022,  1.0000,  0.9093, -0.4161,  0.0927,  0.9957,
         0.0043,  1.0000,  0.1411, -0.9900,  0.1388,  0.9903,  0.0065,  1.0000,
        -0.7568, -0.6536,  0.1846,  0.9828,  0.0086,  1.0000, -0.9589,  0.2837,
         0.2300,  0.9732,  0.0108,  0.9999, -0.2794,  0.9602,  0.2749,  0.9615,
         0.0129,  0.9999,  0.6570,  0.7539,  0.3192,  0.9477,  0.0151,  0.9999,
         0.9894, -0.1455,  0.3629,  0.9318,  0.0172,  0.9999,  0.4121, -0.9111,
         0.4057,  0.9140,  0.0194,  0.9998])

In [None]:
#verify if
even_PE,odd_PE

(tensor([[ 0.0000,  0.0000,  0.0000],
         [ 0.8415,  0.0464,  0.0022],
         [ 0.9093,  0.0927,  0.0043],
         [ 0.1411,  0.1388,  0.0065],
         [-0.7568,  0.1846,  0.0086],
         [-0.9589,  0.2300,  0.0108],
         [-0.2794,  0.2749,  0.0129],
         [ 0.6570,  0.3192,  0.0151],
         [ 0.9894,  0.3629,  0.0172],
         [ 0.4121,  0.4057,  0.0194]]),
 tensor([[ 1.0000,  1.0000,  1.0000],
         [ 0.5403,  0.9989,  1.0000],
         [-0.4161,  0.9957,  1.0000],
         [-0.9900,  0.9903,  1.0000],
         [-0.6536,  0.9828,  1.0000],
         [ 0.2837,  0.9732,  0.9999],
         [ 0.9602,  0.9615,  0.9999],
         [ 0.7539,  0.9477,  0.9999],
         [-0.1455,  0.9318,  0.9999],
         [-0.9111,  0.9140,  0.9998]]))

In [None]:
#above done thats it!

In [None]:
#larnable pos embeds
import torch.nn as nn
import torch
class LearnablePositionalEmbedding(nn.Module):
    def __init__(self, max_seq_length, d_model):
        super().__init__()
        self.embedding = nn.Embedding(max_seq_length, d_model)  # Learnable parameters here!

    def forward(self, x):
        position_ids = torch.arange(x.size(1), device=x.device)
        return x + self.embedding(position_ids)

In [None]:
def rotate_half1(x):
    """
    Rotate the last dimension by swapping adjacent even and odd components
    and negating the even ones according to RoPE specifications.
    """
    x1 = x[..., ::2]  # x1, x3, x5 (even indices in zero-based indexing)
    x2 = x[..., 1::2]  # x2, x4, x6 (odd indices)
    return torch.cat([-x2, x1], dim=-1)  # Negate the odd group

def rotate_half(x):
    """
    Rotate the last dimension by swapping adjacent components and negating the correct ones.
    """
    x1 = x[..., ::2]  # Elements at even positions: x1, x3, x5
    x2 = x[..., 1::2]  # Elements at odd positions: x2, x4, x6
    return torch.flatten(torch.stack([-x2, x1], dim=-1), start_dim=-2)  # Interleave and negate correctly


torch.manual_seed(2)
x=torch.randn(2,2,2)
print('x',x)
print('out',rotate_half(x))

x tensor([[[ 0.3923, -0.2236],
         [-0.3195, -1.2050]],

        [[ 1.0445, -0.6332],
         [ 0.5731,  0.5409]]])
out tensor([[[ 0.2236,  0.3923],
         [ 1.2050, -0.3195]],

        [[ 0.6332,  1.0445],
         [-0.5409,  0.5731]]])
