In [35]:
import torch as t
from torch import nn
import plotly.express as px
from IPython.display import display
import pandas as pd
import numpy as np
import utils

In [2]:
# Load the pretrained "gpt2" tokenizer through the transformers AutoTokenizer factory.
import transformers

tokenizer = transformers.AutoTokenizer.from_pretrained("gpt2")

In [3]:
# Sample passage used to exercise the tokenizer. The trailing spaces and the
# four-space indentation inside the literal are part of the string itself.
test_str = """A day will come, one day in the unending succession of days, 
    when beings, beings who are now latent in our thoughts and hidden 
    in our loins, shall stand upon this earth as one stands upon a 
    footstool, and shall laugh and reach out their hands amidst the 
    stars."""

In [4]:
# Encode the sample passage with the tokenizer and keep the result for the round-trip below.
tkns = tokenizer.encode(test_str)

In [5]:
# Round-trip check: decoding the encoded passage displays the original text.
tokenizer.decode(tkns)

'A day will come, one day in the unending succession of days, \n    when beings, beings who are now latent in our thoughts and hidden \n    in our loins, shall stand upon this earth as one stands upon a \n    footstool, and shall laugh and reach out their hands amidst the \n    stars.'

In [6]:
# Calling the tokenizer directly displays a mapping with 'input_ids' and 'attention_mask'.
tokenizer(test_str)

{'input_ids': [32, 1110, 481, 1282, 11, 530, 1110, 287, 262, 555, 1571, 22435, 286, 1528, 11, 220, 198, 220, 220, 220, 618, 9791, 11, 9791, 508, 389, 783, 41270, 287, 674, 6066, 290, 7104, 220, 198, 220, 220, 220, 287, 674, 2376, 1040, 11, 2236, 1302, 2402, 428, 4534, 355, 530, 6296, 2402, 257, 220, 198, 220, 220, 220, 2366, 301, 970, 11, 290, 2236, 6487, 290, 3151, 503, 511, 2832, 31095, 262, 220, 198, 220, 220, 220, 5788, 13], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [17]:
class Embedding(nn.Module):
    '''Learnable lookup table mapping integer ids to dense vectors.

    The table has shape (num_embeddings, embedding_dim) and is initialised
    with values drawn uniformly from [-1, 1).
    '''

    def __init__(self, num_embeddings: int, embedding_dim: int):
        super().__init__()
        self.num_embed, self.embed_dim = num_embeddings, embedding_dim

        # Allocate the table first, then fill it in place with uniform noise.
        table = t.empty(num_embeddings, embedding_dim)
        table.uniform_(-1, 1)
        self.weight = nn.Parameter(table)

    def forward(self, x: t.LongTensor) -> t.Tensor:
        '''Look up the embedding row for every integer id in `x`.

        The result has the shape of `x` with an extra trailing dimension of
        size `embedding_dim`.
        '''
        rows = self.weight[x]
        return rows

    def extra_repr(self) -> str:
        '''Show the table dimensions inside the module repr.'''
        n_rows, n_cols = self.num_embed, self.embed_dim
        return f"{n_rows}, {n_cols}"

# Sanity check: the custom module's repr matches torch's built-in nn.Embedding for the same sizes.
assert repr(Embedding(10, 20)) == repr(t.nn.Embedding(10, 20))
#utils.test_embedding(Embedding)

In [23]:
class PositionalEncoding(nn.Module):
    '''Adds fixed sinusoidal positional encodings to a batch of embeddings.

    For position `pos` and pair index `i`, even columns hold
    sin(pos / n ** (2i / embedding_dim)) and odd columns hold the matching
    cosine, with n = 10000. The table is precomputed once for `max_seq_len`
    positions and stored as a non-trainable buffer, so it follows the module
    across `.to(device)` calls and is saved in the state dict.

    max_seq_len: number of positions to precompute.
    embedding_dim: size of each embedding vector (may be odd).
    '''

    def __init__(self, max_seq_len: int, embedding_dim: int):
        super().__init__()
        self.max_seq_len = max_seq_len
        self.embed_dim = embedding_dim
        self.n = 10000

        # One frequency per sin/cos pair. Round up so that an odd
        # embedding_dim still has a frequency for its last, unpaired sin column.
        n_freqs = (embedding_dim + 1) // 2
        freqs = np.outer(
            np.arange(max_seq_len),
            1 / self.n ** (2 * np.arange(n_freqs) / embedding_dim)
        )
        enc_2d = np.zeros((max_seq_len, embedding_dim))
        enc_2d[:, ::2] = np.sin(freqs)
        enc_2d[:, 1::2] = np.cos(freqs[:, :embedding_dim // 2])

        # Register the buffer directly: assigning `self.pos_enc` beforehand makes
        # register_buffer raise KeyError because the attribute already exists.
        # NumPy builds the table in float64; cast to float32 so adding it to
        # float32 activations does not promote the result to float64.
        self.register_buffer("pos_enc", t.from_numpy(enc_2d).float())

    def forward(self, x: t.Tensor) -> t.Tensor:
        '''Add the encodings for the first `seq_len` positions to `x`.

        x: shape (batch, seq_len, embedding_dim)

        Returns a tensor of the same shape as `x`.
        Raises ValueError if seq_len exceeds the precomputed max_seq_len.
        '''
        seq_len = x.shape[1]
        if seq_len > self.max_seq_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds max_seq_len={self.max_seq_len}"
            )
        # The (seq_len, embedding_dim) slice broadcasts over the batch dimension.
        return x + self.pos_enc[:seq_len, :]

    def extra_repr(self) -> str:
        '''Show the encoding hyper-parameters inside the module repr.'''
        return f"max_freq={self.n}, max_seq_len={self.max_seq_len}, embedding_dim={self.embed_dim}"

In [32]:
# Reference run of torch's built-in LayerNorm on a random (2, 3, 4) tensor,
# normalising over the last dimension (size 4).
T = t.randn(2, 3, 4)
lnorm = nn.LayerNorm(T.shape[2])
out = lnorm(T)

In [33]:
# Display the random input tensor.
T

tensor([[[-1.0023,  1.4372,  2.0048, -0.3291],
         [-0.6571,  1.2354, -0.8084,  1.0779],
         [ 0.3791,  1.1162, -0.5283,  1.3747]],

        [[ 2.1198, -0.0561, -0.4570,  0.9201],
         [ 0.1503,  1.6929, -0.4923,  0.3095],
         [-1.3740, -0.8491,  1.3189,  1.8742]]])

In [34]:
# Display the result of applying nn.LayerNorm to T.
out

tensor([[[-1.2405,  0.7375,  1.1977, -0.6947],
         [-0.9169,  1.0798, -1.0764,  0.9136],
         [-0.2790,  0.7177, -1.5060,  1.0673]],

        [[ 1.4964, -0.6916, -1.0947,  0.2900],
         [-0.3325,  1.6043, -1.1392, -0.1326],
         [-1.1707, -0.7906,  0.7796,  1.1817]]],
       grad_fn=<NativeLayerNormBackward0>)

In [42]:
#from types import UnionType  # type: ignore
#from typing import List  # type: ignore


class LayerNorm(nn.Module):
    '''Layer normalisation over the trailing `normalized_shape` dimensions.

    Each slice spanning the last len(normalized_shape) dimensions of the
    input is shifted to zero mean and scaled to unit (biased) variance, then
    optionally passed through a learnable elementwise scale and shift.

    normalized_shape: int or sequence of ints giving the trailing dimensions
        to normalise over.
    eps: value added to the variance for numerical stability.
    elementwise_affine: if True, learn `weight` (initialised to ones) and
        `bias` (initialised to zeros) of shape `normalized_shape`; if False,
        both attributes are None.
    '''

    def __init__(
        self, 
        normalized_shape, 
        eps: float = 1e-05, 
        elementwise_affine: bool = True
        ):
        super().__init__()
        # Canonicalise to a tuple so an int, list or torch.Size all yield the
        # same stored shape and the same repr.
        if isinstance(normalized_shape, int):
            self.norm_shape = (normalized_shape, )
        else:
            self.norm_shape = tuple(normalized_shape)
        self.eps = eps
        self.elementwise_affine = elementwise_affine

        if self.elementwise_affine:
            self.weight = nn.Parameter(t.ones(self.norm_shape))
            self.bias = nn.Parameter(t.zeros(self.norm_shape))
        else:
            # Keep the attributes defined (as None) so `.weight` / `.bias`
            # never raise AttributeError when the affine step is disabled.
            self.register_parameter("weight", None)
            self.register_parameter("bias", None)

    def forward(self, x: t.Tensor) -> t.Tensor:
        '''Normalize along each embedding.

        x: tensor whose trailing dimensions correspond to `normalized_shape`.
        Returns a tensor with the same shape as `x`.
        '''
        # Indices of the trailing dimensions that are reduced over.
        n_dims = x.dim()
        norm_dims = tuple(range(n_dims - len(self.norm_shape), n_dims))

        # The statistics stay on the module, as before, so code that inspects
        # them after a forward pass keeps working.
        self.mean = t.mean(x, dim=norm_dims, keepdim=True)
        # Biased variance (divide by N).
        self.var = t.var(x, dim=norm_dims, unbiased=False, keepdim=True)

        out = (x - self.mean) / t.sqrt(self.var + self.eps)

        if self.elementwise_affine:
            out = out * self.weight + self.bias

        return out

    def extra_repr(self) -> str:
        '''Show the configuration inside the module repr.'''
        return f"normalized_shape={self.norm_shape}, eps={self.eps}, elementwise_affine={self.elementwise_affine}"

# Run the layer-norm checks from the local `utils` module against the custom LayerNorm class.
utils.test_layernorm_mean_1d(LayerNorm)
utils.test_layernorm_mean_2d(LayerNorm)
utils.test_layernorm_std(LayerNorm)
utils.test_layernorm_exact(LayerNorm)
utils.test_layernorm_backward(LayerNorm)

All tests in `test_layernorm_mean_1d` passed.
All tests in `test_layernorm_mean_2d` passed.
All tests in `test_layernorm_std` passed.
All tests in `test_layernorm_exact` passed.
All tests in `test_layernorm_backward` passed.


In [None]:
class Dropout(nn.Module):
    '''Randomly zeroes elements of the input during training (inverted dropout).

    In training mode each element is dropped independently with probability
    `p`, and the survivors are scaled by 1 / (1 - p) so the expected value of
    the output matches the input. In eval mode the input is returned unchanged.

    p: probability of zeroing an element; must lie in [0, 1].
    '''

    def __init__(self, p: float):
        super().__init__()
        if not 0.0 <= p <= 1.0:
            raise ValueError(f"dropout probability has to be between 0 and 1, but got {p}")
        self.p = p

    def forward(self, x: t.Tensor) -> t.Tensor:
        '''Apply dropout to `x`.

        x: floating-point tensor of any shape.
        Returns a tensor with the same shape as `x`.
        '''
        # Nothing to do outside training or when no element can be dropped.
        if not self.training or self.p == 0:
            return x
        # p == 1 would divide by zero below; everything is dropped instead.
        if self.p == 1:
            return t.zeros_like(x)
        # rand_like samples from [0, 1), so an element survives with probability 1 - p.
        keep_mask = t.rand_like(x) >= self.p
        return x * keep_mask / (1 - self.p)

    def extra_repr(self) -> str:
        '''Show the drop probability inside the module repr.'''
        return f"p={self.p}"

# Run the dropout checks from the local `utils` module against the custom Dropout class.
utils.test_dropout_eval(Dropout)
utils.test_dropout_training(Dropout)