In [47]:
import torch as t
from torch import nn

from typing import Union, List

import utils

In [17]:
class Embedding(nn.Module):
    '''Lookup table that maps integer indices to learnable embedding vectors.

    Args:
        num_embeddings: number of rows in the table.
        embedding_dim: length of each embedding vector.

    Attributes:
        weight: learnable parameter of shape (num_embeddings, embedding_dim),
            initialised with samples from a standard normal distribution.
    '''

    def __init__(self, num_embeddings: int, embedding_dim: int):
        super().__init__()
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.weight = nn.Parameter(t.randn(num_embeddings, embedding_dim))

    def forward(self, x: t.LongTensor) -> t.Tensor:
        '''For each integer in the input, return that row of the embedding.

        Args:
            x: integer index tensor of any shape, e.g. (seq,) or (batch, seq).

        Returns:
            Tensor of shape x.shape + (embedding_dim,).
        '''
        # Index directly rather than via t.index_select: index_select requires
        # a 1-D index, so batched (batch, seq) inputs would raise.
        return self.weight[x]

    def extra_repr(self) -> str:
        '''Return the constructor arguments shown inside the module's repr.'''
        params_to_print = ["num_embeddings", "embedding_dim"]
        return ', '.join(f'{p}={getattr(self, p)}' for p in params_to_print)

# Run the Embedding test helper from the local `utils` module against the class above.
utils.test_embedding(Embedding)

In [45]:
class GELU(nn.Module):
    '''GELU activation using the tanh approximation.

    Computes 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))
    element-wise.
    '''

    def forward(self, x: t.Tensor) -> t.Tensor:
        '''Apply the tanh-approximated GELU element-wise to x.

        Args:
            x: floating-point tensor of any shape.

        Returns:
            Tensor with the same shape and dtype as x.
        '''
        # Keep the scale as a Python float rather than a tensor: no tensor is
        # allocated on each call, and float64 inputs are not limited to a
        # float32-precision constant.
        scale = (2 / t.pi) ** 0.5
        return 0.5 * x * (1 + t.tanh(scale * (x + 0.044715 * x ** 3)))


# Sanity check: the hand-written GELU must agree with PyTorch's built-in
# tanh-approximated GELU on 200 evenly spaced points in [-5, 10].
x = t.linspace(start=-5, end=10, steps=200)
t.testing.assert_close(
    actual=GELU()(x),
    expected=t.nn.functional.gelu(x, approximate='tanh'),
)

In [181]:
class LayerNorm(nn.Module):
    '''Layer normalisation over the trailing dimensions of the input.

    The last len(normalized_shape) dimensions are normalised to zero mean and
    unit (biased) variance, then optionally scaled and shifted by learnable
    per-element parameters.

    Args:
        normalized_shape: size(s) of the trailing dimension(s) to normalise
            over. An int is treated as a one-element shape.
        eps: constant added to the variance before the square root, for
            numerical stability.
        elementwise_affine: if True, learn `weight` (initialised to ones) and
            `bias` (initialised to zeros), both of shape normalized_shape.
            If False, `weight` and `bias` are registered as None.
    '''

    def __init__(self,
        normalized_shape: Union[int, List[int]],
        eps: float = 1e-05,
        elementwise_affine: bool = True
    ):
        super().__init__()
        if isinstance(normalized_shape, int):
            normalized_shape = (normalized_shape,)
        # Copy into a tuple so later mutation of a caller-owned list cannot
        # change which dimensions forward() normalises over.
        self.normalized_shape = tuple(normalized_shape)
        self.eps = eps
        self.elementwise_affine = elementwise_affine

        if self.elementwise_affine:
            self.weight = nn.Parameter(t.ones(self.normalized_shape))
            self.bias = nn.Parameter(t.zeros(self.normalized_shape))
        else:
            # Register the names as None so `.weight` / `.bias` exist on
            # every instance, as they do on nn.LayerNorm.
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)

    def forward(self, x: t.Tensor) -> t.Tensor:
        '''Normalise x over its trailing len(normalized_shape) dimensions.

        Args:
            x: tensor whose trailing dimensions match normalized_shape.

        Returns:
            Tensor of the same shape as x.
        '''
        # Indices of the trailing dimensions being normalised.
        dims = tuple(range(x.dim() - len(self.normalized_shape), x.dim()))
        mean = x.mean(dim=dims, keepdim=True)
        # Biased (population) variance.
        var = x.var(dim=dims, unbiased=False, keepdim=True)

        x = (x - mean) / t.sqrt(var + self.eps)
        if self.elementwise_affine:
            x = x * self.weight + self.bias
        return x

    def extra_repr(self) -> str:
        '''Return the constructor arguments shown inside the module's repr.'''
        params_to_print = ["normalized_shape", "eps", "elementwise_affine"]
        return ', '.join(f'{p}={getattr(self, p)}' for p in params_to_print)

# Run the LayerNorm test helpers from the local `utils` module against the
# class above (judging by their names: mean in 1-D and 2-D, std, exact values,
# and the backward pass).
utils.test_layernorm_mean_1d(LayerNorm)
utils.test_layernorm_mean_2d(LayerNorm)
utils.test_layernorm_std(LayerNorm)
utils.test_layernorm_exact(LayerNorm)
utils.test_layernorm_backward(LayerNorm)

All tests in `test_layernorm_mean_1d` passed.
All tests in `test_layernorm_mean_2d` passed.
All tests in `test_layernorm_std` passed.
All tests in `test_layernorm_exact` passed.
All tests in `test_layernorm_backward` passed.


In [65]:
class Dropout(nn.Module):
    '''Inverted dropout.

    In training mode each element is independently zeroed with probability p
    and the remaining elements are divided by (1 - p). In eval mode the input
    is returned untouched.

    Args:
        p: drop probability; must satisfy 0 <= p < 1.
    '''

    def __init__(self, p: float):
        super().__init__()
        assert 0 <= p < 1
        self.p = p

    def forward(self, x: t.Tensor) -> t.Tensor:
        '''Apply dropout to x when training; otherwise return x as-is.'''
        # Guard clause: dropout is a no-op outside training.
        if not self.training:
            return x

        keep_prob = 1 - self.p
        # One uniform sample in [0, 1) per element; values below p are dropped.
        dropped = t.rand_like(x) < self.p
        return t.where(dropped, 0.0, x / keep_prob)

# Run the Dropout test helpers from the local `utils` module against the class
# above (judging by their names: eval-mode and training-mode behaviour).
utils.test_dropout_eval(Dropout)
utils.test_dropout_training(Dropout)

All tests in `test_dropout_eval` passed.
All tests in `test_dropout_training` passed.
