In [1]:
import torch
from torch import nn
import numpy as np

## Explore

### Common Functions

In [2]:
# Draw a 10x2 sample from a normal distribution with mean 0 and std 0.2
torch.normal(mean=0.0, std=0.2, size=(10, 2))

tensor([[ 0.2904, -0.2004],
        [ 0.2562, -0.1379],
        [-0.1193,  0.0258],
        [-0.1380, -0.2429],
        [-0.0552,  0.1956],
        [-0.1482, -0.2079],
        [-0.0026,  0.2449],
        [-0.1600, -0.2543],
        [-0.1913, -0.2307],
        [ 0.1029, -0.3219]])

In [3]:
# Slicing tensors: a range index (1:2) keeps the axis, an integer index drops it
X = torch.normal(mean=0.0, std=1.0, size=(3, 2, 1))
X[1, 1:2], X[0, 1]

(tensor([[0.2372]]), tensor([-0.1493]))

### Linear Layer
* It takes any (n1, n2, ..., nk, in_dim) tensor and maps it to an (n1, n2, ..., nk, out_dim) tensor

In [4]:
# Bias-free linear map acting on the last axis: (..., in_dim) -> (..., out_dim)
in_dim = 4
out_dim = 2
linear_layer = nn.Linear(in_dim, out_dim, bias=False)

In [5]:
# Input with two extra leading axes in front of the (batch_size, in_dim) block
batch_size = 3
X = torch.rand(1, 2, batch_size, in_dim)
display(X, X.shape)

tensor([[[[0.7573, 0.6799, 0.5769, 0.6115],
          [0.4796, 0.9606, 0.2561, 0.0980],
          [0.6114, 0.3206, 0.5559, 0.4730]],

         [[0.6714, 0.1972, 0.4625, 0.1150],
          [0.7412, 0.8133, 0.9782, 0.4173],
          [0.3976, 0.4183, 0.0580, 0.9802]]]])

torch.Size([1, 2, 3, 4])

In [6]:
linear_layer(X)

tensor([[[[-0.8973, -0.3517],
          [-0.4788, -0.3368],
          [-0.7347, -0.2338]],

         [[-0.5787, -0.1360],
          [-1.0363, -0.4824],
          [-0.5826, -0.1959]]]], grad_fn=<UnsafeViewBackward>)

### Softmax
Takes any tensor of shape (d1, d2, ..., dk, D) and returns a tensor of the same shape.
Softmax is applied along the last dimension.

In [7]:
# A small 3-D example tensor; its last axis (size 2) is the one softmax runs over.
# (The earlier 5-D assignment was dead code: it was overwritten before any use.)
X = torch.rand(4, 3, 2)

In [8]:
X.shape

torch.Size([4, 3, 2])

In [9]:
nn.functional.softmax(X, dim=-1)

tensor([[[0.6898, 0.3102],
         [0.5261, 0.4739],
         [0.3303, 0.6697]],

        [[0.6150, 0.3850],
         [0.2907, 0.7093],
         [0.5548, 0.4452]],

        [[0.6301, 0.3699],
         [0.4671, 0.5329],
         [0.5185, 0.4815]],

        [[0.5783, 0.4217],
         [0.4964, 0.5036],
         [0.6278, 0.3722]]])

### Torch.repeat_interleave

* What do `torch.repeat_interleave` and `torch.tile` do? How are they different?
    - `torch.tile` is only available in PyTorch version >= 1.8
* How's it different from numpy.repeat and numpy.tile?
* numpy.repeat ~ torch.repeat_interleave
* np.tile ~ torch.tile ( >= v1.8)
* np.tile ~ torch.repeat ( <= v1.7)

In [10]:
y = torch.tensor([[1, 2], [3, 4]])
y

tensor([[1, 2],
        [3, 4]])

In [11]:
torch.repeat_interleave(y, 3, dim=0)

tensor([[1, 2],
        [1, 2],
        [1, 2],
        [3, 4],
        [3, 4],
        [3, 4]])

In [12]:
# Tensor.repeat tiles the whole tensor: here 2 copies along rows and 2 along columns.
# Avoid it in new code: on PyTorch >= 1.8 use torch.tile / Tensor.tile instead.
y.repeat((2,2))

tensor([[1, 2, 1, 2],
        [3, 4, 3, 4],
        [1, 2, 1, 2],
        [3, 4, 3, 4]])

In [13]:
yNP = np.array([1,2])
yNP

array([1, 2])

In [14]:
np.repeat(yNP, 3)  # also yNP.repeat(3)

array([1, 1, 1, 2, 2, 2])

In [15]:
# np.repeat is similar to torch.repeat_interleave
np.repeat(yNP, [3, 7])

array([1, 1, 1, 2, 2, 2, 2, 2, 2, 2])

In [16]:
np.tile(yNP, (2,2))

array([[1, 2, 1, 2],
       [1, 2, 1, 2]])

### Unsqueeze and BMM 

In [17]:
# Two (N, M) standard-normal matrices whose rows are paired up below
N = 5
M = 7
keys, values = torch.randn(N, M), torch.randn(N, M)

In [18]:
keys

tensor([[-1.9960,  0.7362, -0.1414, -1.0739,  0.8829,  0.9771,  0.5810],
        [-0.7134,  0.4469,  1.0334,  0.0054,  0.1181,  1.1070,  0.7764],
        [-0.2494, -0.5957, -1.0957,  0.3801, -1.2862, -0.9432,  0.4663],
        [-0.6413,  1.3547, -1.7787, -0.5930,  0.7135, -1.3045,  0.7364],
        [-0.5168,  0.2317, -0.4032, -0.0285, -1.3790,  1.0844, -2.1884]])

In [19]:
values

tensor([[ 0.3595, -1.5099, -0.3294, -0.1450, -0.0164, -0.1691, -0.8583],
        [-0.0958,  1.3129, -1.4244,  0.5585,  0.3352, -1.1671,  1.2605],
        [ 1.0333,  0.3613, -0.1452, -0.8588, -1.1601,  1.3795,  0.6896],
        [-0.5574, -0.2654, -0.4720, -0.0266, -0.5568, -1.4078,  0.2490],
        [-0.7238,  0.1196, -2.1268, -0.6486,  0.5666, -0.7749,  0.3147]])

We want to compute the row-wise dot product of `keys` and `values`:

In [20]:
(keys*values).sum(dim=1)

tensor([-2.3051, -1.0878, -0.1277,  2.4757, -1.0327])

In [21]:
# unsqueeze(dim) inserts a new dimension of size 1 at position `dim`
# squeeze() with no argument removes every dimension of size 1
keys.shape, keys.unsqueeze(1).shape, keys.unsqueeze(1).squeeze().shape

(torch.Size([5, 7]), torch.Size([5, 1, 7]), torch.Size([5, 7]))

With batch multiplication there is a lot of reshaping

In [22]:
# Batched product of row vectors with column vectors:
#   (N, 1, M) @ (N, M, 1) -> (N, 1, 1), then reshape(-1) flattens to (N,)
torch.bmm(
    keys.unsqueeze(1),      # one 1 x M row vector per batch entry
    values.unsqueeze(-1),   # one M x 1 column vector per batch entry
).reshape(-1)

tensor([-2.3051, -1.0878, -0.1277,  2.4757, -1.0327])

With einsum it's elegant and simple

In [23]:
torch.einsum("ij, ij->i", values, keys)

tensor([-2.3051, -1.0878, -0.1277,  2.4757, -1.0327])

# Torch Functions

In [32]:
def rand_matrices(*shapes):
    """Create one standard-normal random tensor per requested shape.

    Args:
        *shapes: Any number of shapes, each passed unchanged to ``torch.randn``.

    Returns:
        list: The generated tensors, in the same order as ``shapes``.
    """
    tensors = []
    for dims in shapes:
        tensors.append(torch.randn(dims))
    return tensors

#### Concatenate Tensors

In [40]:
# Join along columns: (3, 2) and (3, 1) -> (3, 3); the trailing ";" hides the output
X, Y = rand_matrices((3, 2), (3, 1))
torch.cat((X, Y), dim=1);

#### Matrix Multiplication

In [39]:
# Matrix product: (3, 2) @ (2, 4) -> (3, 4); the trailing ";" hides the output
X, Y = rand_matrices((3, 2), (2, 4))
X @ Y;

#### One Hot Encoding

In [41]:
from torch.nn import functional as F

# Each class index becomes a length-5 indicator row
F.one_hot(torch.tensor([0, 2, 3]), num_classes=5)

tensor([[1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0]])

#### Looping over an index

In [45]:
# rand_matrices returns a list; unpack its single (5, 3, 2) tensor
(X,) = rand_matrices((5, 3, 2))

In [49]:
len(X)

5

In [50]:
# Iterating over a tensor walks its first axis, yielding one slice per step
for x in X:
    print(x.shape)

torch.Size([3, 2])
torch.Size([3, 2])
torch.Size([3, 2])
torch.Size([3, 2])
torch.Size([3, 2])
