In [1]:
import torch
import numpy as np

In [2]:
# Build a 2x2 integer tensor directly from a nested Python list.
data = [
    [1, 2],
    [3, 4],
]
tensor_data = torch.tensor(data)
tensor_data

tensor([[1, 2],
        [3, 4]])

In [3]:
# Same nested list, this time routed through NumPy first.
array = np.asarray(data)
# from_numpy wraps the ndarray's buffer rather than copying it, so `array`
# and `tensor_from_np` see each other's in-place changes.
tensor_from_np = torch.from_numpy(array)
tensor_from_np

tensor([[1, 2],
        [3, 4]])

In [4]:
# *_like constructors copy the shape (and by default the dtype) of their argument.
tensor_one = torch.ones_like(tensor_data)
# tensor_data holds integers, so an explicit floating dtype is passed for the
# uniform random samples.
tensor_rd = torch.rand_like(tensor_data, dtype=torch.float32)
(tensor_one, tensor_rd)

(tensor([[1, 1],
         [1, 1]]),
 tensor([[0.0967, 0.0129],
         [0.7864, 0.5196]]))

In [6]:
# Factory functions that take an explicit shape: all zeros, all ones, uniform random.
shape = (3, 4)
tuple(make(shape) for make in (torch.zeros, torch.ones, torch.rand))

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 tensor([[0.5047, 0.9350, 0.1998, 0.7971],
         [0.4294, 0.8615, 0.7691, 0.3809],
         [0.4488, 0.2268, 0.9650, 0.0052]]))

In [9]:
# Stack three copies along dim 0 (the default): rows are appended.
torch.cat((tensor_one,) * 3, dim=0)

tensor([[1, 1],
        [1, 1],
        [1, 1],
        [1, 1],
        [1, 1],
        [1, 1]])

In [10]:
# Concatenate three copies along dim 1: columns are appended.
torch.cat((tensor_one,) * 3, dim=1)

tensor([[1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1]])

In [17]:
# Matrix product versus element-wise product of all-ones tensors.
(
    torch.matmul(torch.ones(shape), torch.ones(shape).T),  # contracts the shared dimension
    torch.mul(torch.ones(shape), torch.ones(shape)),  # keeps the operand shape
)

(tensor([[4., 4., 4.],
         [4., 4., 4.],
         [4., 4., 4.]]),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]))

In [21]:
# A full reduction yields a 0-dim tensor; .item() unwraps it to a plain Python number.
(torch.sum(tensor_data), torch.sum(tensor_data).item())

(tensor(10), 10)

In [20]:
import torch.nn as nn

# Lookup table of learnable vectors, shaped (vocab_size, channels).
embed = nn.Embedding(num_embeddings=10, embedding_dim=10)

In [21]:
import torch

# Random integer token ids in [0, 10), laid out as (batch_size, time_series).
idx = torch.randint(low=0, high=10, size=(4, 8))
idx

tensor([[4, 3, 8, 7, 3, 0, 6, 4],
        [4, 9, 2, 2, 8, 2, 6, 5],
        [2, 2, 8, 8, 8, 3, 3, 6],
        [2, 9, 2, 0, 2, 3, 4, 3]])

In [22]:
# Look up one vector per index: (batch_size, time_series, channels)
logits = embed(idx)
print(logits.size())
# Keep only the final time step of every sequence: (batch_size, channels)
logits[:, -1, :]

torch.Size([4, 8, 10])


tensor([[-0.8693, -0.4141, -0.3540,  0.4200,  0.4499,  1.3841,  0.4793,  1.2094,
         -0.2168,  0.9221],
        [ 1.2956,  1.3801, -0.1099, -0.1425, -0.3306,  0.0220,  1.1231,  1.9980,
         -0.3036,  1.0518],
        [ 0.8234, -0.6167, -1.7465, -0.8713,  1.3887,  0.4619,  0.5397, -2.0471,
         -1.8950, -0.0414],
        [-1.7325,  0.8044,  0.4042,  1.9385,  0.4402, -0.3148, -1.3143, -2.0211,
         -1.1408, -1.0346]], grad_fn=<SliceBackward0>)

In [23]:
from torch.nn import functional as F

# Turn the last time step's logits into a probability distribution over the
# final axis, so each row sums to 1. Shape: (batch_size, channels)
probs = F.softmax(input=logits[:, -1, :], dim=-1)
probs

tensor([[0.0244, 0.0385, 0.0409, 0.0888, 0.0914, 0.2327, 0.0942, 0.1954, 0.0469,
         0.1466],
        [0.1451, 0.1579, 0.0356, 0.0344, 0.0285, 0.0406, 0.1221, 0.2928, 0.0293,
         0.1137],
        [0.1905, 0.0451, 0.0146, 0.0350, 0.3352, 0.1327, 0.1434, 0.0108, 0.0126,
         0.0802],
        [0.0124, 0.1572, 0.1054, 0.4887, 0.1092, 0.0513, 0.0189, 0.0093, 0.0225,
         0.0250]], grad_fn=<SoftmaxBackward0>)

In [24]:
# Draw one index per row, each row of `probs` acting as its own categorical distribution.
torch.multinomial(probs, num_samples=1)

tensor([[7],
        [5],
        [0],
        [3]])

In [31]:
# Lower-triangular matrix of ones: row i has ones in columns 0..i.
w = torch.ones(5, 5).tril()
w

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [37]:
# Divide every row by its own sum so each row becomes a set of averaging weights.
# keepdim=True keeps the sums as a column so the division broadcasts per row.
w = w / w.sum(dim=1, keepdim=True)
w

tensor([[1.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.5000, 0.5000, 0.0000, 0.0000, 0.0000],
        [0.3333, 0.3333, 0.3333, 0.0000, 0.0000],
        [0.2500, 0.2500, 0.2500, 0.2500, 0.0000],
        [0.2000, 0.2000, 0.2000, 0.2000, 0.2000]])

In [39]:
# Random integer-valued data in [0, 10) with shape (3, 5, 4), cast to float32
# so it can be matrix-multiplied with the float weights.
v = torch.randint(low=0, high=10, size=(3, 5, 4)).to(torch.float32)
v

tensor([[[3., 8., 7., 2.],
         [7., 0., 6., 5.],
         [5., 0., 0., 6.],
         [8., 5., 3., 1.],
         [4., 5., 2., 2.]],

        [[3., 7., 0., 0.],
         [1., 3., 6., 8.],
         [7., 5., 5., 7.],
         [7., 5., 1., 4.],
         [4., 3., 9., 4.]],

        [[1., 1., 8., 3.],
         [9., 4., 5., 5.],
         [1., 4., 1., 2.],
         [4., 5., 4., 8.],
         [5., 4., 0., 6.]]])

In [40]:
# The 2-D weights broadcast over v's leading dimension, so each output row is
# the running mean of v's rows up to and including that position.
torch.matmul(w, v)

tensor([[[3.0000, 8.0000, 7.0000, 2.0000],
         [5.0000, 4.0000, 6.5000, 3.5000],
         [5.0000, 2.6667, 4.3333, 4.3333],
         [5.7500, 3.2500, 4.0000, 3.5000],
         [5.4000, 3.6000, 3.6000, 3.2000]],

        [[3.0000, 7.0000, 0.0000, 0.0000],
         [2.0000, 5.0000, 3.0000, 4.0000],
         [3.6667, 5.0000, 3.6667, 5.0000],
         [4.5000, 5.0000, 3.0000, 4.7500],
         [4.4000, 4.6000, 4.2000, 4.6000]],

        [[1.0000, 1.0000, 8.0000, 3.0000],
         [5.0000, 2.5000, 6.5000, 4.0000],
         [3.6667, 3.0000, 4.6667, 3.3333],
         [3.7500, 3.5000, 4.5000, 4.5000],
         [4.0000, 3.6000, 3.6000, 4.8000]]])

In [41]:
# Same averaging weights, built the attention way: start from all-zero scores
# and set every position above the diagonal to -inf.
tril = torch.ones(5, 5).tril()
w = torch.zeros(5, 5).masked_fill(tril == 0, float('-inf'))
w

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [42]:
# Row-wise softmax over the masked scores: the -inf entries become exactly 0
# and the remaining zeros share the weight equally.
#
# The scores are rebuilt from `tril` (defined in the preceding cell) instead of
# feeding `w` back into itself. With `w = F.softmax(w, ...)`, running this cell
# a second time would apply softmax to the probabilities and give non-zero
# weight to the masked positions; this form yields the same `w` on every run.
w = F.softmax(
    torch.zeros_like(tril).masked_fill(tril == 0, float('-inf')),
    dim=1,
)
w

tensor([[1.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.5000, 0.5000, 0.0000, 0.0000, 0.0000],
        [0.3333, 0.3333, 0.3333, 0.0000, 0.0000],
        [0.2500, 0.2500, 0.2500, 0.2500, 0.0000],
        [0.2000, 0.2000, 0.2000, 0.2000, 0.2000]])

In [43]:
# Apply the softmax-derived weights to v; `w` is broadcast across v's leading dimension.
torch.matmul(w, v)

tensor([[[3.0000, 8.0000, 7.0000, 2.0000],
         [5.0000, 4.0000, 6.5000, 3.5000],
         [5.0000, 2.6667, 4.3333, 4.3333],
         [5.7500, 3.2500, 4.0000, 3.5000],
         [5.4000, 3.6000, 3.6000, 3.2000]],

        [[3.0000, 7.0000, 0.0000, 0.0000],
         [2.0000, 5.0000, 3.0000, 4.0000],
         [3.6667, 5.0000, 3.6667, 5.0000],
         [4.5000, 5.0000, 3.0000, 4.7500],
         [4.4000, 4.6000, 4.2000, 4.6000]],

        [[1.0000, 1.0000, 8.0000, 3.0000],
         [5.0000, 2.5000, 6.5000, 4.0000],
         [3.6667, 3.0000, 4.6667, 3.3333],
         [3.7500, 3.5000, 4.5000, 4.5000],
         [4.0000, 3.6000, 3.6000, 4.8000]]])