#### Probabilities
For predicting the next character or word.

In [5]:
import torch

# Define probabilities
probabilities = torch.tensor([0.3, 0.7])
n_samples = 10

# Generate samples
samples = torch.multinomial(probabilities, n_samples, replacement=True)
print(samples)

tensor([1, 0, 0, 1, 0, 1, 1, 1, 0, 0])


In [6]:
# Concatenate tensors 
tensor = torch.tensor([1, 2, 3, 4])
out = torch.cat((tensor, torch.tensor([5])), dim=0)
out

tensor([1, 2, 3, 4, 5])

In [7]:
# Triangle lower matrix
# Important for predicting the next character or word (Future and history)

out = torch.tril(torch.ones(5,5))
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [8]:
# Triangle upper matrix
out = torch.triu(torch.ones(5,5))
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [9]:
# Masking /converts the lower triangle to -inf
out = torch.zeros(5,5).masked_fill(torch.tril(torch.ones(5,5)) == 0, float('-inf'))
out

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [10]:
torch.exp(out)

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [11]:
input = torch.zeros(2,3,4)
print(input.shape)
out = input.transpose(0,2)
print(out.shape)


torch.Size([2, 3, 4])
torch.Size([4, 3, 2])


In [12]:
tensor1 = torch.tensor([1,2,3])
tensor2 = torch.tensor([4,5,6])
tensor3 = torch.tensor([7,8,9])

# Stack the tensors along a new dimension
stacked_tensor = torch.stack((tensor1, tensor2, tensor3))
stacked_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

### Linear layer transformation

In [13]:
import torch.nn as nn
sample = torch.tensor([10., 10., 10.])
linear = nn.Linear(3, 3, bias=False)
print(linear(sample))


tensor([ 5.3308, -6.0372, -9.0302], grad_fn=<SqueezeBackward4>)


### softmax()
- Exponentiates the values
- Adds them up into a total sum
- Divides each value by the total sum
- Result is a probability distribution

In [14]:
import torch.nn.functional as F


# Create a tensor
tensor1 = torch.tensor([1.0,2.0, 3.0])

# Apply softmax using torch.nn.functional.softmax()
softmax_output = F.softmax(tensor1, dim=0)

print(softmax_output)


tensor([0.0900, 0.2447, 0.6652])


### Embedding vectors
- Vector of numbers that represent a word or a character
- Used to represent words or characters in a high-dimensional space
- Can be used to find relationships between words or characters


- Embedding matrix is a matrix of size (vocab_size, embedding_dim)

In [15]:
# Init embedding layer
vocab_size = 1000
embedding_dim = 100
embedding = nn.Embedding(vocab_size, embedding_dim)

# Create input indices
input_indices = torch.LongTensor([1, 5, 3, 2])

# Apply the embedding layer
embedding_output = embedding(input_indices)

# The output will be a tensor of shape (4, 100), where 4 is the number of inputs
# and 100 is the embedding dimension
print(embedding_output.shape)


torch.Size([4, 100])


### Matrix multiplication
- Multiplies two matrices




In [16]:
a = torch.tensor([[1,2], [3,4], [5,6]])
b = torch.tensor([[7,8,9], [10,11,12]])

# Matrix multiplication
print(a @ b)
# print(torch.matmul(a, b)) # Same as @

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


### INT vs Float
- Cant multiply int with float in pytorch
- expected m1 and m2 to have the same dtype, but got: long long != float

In [None]:
# INT
int_64 = torch.randint(1, (3,2))

# Float
float_32 = torch.rand(2, 3)

print(int_64)

# result = torch.matmul(int_64, float_32)
# print(result)


RuntimeError: expected m1 and m2 to have the same dtype, but got: long long != float